This is the methodology used for the Trend CT story: Who, where and how often people are stunned by police in Connecticut
Visit the repo for the data used in this analysis. (Also, check out the reproducible scripts and data behind many of our other stories in our central repo)
Data for this analysis was provided by CCSU’s Institute for Municipal and Regional Policy, compiled from reports gathered by individual police departments as mandated by Connecticut law. However, this is a new form of data collection so quality of data varies department to department.
What’s in this script
Several visualizations and tables exploring the data
Bonus: As an interesting exercise, my colleague Jake Kara, also worked did a data analysis for story but with Python.
We livestreamed our process. Jake with Python [video]. Me with R (Sorry, I didn’t realize I was on mute for the first part) [video].
library(readxl)
library(openxlsx)
stuns <- read_excel("data/2015 Reported Taser Data.xlsx", sheet=1)
# You can also download the data here http://www.ccsu.edu/imrp/projects/files/2015%20Reported%20Taser%20Data.xlsx
stuns[1,] <- ifelse(is.na(stuns[1,]), colnames(stuns), stuns[1,])
colnames(stuns) <- stuns[1,]
stuns <- stuns[-1,]
colnames(stuns) <- make.names(colnames(stuns))
# another stream but for python data analysis going on right now:
# https://www.livecoding.tv/jakekara/
library(dplyr)
# Cleaning up the data
colnames(stuns) <- c("Law.Enforcement.Agency",
"Number.of.Incident.Reports",
"Incident.Case.Number",
"Date.of.Incident",
"Time.of.Incident",
"Sex",
"Race",
"Hispanic",
"Height",
"Weight",
"Age",
"Deployment.Type.I",
"Deployment.Type.II",
"Displays.or.Arc",
"Drive.Stun.Application",
"Activation.After.Probe.Contact",
"Number.of.Deployments",
"Warning.to.Subject",
"Subject.Injured",
"Subject.Not.Injured",
"Subject.Bruises",
"Subject.Abrasions",
"Subject.Breathing.Difficulty",
"Subject.Probe.Puncture.Only",
"Subject.Lost.Consciousness",
"Subject.Death",
"Subject.Other",
"Officer.Injured",
"Officer.Not.Injured",
"Officer.Bruises",
"Officer.Abrasions",
"Officer.Breathing.Difficulty",
"Officer.Probe.Puncture.Only",
"Officer.Lost.Consciousness",
"Officer.Death",
"Officer.Other",
"Location.Environment",
"Officer.s.Arrival",
"If.Other..Explain",
"Crime.in.Progress",
"Domestic.Disturbance",
"Disturbance..Other.",
"Traffic.Stop",
"Emotionally.Disturbed.Person",
"Suspicious.Person",
"Executing.Warrant",
"Under.Influence",
"Activity.Other",
"Non.aggressive",
"Previous.Hostility",
"Possibly.Intoxicated",
"Emotionally.Disturbed",
"Aggressive..Verbal.",
"Aggressive..Physical.",
"Armed.with",
"Officer.Perception.Other",
"Threat..Hostile",
"Dead.Weight..Non.Compliant",
"Fighting.Stance..Combative",
"Threaten.Use.of.Weapon",
"Fleeing",
"Unarmed.Assault",
"Armed.with.Firearm",
"Armed.with.Edged.Weapon",
"Armed.with.Blunt.Instrument",
"Armed.with.Other",
"Failed.to.Follow.Directions",
"Suicidal",
"Resistance.Other")
stuns$race_ethnicity <- ifelse(stuns$Hispanic==1, "Hispanic", stuns$Race)
by_state <- stuns %>%
group_by(race_ethnicity) %>%
summarise(total=n()) %>%
mutate(percent=round(total/sum(total)*100,2))
library(knitr)
kable(by_state)
race_ethnicity | total | percent |
---|---|---|
Asian | 2 | 0.33 |
Black | 187 | 30.66 |
Hispanic | 130 | 21.31 |
Unknown | 1 | 0.16 |
White | 290 | 47.54 |
library(tidyr)
library(DT)
by_dept_total <- stuns %>%
group_by(Law.Enforcement.Agency, race_ethnicity) %>%
summarise(total=n()) %>%
spread(race_ethnicity, total)
datatable(by_dept_total)
by_dept_percent <- stuns %>%
group_by(Law.Enforcement.Agency, race_ethnicity) %>%
summarise(total=n()) %>%
mutate(percent=round(total/sum(total)*100,2)) %>%
select(Law.Enforcement.Agency, race_ethnicity, percent) %>%
spread(race_ethnicity, percent)
datatable(by_dept_percent)
stuns$Time.of.Incident <- convertToDateTime(as.numeric(stuns$Time.of.Incident), origin = "2016-07-04")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
stuns$hour <- hour(stuns$Time.of.Incident)
library(ggplot2)
library(extrafont)
## Registering fonts with R
library(ggalt)
ggplot(stuns, aes(hour)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).
# Time of stun incidents by race
ggplot(stuns, aes(hour, fill=race_ethnicity)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).
stuns$Date.of.Incident <- as.POSIXct(as.numeric(stuns$Date.of.Incident) * (60*60*24)
, origin="1899-12-30"
, tz="GMT")
stuns$month <- month(stuns$Date.of.Incident, label=TRUE)
ggplot(stuns, aes(month)) + geom_bar()
ggplot(stuns, aes(month)) + geom_bar() + facet_grid(race_ethnicity ~.)
stuns$day <- wday(stuns$Date.of.Incident, label=TRUE)
ggplot(stuns, aes(day)) + geom_bar()
ggplot(stuns, aes(day)) + geom_bar() + facet_grid(race_ethnicity ~.)
## Mapping
require(scales)
## Loading required package: scales
require(dplyr)
require(gtools)
## Loading required package: gtools
require(ggplot2)
require(rgdal)
## Loading required package: rgdal
## Warning: package 'rgdal' was built under R version 3.2.5
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
## Linking to sp version: 1.2-3
require(ggmap)
## Loading required package: ggmap
require(Cairo)
## Loading required package: Cairo
require(gpclib)
## Loading required package: gpclib
## General Polygon Clipper Library for R (version 1.5-5)
## Type 'class ? gpc.poly' for help
require(maptools)
## Loading required package: maptools
## Checking rgeos availability: TRUE
require(reshape)
## Loading required package: reshape
##
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
##
## stamp
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:dplyr':
##
## rename
library(devtools)
## Warning: package 'devtools' was built under R version 3.2.5
library(stringr)
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:dplyr':
##
## select
library(sp)
by_dept_total[is.na(by_dept_total)] <-0
by_dept_total$total <- by_dept_total$Asian + by_dept_total$Black + by_dept_total$Hispanic + by_dept_total$Unknown + by_dept_total$White
gpclibPermit()
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
gpclibPermitStatus()
## [1] TRUE
townborders <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")
names(by_dept_total)[names(by_dept_total) == 'Law.Enforcement.Agency'] <- 'id'
total_map <- left_join(townborders, by_dept_total)
## Joining by: "id"
tm_ct <- ggplot() +
geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Where people are tased (total)", fill="")
print(tm_ct)
# White
names(by_dept_percent)[names(by_dept_percent) == 'Law.Enforcement.Agency'] <- 'id'
percent_map <- left_join(townborders, by_dept_percent)
## Joining by: "id"
tm_ct <- ggplot() +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Percent of white suspects tased", fill="")
print(tm_ct)
# Black
tm_ct <- ggplot() +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Percent of Black suspects tased", fill="")
print(tm_ct)
# Hispanic
tm_ct <- ggplot() +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Percent of Hispanic suspects tased", fill="")
print(tm_ct)
gender <- stuns %>%
group_by(Sex) %>%
summarise(Count=n()) %>%
mutate(Percent=round(Count/sum(Count)*100,2))
kable(gender)
Sex | Count | Percent |
---|---|---|
Female | 37 | 6.07 |
Male | 573 | 93.93 |
# Median age
median(as.numeric(stuns$Age), na.rm=T)
## [1] 32
stuns$Age <- as.numeric(stuns$Age)
# Age distribution
ggplot(stuns, aes(Age)) + geom_histogram(binwidth=1)
## Warning: Removed 6 rows containing non-finite values (stat_bin).
ggplot(stuns, aes(Age)) + geom_histogram(binwidth=10)
## Warning: Removed 6 rows containing non-finite values (stat_bin).
stuns3 <- subset(stuns, Age < 100)
stuns3 <- subset(stuns3, race_ethnicity!="Asian")
stuns3 <- subset(stuns3, race_ethnicity!="Unknown")
gg <- ggplot(stuns3, aes(Age)) + geom_histogram(fill="#bf6151", binwidth=2) + facet_grid(race_ethnicity ~ .)
gg <- gg + labs(x="Age", y="Stuns deployed", title="Distribution of stuns by age",
subtitle="",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
locations <- stuns %>%
group_by(Location.Environment) %>%
summarise(count=n()) %>%
mutate(percent=round(count/sum(count)*100,2))
kable(locations)
Location.Environment | count | percent |
---|---|---|
Commercial Establishment | 20 | 3.28 |
Educational Facility | 7 | 1.15 |
Indoors-Private Property | 16 | 2.62 |
Indoors-Public Building | 44 | 7.21 |
Other Residence | 31 | 5.08 |
Outdoors-Private Property | 76 | 12.46 |
Outdoors-Public Area | 241 | 39.51 |
Subject’s Residence | 172 | 28.20 |
NA | 3 | 0.49 |
gg <- ggplot(stuns3, aes(Location.Environment)) + facet_grid(race_ethnicity ~.) + geom_bar(fill="#bf6151") + coord_flip()
gg <- gg + labs(x=NULL, y="Stuns deployed", title="Distribution of stuns by location",
subtitle="",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22, hjust=-1.1))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
## Warning: Removed 3 rows containing non-finite values (stat_count).
stuns$Armed.with <- str_to_title(stuns$Armed.with)
weapon <- stuns %>%
group_by(Armed.with) %>%
summarise(count=n()) %>%
arrange(-count)
weapon <- subset(weapon, !is.na(Armed.with))
kable(weapon)
Armed.with | count |
---|---|
Knife | 26 |
Baseball Bat | 3 |
Broken Glass | 3 |
Firearm | 3 |
Hammer | 3 |
Vehicle | 3 |
Box Cutter | 2 |
Possible Knife | 2 |
1 | 1 |
12" Kitchen Knife | 1 |
15" Wooden Stake | 1 |
2 Knives Within Wingspan | 1 |
Arrow | 1 |
Butcher Knife | 1 |
Chair | 1 |
Facsimile Firearm | 1 |
Glass Bottle, Possibly Sledgehammer | 1 |
Handgun | 1 |
Handgun Pointed (Later Determined Replica) | 1 |
Heavy Metal Stanchion | 1 |
Knife | 1 |
Knife And Dagger | 1 |
Knife Sharpening Steel 14" Long | 1 |
Knife To His Neck | 1 |
Knife, Possibly A Machete | 1 |
Large Butcher Knife | 1 |
Large K-Bar Knife | 1 |
Large Kitchen Knife | 1 |
Large Stick | 1 |
Metal Cane | 1 |
Metal Pipe | 1 |
Object With Pointed Tip In Hand | 1 |
Pistol | 1 |
Pitbull | 1 |
Pocket Knife | 1 |
Possible Firearm | 1 |
Possible Gun | 1 |
Possible Weapon-History Of Weapons Charges | 1 |
Possibly Long Rifle | 1 |
Rifle | 1 |
Sword | 1 |
Threatened Use Of Knife To Kill | 1 |
Unknown If Armed | 1 |
Unknown Object In Hand | 1 |
Utility Knife | 1 |
Winged Cork Screw | 1 |
Wooden Closet Rod | 1 |
stuns$Number.of.Deployments <- as.numeric(stuns$Number.of.Deployments)
## Warning: NAs introduced by coercion
deployed <- stuns %>%
group_by(race_ethnicity) %>%
summarise(mean(Number.of.Deployments, na.rm=T))
kable(deployed)
race_ethnicity | mean(Number.of.Deployments, na.rm = T) |
---|---|
Asian | 1.0000000 |
Black | 1.1314286 |
Hispanic | 1.1869919 |
Unknown | 1.0000000 |
White | 0.9037037 |
stuns$deployment <- ifelse(stuns$Number.of.Deployments>1, "multiple", "single")
stuns$deployment <- ifelse(stuns$Number.of.Deployments==0, "none", stuns$deployment)
deployed <- stuns %>%
group_by(race_ethnicity, deployment) %>%
summarise(count=n()) %>%
mutate(percent=round(count/sum(count)*100,2))
deployed <- deployed[c("race_ethnicity", "deployment", "percent")]
deployed <- subset(deployed, !is.na(deployment))
deployed <- deployed %>%
spread(deployment, percent)
kable(deployed)
race_ethnicity | multiple | none | single |
---|---|---|---|
Asian | NA | NA | 100.00 |
Black | 19.79 | 19.79 | 54.01 |
Hispanic | 27.69 | 33.08 | 33.85 |
Unknown | NA | NA | 100.00 |
White | 17.93 | 40.00 | 35.17 |
stuns$Weight <- as.numeric(stuns$Weight)
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))
gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Weight))
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments))))
gg <- gg + labs(x="Stuns deployed", y="Pounds", title="Distribution of number of stuns by suspect's weight",
subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
b2<-ggplot(stuns, aes(factor(Number.of.Deployments), Weight)) +
geom_jitter(alpha=I(1/4), aes(color=Number.of.Deployments)) +
theme(legend.position = "none")
stuns$Height <- ifelse(stuns$Height=="6'", "6'0\"", stuns$Height)
stuns$Height <- ifelse(stuns$Height=="5'", "5'0\"", stuns$Height)
stuns$Height.inches <- sapply(strsplit(as.character(stuns$Height),"'|\""),
function(x){12*as.numeric(x[1]) + as.numeric(x[2])})
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))
gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Height.inches))
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments))))
gg <- gg + labs(x="Stuns deployed", y="Inches", title="Distribution of number of stuns by suspect's height",
subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg