This is the methodology used for the Trend CT story: Who, where and how often people are stunned by police in Connecticut

Visit the repo for the data used in this analysis. (Also, check out the reproducible scripts and data behind many of our other stories in our central repo)

Data for this analysis was provided by CCSU’s Institute for Municipal and Regional Policy, compiled from reports gathered by individual police departments as mandated by Connecticut law. However, this is a new form of data collection so quality of data varies department to department.

What’s in this script

Several visualizations and tables exploring the data


Bonus: As an interesting exercise, my colleague Jake Kara, also worked did a data analysis for story but with Python.

We livestreamed our process. Jake with Python [video]. Me with R (Sorry, I didn’t realize I was on mute for the first part) [video].

Preparing the data

library(readxl)
library(openxlsx)
stuns <- read_excel("data/2015 Reported Taser Data.xlsx", sheet=1)

# You can also download the data here http://www.ccsu.edu/imrp/projects/files/2015%20Reported%20Taser%20Data.xlsx


stuns[1,] <- ifelse(is.na(stuns[1,]), colnames(stuns), stuns[1,])
colnames(stuns) <- stuns[1,]

stuns <- stuns[-1,]

colnames(stuns) <- make.names(colnames(stuns))

# another stream but for python data analysis going on right now:
# https://www.livecoding.tv/jakekara/

library(dplyr)

# Cleaning up the data

colnames(stuns) <- c("Law.Enforcement.Agency",
                     "Number.of.Incident.Reports",
                     "Incident.Case.Number",
                     "Date.of.Incident",
                     "Time.of.Incident",
                     "Sex",
                     "Race",
                     "Hispanic",
                     "Height",
                     "Weight",
                     "Age",
                     "Deployment.Type.I",
                     "Deployment.Type.II",
                     "Displays.or.Arc",
                     "Drive.Stun.Application",
                     "Activation.After.Probe.Contact",
                     "Number.of.Deployments",
                     "Warning.to.Subject",
                     "Subject.Injured",
                     "Subject.Not.Injured",
                     "Subject.Bruises",
                     "Subject.Abrasions",
                     "Subject.Breathing.Difficulty",
                     "Subject.Probe.Puncture.Only",
                     "Subject.Lost.Consciousness",
                     "Subject.Death",
                     "Subject.Other",
                     "Officer.Injured",
                     "Officer.Not.Injured",
                     "Officer.Bruises",
                     "Officer.Abrasions",
                     "Officer.Breathing.Difficulty",
                     "Officer.Probe.Puncture.Only",
                     "Officer.Lost.Consciousness",
                     "Officer.Death",
                     "Officer.Other",
                     "Location.Environment",
                     "Officer.s.Arrival",
                     "If.Other..Explain",
                     "Crime.in.Progress",
                     "Domestic.Disturbance",
                     "Disturbance..Other.",
                     "Traffic.Stop",
                     "Emotionally.Disturbed.Person",
                     "Suspicious.Person",
                     "Executing.Warrant",
                     "Under.Influence",
                     "Activity.Other",
                     "Non.aggressive",
                     "Previous.Hostility",
                     "Possibly.Intoxicated",
                     "Emotionally.Disturbed",
                     "Aggressive..Verbal.",
                     "Aggressive..Physical.",
                     "Armed.with",
                     "Officer.Perception.Other",
                     "Threat..Hostile",
                     "Dead.Weight..Non.Compliant",
                     "Fighting.Stance..Combative",
                     "Threaten.Use.of.Weapon",
                     "Fleeing",
                     "Unarmed.Assault",
                     "Armed.with.Firearm",
                     "Armed.with.Edged.Weapon",
                     "Armed.with.Blunt.Instrument",
                     "Armed.with.Other",
                     "Failed.to.Follow.Directions",
                     "Suicidal",
                     "Resistance.Other")

stuns$race_ethnicity <- ifelse(stuns$Hispanic==1, "Hispanic", stuns$Race)

Stun incidents by race in the state

by_state <- stuns %>%
  group_by(race_ethnicity) %>%
  summarise(total=n()) %>%
  mutate(percent=round(total/sum(total)*100,2))
  
library(knitr)
kable(by_state)
race_ethnicity total percent
Asian 2 0.33
Black 187 30.66
Hispanic 130 21.31
Unknown 1 0.16
White 290 47.54

Total stun incidents per department

library(tidyr)
library(DT)

by_dept_total <- stuns %>%
  group_by(Law.Enforcement.Agency, race_ethnicity) %>%
  summarise(total=n()) %>%
  spread(race_ethnicity, total)

datatable(by_dept_total)

Stun incidents per department by race

by_dept_percent <- stuns %>%
  group_by(Law.Enforcement.Agency, race_ethnicity) %>%
  summarise(total=n()) %>%
  mutate(percent=round(total/sum(total)*100,2)) %>%
  select(Law.Enforcement.Agency, race_ethnicity, percent) %>%
  spread(race_ethnicity, percent)

datatable(by_dept_percent)

Time of stun incidents

stuns$Time.of.Incident <- convertToDateTime(as.numeric(stuns$Time.of.Incident), origin = "2016-07-04")

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
stuns$hour <- hour(stuns$Time.of.Incident)

library(ggplot2)
library(extrafont)
## Registering fonts with R
library(ggalt)

ggplot(stuns, aes(hour)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

# Time of stun incidents by race

ggplot(stuns, aes(hour, fill=race_ethnicity)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

Month

stuns$Date.of.Incident <- as.POSIXct(as.numeric(stuns$Date.of.Incident) * (60*60*24)
                                     , origin="1899-12-30"
                                     , tz="GMT")

stuns$month <- month(stuns$Date.of.Incident, label=TRUE)

ggplot(stuns, aes(month)) + geom_bar()

ggplot(stuns, aes(month)) + geom_bar() + facet_grid(race_ethnicity ~.)

Day of the week

stuns$day <- wday(stuns$Date.of.Incident, label=TRUE)

ggplot(stuns, aes(day)) + geom_bar()

ggplot(stuns, aes(day)) + geom_bar() + facet_grid(race_ethnicity ~.)

Mapping total stun gun incidents

## Mapping

require(scales)
## Loading required package: scales
require(dplyr)
require(gtools)
## Loading required package: gtools
require(ggplot2)
require(rgdal)
## Loading required package: rgdal
## Warning: package 'rgdal' was built under R version 3.2.5
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3
require(ggmap)
## Loading required package: ggmap
require(Cairo)
## Loading required package: Cairo
require(gpclib)
## Loading required package: gpclib
## General Polygon Clipper Library for R (version 1.5-5)
##  Type 'class ? gpc.poly' for help
require(maptools)
## Loading required package: maptools
## Checking rgeos availability: TRUE
require(reshape)
## Loading required package: reshape
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:dplyr':
## 
##     rename
library(devtools)
## Warning: package 'devtools' was built under R version 3.2.5
library(stringr)
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:dplyr':
## 
##     select
library(sp)


by_dept_total[is.na(by_dept_total)] <-0 
by_dept_total$total <- by_dept_total$Asian + by_dept_total$Black + by_dept_total$Hispanic + by_dept_total$Unknown + by_dept_total$White
  
gpclibPermit()
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
gpclibPermitStatus()
## [1] TRUE
townborders <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")

names(by_dept_total)[names(by_dept_total) == 'Law.Enforcement.Agency'] <- 'id'


total_map <- left_join(townborders, by_dept_total)
## Joining by: "id"
tm_ct <- ggplot() +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Where people are tased (total)", fill="")

print(tm_ct)

Mapping percent by race

# White
names(by_dept_percent)[names(by_dept_percent) == 'Law.Enforcement.Agency'] <- 'id'


percent_map <- left_join(townborders, by_dept_percent)
## Joining by: "id"
tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of white suspects tased", fill="")

print(tm_ct)

# Black

tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of Black suspects tased", fill="")

print(tm_ct)

# Hispanic
tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of Hispanic suspects tased", fill="")

print(tm_ct)

Gender breakdown

gender <- stuns %>%
  group_by(Sex) %>%
  summarise(Count=n()) %>%
  mutate(Percent=round(Count/sum(Count)*100,2))

kable(gender)
Sex Count Percent
Female 37 6.07
Male 573 93.93

Median age

# Median age

median(as.numeric(stuns$Age), na.rm=T)
## [1] 32

Age distribution

stuns$Age <- as.numeric(stuns$Age)

# Age distribution
ggplot(stuns, aes(Age)) + geom_histogram(binwidth=1)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

ggplot(stuns, aes(Age)) + geom_histogram(binwidth=10)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

stuns3 <- subset(stuns, Age < 100)
stuns3 <- subset(stuns3, race_ethnicity!="Asian")
stuns3 <- subset(stuns3, race_ethnicity!="Unknown")

gg <- ggplot(stuns3, aes(Age)) + geom_histogram(fill="#bf6151", binwidth=2) + facet_grid(race_ethnicity ~ .)
gg <- gg + labs(x="Age", y="Stuns deployed", title="Distribution of stuns by age",
                subtitle="",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

Where does this happen?

locations <- stuns %>%
  group_by(Location.Environment) %>%
  summarise(count=n()) %>%
  mutate(percent=round(count/sum(count)*100,2))

kable(locations)
Location.Environment count percent
Commercial Establishment 20 3.28
Educational Facility 7 1.15
Indoors-Private Property 16 2.62
Indoors-Public Building 44 7.21
Other Residence 31 5.08
Outdoors-Private Property 76 12.46
Outdoors-Public Area 241 39.51
Subject’s Residence 172 28.20
NA 3 0.49
gg <- ggplot(stuns3, aes(Location.Environment)) + facet_grid(race_ethnicity ~.) + geom_bar(fill="#bf6151") + coord_flip()
gg <- gg + labs(x=NULL, y="Stuns deployed", title="Distribution of stuns by location",
                subtitle="",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22, hjust=-1.1))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
## Warning: Removed 3 rows containing non-finite values (stat_count).

What is the subject holding

stuns$Armed.with <- str_to_title(stuns$Armed.with)
weapon <- stuns %>%
  group_by(Armed.with) %>%
  summarise(count=n()) %>%
  arrange(-count)

weapon <- subset(weapon, !is.na(Armed.with))
kable(weapon)
Armed.with count
Knife 26
Baseball Bat 3
Broken Glass 3
Firearm 3
Hammer 3
Vehicle 3
Box Cutter 2
Possible Knife 2
1 1
12" Kitchen Knife 1
15" Wooden Stake 1
2 Knives Within Wingspan 1
Arrow 1
Butcher Knife 1
Chair 1
Facsimile Firearm 1
Glass Bottle, Possibly Sledgehammer 1
Handgun 1
Handgun Pointed (Later Determined Replica) 1
Heavy Metal Stanchion 1
Knife 1
Knife And Dagger 1
Knife Sharpening Steel 14" Long 1
Knife To His Neck 1
Knife, Possibly A Machete 1
Large Butcher Knife 1
Large K-Bar Knife 1
Large Kitchen Knife 1
Large Stick 1
Metal Cane 1
Metal Pipe 1
Object With Pointed Tip In Hand 1
Pistol 1
Pitbull 1
Pocket Knife 1
Possible Firearm 1
Possible Gun 1
Possible Weapon-History Of Weapons Charges 1
Possibly Long Rifle 1
Rifle 1
Sword 1
Threatened Use Of Knife To Kill 1
Unknown If Armed 1
Unknown Object In Hand 1
Utility Knife 1
Winged Cork Screw 1
Wooden Closet Rod 1

How often tased

stuns$Number.of.Deployments <- as.numeric(stuns$Number.of.Deployments)
## Warning: NAs introduced by coercion
deployed <- stuns %>%
  group_by(race_ethnicity) %>%
  summarise(mean(Number.of.Deployments, na.rm=T))

kable(deployed)
race_ethnicity mean(Number.of.Deployments, na.rm = T)
Asian 1.0000000
Black 1.1314286
Hispanic 1.1869919
Unknown 1.0000000
White 0.9037037

Doesn’t say much. Let’s try another factor

stuns$deployment <- ifelse(stuns$Number.of.Deployments>1, "multiple", "single")
stuns$deployment <- ifelse(stuns$Number.of.Deployments==0, "none", stuns$deployment)

deployed <- stuns %>%
  group_by(race_ethnicity, deployment) %>%
  summarise(count=n()) %>%
  mutate(percent=round(count/sum(count)*100,2)) 

deployed <- deployed[c("race_ethnicity", "deployment", "percent")]

deployed <- subset(deployed, !is.na(deployment))

deployed <- deployed %>%
  spread(deployment, percent)

kable(deployed)
race_ethnicity multiple none single
Asian NA NA 100.00
Black 19.79 19.79 54.01
Hispanic 27.69 33.08 33.85
Unknown NA NA 100.00
White 17.93 40.00 35.17

Weights

stuns$Weight <- as.numeric(stuns$Weight)
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))

gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Weight)) 
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments)))) 
gg <- gg + labs(x="Stuns deployed", y="Pounds", title="Distribution of number of stuns by suspect's weight",
                subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

b2<-ggplot(stuns, aes(factor(Number.of.Deployments), Weight)) + 
  geom_jitter(alpha=I(1/4), aes(color=Number.of.Deployments)) +
  theme(legend.position = "none")

Heights

stuns$Height <- ifelse(stuns$Height=="6'", "6'0\"", stuns$Height)
stuns$Height <- ifelse(stuns$Height=="5'", "5'0\"", stuns$Height)

stuns$Height.inches <- sapply(strsplit(as.character(stuns$Height),"'|\""),
       function(x){12*as.numeric(x[1]) + as.numeric(x[2])})
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))

gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Height.inches)) 
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments)))) 
gg <- gg + labs(x="Stuns deployed", y="Inches", title="Distribution of number of stuns by suspect's height",
                subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg