Data analysis of police stun gun use in CT

This is the methodology used for the Trend CT story: Who, where and how often people are stunned by police in Connecticut

Visit the repo for the data used in this analysis. (Also, check out the reproducible scripts and data behind many of our other stories in our central repo)

Data for this analysis was provided by CCSU’s Institute for Municipal and Regional Policy, compiled from reports gathered by individual police departments as mandated by Connecticut law. However, this is a new form of data collection so quality of data varies department to department.

What’s in this script

Several visualizations and tables exploring the data

Bonus: As an interesting exercise, my colleague Jake Kara, also worked did a data analysis for story but with Python.

We livestreamed our process. Jake with Python [video]. Me with R (Sorry, I didn’t realize I was on mute for the first part) [video].

Preparing the data

library(readxl)
library(openxlsx)
stuns <- read_excel("data/2015 Reported Taser Data.xlsx", sheet=1)

# You can also download the data here http://www.ccsu.edu/imrp/projects/files/2015%20Reported%20Taser%20Data.xlsx


stuns[1,] <- ifelse(is.na(stuns[1,]), colnames(stuns), stuns[1,])
colnames(stuns) <- stuns[1,]

stuns <- stuns[-1,]

colnames(stuns) <- make.names(colnames(stuns))

# another stream but for python data analysis going on right now:
# https://www.livecoding.tv/jakekara/

library(dplyr)

# Cleaning up the data

colnames(stuns) <- c("Law.Enforcement.Agency",
                     "Number.of.Incident.Reports",
                     "Incident.Case.Number",
                     "Date.of.Incident",
                     "Time.of.Incident",
                     "Sex",
                     "Race",
                     "Hispanic",
                     "Height",
                     "Weight",
                     "Age",
                     "Deployment.Type.I",
                     "Deployment.Type.II",
                     "Displays.or.Arc",
                     "Drive.Stun.Application",
                     "Activation.After.Probe.Contact",
                     "Number.of.Deployments",
                     "Warning.to.Subject",
                     "Subject.Injured",
                     "Subject.Not.Injured",
                     "Subject.Bruises",
                     "Subject.Abrasions",
                     "Subject.Breathing.Difficulty",
                     "Subject.Probe.Puncture.Only",
                     "Subject.Lost.Consciousness",
                     "Subject.Death",
                     "Subject.Other",
                     "Officer.Injured",
                     "Officer.Not.Injured",
                     "Officer.Bruises",
                     "Officer.Abrasions",
                     "Officer.Breathing.Difficulty",
                     "Officer.Probe.Puncture.Only",
                     "Officer.Lost.Consciousness",
                     "Officer.Death",
                     "Officer.Other",
                     "Location.Environment",
                     "Officer.s.Arrival",
                     "If.Other..Explain",
                     "Crime.in.Progress",
                     "Domestic.Disturbance",
                     "Disturbance..Other.",
                     "Traffic.Stop",
                     "Emotionally.Disturbed.Person",
                     "Suspicious.Person",
                     "Executing.Warrant",
                     "Under.Influence",
                     "Activity.Other",
                     "Non.aggressive",
                     "Previous.Hostility",
                     "Possibly.Intoxicated",
                     "Emotionally.Disturbed",
                     "Aggressive..Verbal.",
                     "Aggressive..Physical.",
                     "Armed.with",
                     "Officer.Perception.Other",
                     "Threat..Hostile",
                     "Dead.Weight..Non.Compliant",
                     "Fighting.Stance..Combative",
                     "Threaten.Use.of.Weapon",
                     "Fleeing",
                     "Unarmed.Assault",
                     "Armed.with.Firearm",
                     "Armed.with.Edged.Weapon",
                     "Armed.with.Blunt.Instrument",
                     "Armed.with.Other",
                     "Failed.to.Follow.Directions",
                     "Suicidal",
                     "Resistance.Other")

stuns$race_ethnicity <- ifelse(stuns$Hispanic==1, "Hispanic", stuns$Race)

Stun incidents by race in the state

by_state <- stuns %>%
  group_by(race_ethnicity) %>%
  summarise(total=n()) %>%
  mutate(percent=round(total/sum(total)*100,2))
  
library(knitr)
kable(by_state)

race_ethnicity	total	percent
Asian	2	0.33
Black	187	30.66
Hispanic	130	21.31
Unknown	1	0.16
White	290	47.54

Total stun incidents per department

library(tidyr)
library(DT)

by_dept_total <- stuns %>%
  group_by(Law.Enforcement.Agency, race_ethnicity) %>%
  summarise(total=n()) %>%
  spread(race_ethnicity, total)

datatable(by_dept_total)

Stun incidents per department by race

by_dept_percent <- stuns %>%
  group_by(Law.Enforcement.Agency, race_ethnicity) %>%
  summarise(total=n()) %>%
  mutate(percent=round(total/sum(total)*100,2)) %>%
  select(Law.Enforcement.Agency, race_ethnicity, percent) %>%
  spread(race_ethnicity, percent)

datatable(by_dept_percent)

Time of stun incidents

stuns$Time.of.Incident <- convertToDateTime(as.numeric(stuns$Time.of.Incident), origin = "2016-07-04")

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

stuns$hour <- hour(stuns$Time.of.Incident)

library(ggplot2)
library(extrafont)

## Registering fonts with R

library(ggalt)

ggplot(stuns, aes(hour)) + geom_histogram(binwidth=1)

## Warning: Removed 2 rows containing non-finite values (stat_bin).

# Time of stun incidents by race

ggplot(stuns, aes(hour, fill=race_ethnicity)) + geom_histogram(binwidth=1)

## Warning: Removed 2 rows containing non-finite values (stat_bin).

Month

stuns$Date.of.Incident <- as.POSIXct(as.numeric(stuns$Date.of.Incident) * (60*60*24)
                                     , origin="1899-12-30"
                                     , tz="GMT")

stuns$month <- month(stuns$Date.of.Incident, label=TRUE)

ggplot(stuns, aes(month)) + geom_bar()

ggplot(stuns, aes(month)) + geom_bar() + facet_grid(race_ethnicity ~.)

Day of the week

stuns$day <- wday(stuns$Date.of.Incident, label=TRUE)

ggplot(stuns, aes(day)) + geom_bar()

ggplot(stuns, aes(day)) + geom_bar() + facet_grid(race_ethnicity ~.)

Mapping total stun gun incidents

## Mapping

require(scales)

## Loading required package: scales

require(dplyr)
require(gtools)

## Loading required package: gtools

require(ggplot2)
require(rgdal)

## Loading required package: rgdal

## Warning: package 'rgdal' was built under R version 3.2.5

## Loading required package: sp

## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3

require(ggmap)

## Loading required package: ggmap

require(Cairo)

## Loading required package: Cairo

require(gpclib)

## Loading required package: gpclib

## General Polygon Clipper Library for R (version 1.5-5)
##  Type 'class ? gpc.poly' for help

require(maptools)

## Loading required package: maptools

## Checking rgeos availability: TRUE

require(reshape)

## Loading required package: reshape

## 
## Attaching package: 'reshape'

## The following object is masked from 'package:lubridate':
## 
##     stamp

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following object is masked from 'package:dplyr':
## 
##     rename

library(devtools)

## Warning: package 'devtools' was built under R version 3.2.5

library(stringr)
library(raster)

## 
## Attaching package: 'raster'

## The following object is masked from 'package:tidyr':
## 
##     extract

## The following object is masked from 'package:dplyr':
## 
##     select

library(sp)


by_dept_total[is.na(by_dept_total)] <-0 
by_dept_total$total <- by_dept_total$Asian + by_dept_total$Black + by_dept_total$Hispanic + by_dept_total$Unknown + by_dept_total$White
  
gpclibPermit()

## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release

## [1] TRUE

gpclibPermitStatus()

## [1] TRUE

townborders <- readOGR(dsn="maps", layer="ctgeo")

## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields

townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")

names(by_dept_total)[names(by_dept_total) == 'Law.Enforcement.Agency'] <- 'id'


total_map <- left_join(townborders, by_dept_total)

## Joining by: "id"

tm_ct <- ggplot() +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Where people are tased (total)", fill="")

print(tm_ct)

Mapping percent by race

# White
names(by_dept_percent)[names(by_dept_percent) == 'Law.Enforcement.Agency'] <- 'id'


percent_map <- left_join(townborders, by_dept_percent)

## Joining by: "id"

tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of white suspects tased", fill="")

print(tm_ct)

# Black

tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of Black suspects tased", fill="")

print(tm_ct)

# Hispanic
tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of Hispanic suspects tased", fill="")

print(tm_ct)

Gender breakdown

gender <- stuns %>%
  group_by(Sex) %>%
  summarise(Count=n()) %>%
  mutate(Percent=round(Count/sum(Count)*100,2))

kable(gender)

Sex	Count	Percent
Female	37	6.07
Male	573	93.93

Median age

# Median age

median(as.numeric(stuns$Age), na.rm=T)

## [1] 32

Age distribution

stuns$Age <- as.numeric(stuns$Age)

# Age distribution
ggplot(stuns, aes(Age)) + geom_histogram(binwidth=1)

## Warning: Removed 6 rows containing non-finite values (stat_bin).

ggplot(stuns, aes(Age)) + geom_histogram(binwidth=10)

## Warning: Removed 6 rows containing non-finite values (stat_bin).

stuns3 <- subset(stuns, Age < 100)
stuns3 <- subset(stuns3, race_ethnicity!="Asian")
stuns3 <- subset(stuns3, race_ethnicity!="Unknown")

gg <- ggplot(stuns3, aes(Age)) + geom_histogram(fill="#bf6151", binwidth=2) + facet_grid(race_ethnicity ~ .)
gg <- gg + labs(x="Age", y="Stuns deployed", title="Distribution of stuns by age",
                subtitle="",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

Where does this happen?

locations <- stuns %>%
  group_by(Location.Environment) %>%
  summarise(count=n()) %>%
  mutate(percent=round(count/sum(count)*100,2))

kable(locations)

Location.Environment	count	percent
Commercial Establishment	20	3.28
Educational Facility	7	1.15
Indoors-Private Property	16	2.62
Indoors-Public Building	44	7.21
Other Residence	31	5.08
Outdoors-Private Property	76	12.46
Outdoors-Public Area	241	39.51
Subject’s Residence	172	28.20
NA	3	0.49

gg <- ggplot(stuns3, aes(Location.Environment)) + facet_grid(race_ethnicity ~.) + geom_bar(fill="#bf6151") + coord_flip()
gg <- gg + labs(x=NULL, y="Stuns deployed", title="Distribution of stuns by location",
                subtitle="",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22, hjust=-1.1))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

## Warning: Removed 3 rows containing non-finite values (stat_count).

What is the subject holding

stuns$Armed.with <- str_to_title(stuns$Armed.with)
weapon <- stuns %>%
  group_by(Armed.with) %>%
  summarise(count=n()) %>%
  arrange(-count)

weapon <- subset(weapon, !is.na(Armed.with))
kable(weapon)

Armed.with	count
Knife	26
Baseball Bat	3
Broken Glass	3
Firearm	3
Hammer	3
Vehicle	3
Box Cutter	2
Possible Knife	2
1	1
12" Kitchen Knife	1
15" Wooden Stake	1
2 Knives Within Wingspan	1
Arrow	1
Butcher Knife	1
Chair	1
Facsimile Firearm	1
Glass Bottle, Possibly Sledgehammer	1
Handgun	1
Handgun Pointed (Later Determined Replica)	1
Heavy Metal Stanchion	1
Knife	1
Knife And Dagger	1
Knife Sharpening Steel 14" Long	1
Knife To His Neck	1
Knife, Possibly A Machete	1
Large Butcher Knife	1
Large K-Bar Knife	1
Large Kitchen Knife	1
Large Stick	1
Metal Cane	1
Metal Pipe	1
Object With Pointed Tip In Hand	1
Pistol	1
Pitbull	1
Pocket Knife	1
Possible Firearm	1
Possible Gun	1
Possible Weapon-History Of Weapons Charges	1
Possibly Long Rifle	1
Rifle	1
Sword	1
Threatened Use Of Knife To Kill	1
Unknown If Armed	1
Unknown Object In Hand	1
Utility Knife	1
Winged Cork Screw	1
Wooden Closet Rod	1

How often tased

stuns$Number.of.Deployments <- as.numeric(stuns$Number.of.Deployments)

## Warning: NAs introduced by coercion

deployed <- stuns %>%
  group_by(race_ethnicity) %>%
  summarise(mean(Number.of.Deployments, na.rm=T))

kable(deployed)

race_ethnicity	mean(Number.of.Deployments, na.rm = T)
Asian	1.0000000
Black	1.1314286
Hispanic	1.1869919
Unknown	1.0000000
White	0.9037037

Doesn’t say much. Let’s try another factor

stuns$deployment <- ifelse(stuns$Number.of.Deployments>1, "multiple", "single")
stuns$deployment <- ifelse(stuns$Number.of.Deployments==0, "none", stuns$deployment)

deployed <- stuns %>%
  group_by(race_ethnicity, deployment) %>%
  summarise(count=n()) %>%
  mutate(percent=round(count/sum(count)*100,2)) 

deployed <- deployed[c("race_ethnicity", "deployment", "percent")]

deployed <- subset(deployed, !is.na(deployment))

deployed <- deployed %>%
  spread(deployment, percent)

kable(deployed)

race_ethnicity	multiple	none	single
Asian	NA	NA	100.00
Black	19.79	19.79	54.01
Hispanic	27.69	33.08	33.85
Unknown	NA	NA	100.00
White	17.93	40.00	35.17

Weights

stuns$Weight <- as.numeric(stuns$Weight)
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))

gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Weight)) 
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments)))) 
gg <- gg + labs(x="Stuns deployed", y="Pounds", title="Distribution of number of stuns by suspect's weight",
                subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

b2<-ggplot(stuns, aes(factor(Number.of.Deployments), Weight)) + 
  geom_jitter(alpha=I(1/4), aes(color=Number.of.Deployments)) +
  theme(legend.position = "none")

Heights

stuns$Height <- ifelse(stuns$Height=="6'", "6'0\"", stuns$Height)
stuns$Height <- ifelse(stuns$Height=="5'", "5'0\"", stuns$Height)

stuns$Height.inches <- sapply(strsplit(as.character(stuns$Height),"'|\""),
       function(x){12*as.numeric(x[1]) + as.numeric(x[2])})
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))

gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Height.inches)) 
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments)))) 
gg <- gg + labs(x="Stuns deployed", y="Inches", title="Distribution of number of stuns by suspect's height",
                subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg