An analysis for the TrendCT story: Police departments that hand out the most traffic tickets in Connecticut

The data is from the Connecticut Racial Profiling Prohibition Project hosted at the CT Data Collaborative which collects traffic incident reports between October 2013 and September 2014.

Skip ahead and download the summary dataset generated by this analysis.

library(lubridate)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
incidents <- read.csv("http://ctrp3viz.s3.amazonaws.com/data/Connecticut_r1.csv", stringsAsFactors=FALSE)

# Fix times
incidents$RealDate <- as.POSIXct(as.Date(incidents$InterventionDateTime, origin="1899-12-30"))
incidents$RealTime <- format(as.POSIXct((incidents$InterventionTime) * 86400, origin = "1899-12-30", tz="America/Montserrat"), "%H:%M")
incidents$Hour <- hour(incidents$RealDate)


# Let's look at rate per department employment
# First, let's bring in FBI Uniform Crime Report that lists 2013 department employment

pers <- read.csv("police_dept.csv", stringsAsFactors=FALSE)

# Calculate total sworn officers
pers$sworn <- pers$sworn.male+ pers$sworn.female

# Tallying tickets by department
incidents_by_dept <- data.frame(table(incidents$Department.Name))
colnames(incidents_by_dept) <- c("name", "incidents")
incidents_by_dept$name <- toupper(incidents_by_dept$name)
incidents_by_dept <- left_join(incidents_by_dept, pers)
## Joining by: "name"
# Calculating tickets per officer
incidents_by_dept$inc.rate <- round((incidents_by_dept$incidents/incidents_by_dept$sworn), digits=2)

reordered <- incidents_by_dept[order(incidents_by_dept$inc.rate),]

# Creating some stand-in data
reordered$first_most <- "whatever"
reordered$second_most <- "whatevs"
reordered$pop_day <- "funday"
reordered$pop_hour <- "fun o'clock"

library(stringr)
inc <- incidents
inc$Department.Name <- str_to_upper(inc$Department.Name)

# Loop to figure out first- and second-most ticketed offense
# Also most popular day of the week and hour

reo_list <- 1:nrow(reordered)

for (i in reo_list) {
  dept_name <- reordered$name[i]
  sub <- subset(inc, Department.Name==dept_name)
  sub_dat <- data.frame(table(sub$StatutoryReasonForStop))
  sub_dat$Var1 <- as.character(sub_dat$Var1)
  sub_dat <- sub_dat[order(-sub_dat$Freq),] 
  reordered$first_most[i] <- sub_dat$Var1[1]
  reordered$second_most[i] <- sub_dat$Var1[2]
  sub_day <- data.frame(table(sub$Day.of.Week))
  sub_day$Var1 <- as.character(sub_day$Var1)
  sub_day <- sub_day[order(-sub_day$Freq),]
  reordered$pop_day[i] <- sub_day$Var1[1]
  sub_time <- data.frame(table(sub$Hour))
  sub_time <- sub_time[order(-sub_time$Freq),] 
  if (sub_time$Var1[1]==0) {
    reordered$pop_hour[i] <- "12 am"
    } else if (sub_time$Var1[1]==1) {
    reordered$pop_hour[i] <- "1 am"
    } else if (sub_time$Var1[1]==2) {
    reordered$pop_hour[i] <- "2 am"
    } else if (sub_time$Var1[1]==3) {
    reordered$pop_hour[i] <- "3 am"
    } else if (sub_time$Var1[1]==4) {
    reordered$pop_hour[i] <- "4 am"
    } else if (sub_time$Var1[1]==5) {
    reordered$pop_hour[i] <- "5 am"
    } else if (sub_time$Var1[1]==6) {
    reordered$pop_hour[i] <- "6 am"
    } else if (sub_time$Var1[1]==7) {
    reordered$pop_hour[i] <- "7 am"
    } else if (sub_time$Var1[1]==8) {
    reordered$pop_hour[i] <- "8 am"
    } else if (sub_time$Var1[1]==9) {
    reordered$pop_hour[i] <- "9 am"
    } else if (sub_time$Var1[1]==10) {
    reordered$pop_hour[i] <- "10 am"
    } else if (sub_time$Var1[1]==11) {
    reordered$pop_hour[i] <- "11 am"
    } else if (sub_time$Var1[1]==12) {
    reordered$pop_hour[i] <- "12 pm"
    } else if (sub_time$Var1[1]==13) {
    reordered$pop_hour[i] <- "1 pm"
    } else if (sub_time$Var1[1]==14) {
    reordered$pop_hour[i] <- "2 pm"
    } else if (sub_time$Var1[1]==15) {
    reordered$pop_hour[i] <- "3 pm"
    } else if (sub_time$Var1[1]==16) {
    reordered$pop_hour[i] <- "4 pm"
    } else if (sub_time$Var1[1]==17) {
    reordered$pop_hour[i] <- "5 pm"
    } else if (sub_time$Var1[1]==18) {
    reordered$pop_hour[i] <- "6 pm"
    } else if (sub_time$Var1[1]==19) {
    reordered$pop_hour[i] <- "7 pm"
    } else if (sub_time$Var1[1]==20) {
    reordered$pop_hour[i] <- "8 pm"
    } else if (sub_time$Var1[1]==21) {
    reordered$pop_hour[i] <- "9 pm"
    } else if (sub_time$Var1[1]==22) {
    reordered$pop_hour[i] <- "10 pm"
    } else if (sub_time$Var1[1]==23) {
    reordered$pop_hour[i] <- "11 pm"
    } else if (sub_time$Var1[1]==24) {
    reordered$pop_hour[i] <- "12 am"
    }
  if (sub_time$Freq[1]==1) {
    reordered$pop_hour[i] <- "Not enough data"
  }
}

library(DT)
for_table <- reordered[c("name", "incidents", "sworn", "inc.rate", "first_most", "second_most", "pop_day", "pop_hour")]
colnames(for_table) <- c("Department", "Tickets", "Officers", "Tickets per officer", "Most common", "Second-most common", "Day", "Hour")
datatable(for_table)

There were 595951 traffic tickets total

Observations


Which departments hands out the most speeding tickets?

*Minus state police

library(knitr)
speeding <- subset(incidents, StatutoryReasonForStop=="Speed Related")
top_dept_speeding <- data.frame(sort(table(speeding$Department.Name), decreasing=TRUE))
top_dept_speeding <- head(top_dept_speeding)
colnames(top_dept_speeding) <- "speeding.tickets"
kable(head(top_dept_speeding))
speeding.tickets
State Police 75394
Newtown 4688
Ridgefield 3491
Southington 2854
New Milford 2551
Greenwich 2271
top_dept_speeding$department <- rownames(top_dept_speeding)
top_dept_speeding <- top_dept_speeding[-1,]
ggplot(top_dept_speeding, aes(x=department, y=speeding.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Observations: - Something - Something else


Which departments writes the most citations for ‘other’?

*Minus state police

other <- subset(incidents, StatutoryReasonForStop=="Other")
top_dept_other <- data.frame(sort(table(other$Department.Name), decreasing=TRUE))
top_dept_other <- head(top_dept_other)
colnames(top_dept_other) <- "other.tickets"
kable(head(top_dept_other))
other.tickets
State Police 61989
Norwalk 1672
New Haven 1503
Hartford 1336
Bridgeport 978
Greenwich 927
top_dept_other$department <- rownames(top_dept_other)
top_dept_other <- top_dept_other[-1,]
ggplot(top_dept_other, aes(x=department, y=other.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Observations: - Something - Something else


Which departments hands out the most tickets for cell phone use?

*Minus state police

cell <- subset(incidents, StatutoryReasonForStop=="Cell Phone")
top_dept_cell <- data.frame(sort(table(cell$Department.Name), decreasing=TRUE))
top_dept_cell <- head(top_dept_cell)
colnames(top_dept_cell) <- "cell.tickets"
kable(head(top_dept_cell))
cell.tickets
State Police 15718
Danbury 2465
Wallingford 1339
West Hartford 1312
Branford 1227
Ridgefield 1226
top_dept_cell$department <- rownames(top_dept_cell)
top_dept_cell <- top_dept_cell[-1,]
ggplot(top_dept_cell, aes(x=department, y=cell.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Which departments hands out the most tickets for defective lights?

*Minus state police

lights <- subset(incidents, StatutoryReasonForStop=="Defective Lights")
top_dept_lights <- data.frame(sort(table(lights$Department.Name), decreasing=TRUE))
top_dept_lights <- head(top_dept_lights)
colnames(top_dept_lights) <- "lights.tickets"
kable(head(top_dept_lights))
lights.tickets
State Police 8560
Torrington 2186
Enfield 1762
Wallingford 1583
Windsor 1496
Newington 1450
top_dept_lights$department <- rownames(top_dept_lights)
top_dept_lights <- top_dept_lights[-1,]
ggplot(top_dept_lights, aes(x=department, y=lights.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Which departments hands out the most tickets for Display of Plates?

*Minus state police

plates <- subset(incidents, StatutoryReasonForStop=="Display of Plates")
top_dept_plates <- data.frame(sort(table(plates$Department.Name), decreasing=TRUE))
top_dept_plates <- head(top_dept_plates)
colnames(top_dept_plates) <- "plates.tickets"
kable(head(top_dept_plates))
plates.tickets
State Police 4519
Wethersfield 796
New Haven 687
Hartford 482
Wallingford 456
Torrington 440
top_dept_plates$department <- rownames(top_dept_plates)
top_dept_plates <- top_dept_plates[-1,]
ggplot(top_dept_plates, aes(x=department, y=plates.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Which departments hands out the most tickets for Registration?

*Minus state police

regi <- subset(incidents, StatutoryReasonForStop=="Registration")
top_dept_regi <- data.frame(sort(table(regi$Department.Name), decreasing=TRUE))
top_dept_regi <- head(top_dept_regi)
colnames(top_dept_regi) <- "regi.tickets"
kable(head(top_dept_regi))
regi.tickets
State Police 22692
Branford 1696
West Hartford 1579
Greenwich 1576
East Hartford 1083
Glastonbury 994
top_dept_regi$department <- rownames(top_dept_regi)
top_dept_regi <- top_dept_regi[-1,]
ggplot(top_dept_regi, aes(x=department, y=regi.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Which departments hands out the most tickets for Stop Sign?

*Minus state police

stop <- subset(incidents, StatutoryReasonForStop=="Stop Sign")
top_dept_stop <- data.frame(sort(table(stop$Department.Name), decreasing=TRUE))
top_dept_stop <- head(top_dept_stop)
colnames(top_dept_stop) <- "stop.tickets"
kable(head(top_dept_stop))
stop.tickets
State Police 5194
New Britain 1223
Wallingford 1038
Torrington 991
New Haven 916
Hartford 813
top_dept_stop$department <- rownames(top_dept_stop)
top_dept_stop <- top_dept_stop[-1,]
ggplot(top_dept_stop, aes(x=department, y=stop.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Mapping out the tickets per employee by department headquarter

library(leaflet)

depts <- read.csv("Police_Department_Map.csv", stringsAsFactors=FALSE)
depts <- left_join(reordered, depts)
## Joining by: "name"
depts$Location <- gsub("\\)","",depts$Location)
depts$Location <- gsub("\\(","",depts$Location)
depts$Lng <- sub(".*,","",depts$Location)
depts$Lat <- sub(",.*","",depts$Location)

depts$Lng <- as.numeric(depts$Lng)
depts$Lat <- as.numeric(depts$Lat)

depts <- subset(depts, !is.na(Lat))
binpald <- colorBin("Blues", depts$inc.rate, 6, pretty = FALSE)


town_popup <- paste('<strong>',depts$name,'</strong><br>Traffic tickets per officer: ',depts$inc.rate)

m <- leaflet(depts) %>% addTiles('http://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}', attribution='&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> &copy; <a href="http://cartodb.com/attributions">CartoDB</a>') %>% 
    setView(-72.690940, 41.651426, zoom = 9) %>% 
    addCircles(~Lng, ~Lat, popup=town_popup, weight = 2, fillOpacity=.7, fillColor=~binpald(inc.rate), color = '#000000', radius=~sqrt(depts$inc.rate)*300) %>%
  addLegend("bottomright", pal=binpald, values=~inc.rate,
            title="Traffic tickets per officer",
            opacity = 1 )
 
m