An analysis for the TrendCT story: Police departments that hand out the most traffic tickets in Connecticut

The data is from the Connecticut Racial Profiling Prohibition Project hosted at the CT Data Collaborative which collects traffic incident reports between October 2013 and September 2014.

Skip ahead and download the summary dataset generated by this analysis.

library(lubridate)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
incidents <- read.csv("http://ctrp3viz.s3.amazonaws.com/data/Connecticut_r1.csv", stringsAsFactors=FALSE)

# Fix times
incidents$RealDate <- as.POSIXct(as.Date(incidents$InterventionDateTime, origin="1899-12-30"))
incidents$RealTime <- format(as.POSIXct((incidents$InterventionTime) * 86400, origin = "1899-12-30", tz="America/Montserrat"), "%H:%M")
incidents$Hour <- hour(incidents$RealDate)


# Let's look at rate per department employment
# First, let's bring in FBI Uniform Crime Report that lists 2013 department employment

pers <- read.csv("police_dept.csv", stringsAsFactors=FALSE)

# Calculate total sworn officers
pers$sworn <- pers$sworn.male+ pers$sworn.female

# Tallying tickets by department
incidents_by_dept <- data.frame(table(incidents$Department.Name))
colnames(incidents_by_dept) <- c("name", "incidents")
incidents_by_dept$name <- toupper(incidents_by_dept$name)
incidents_by_dept <- left_join(incidents_by_dept, pers)
## Joining by: "name"
# Calculating tickets per officer
incidents_by_dept$inc.rate <- round((incidents_by_dept$incidents/incidents_by_dept$sworn), digits=2)

reordered <- incidents_by_dept[order(incidents_by_dept$inc.rate),]

# Creating some stand-in data
reordered$first_most <- "whatever"
reordered$second_most <- "whatevs"
reordered$pop_day <- "funday"
reordered$pop_hour <- "fun o'clock"

library(stringr)
inc <- incidents
inc$Department.Name <- str_to_upper(inc$Department.Name)

# Loop to figure out first- and second-most ticketed offense
# Also most popular day of the week and hour

reo_list <- 1:nrow(reordered)

for (i in reo_list) {
  dept_name <- reordered$name[i]
  sub <- subset(inc, Department.Name==dept_name)
  sub_dat <- data.frame(table(sub$StatutoryReasonForStop))
  sub_dat$Var1 <- as.character(sub_dat$Var1)
  sub_dat <- sub_dat[order(-sub_dat$Freq),] 
  reordered$first_most[i] <- sub_dat$Var1[1]
  reordered$second_most[i] <- sub_dat$Var1[2]
  sub_day <- data.frame(table(sub$Day.of.Week))
  sub_day$Var1 <- as.character(sub_day$Var1)
  sub_day <- sub_day[order(-sub_day$Freq),]
  reordered$pop_day[i] <- sub_day$Var1[1]
  sub_time <- data.frame(table(sub$Hour))
  sub_time <- sub_time[order(-sub_time$Freq),] 
  if (sub_time$Var1[1]==0) {
    reordered$pop_hour[i] <- "12 am"
    } else if (sub_time$Var1[1]==1) {
    reordered$pop_hour[i] <- "1 am"
    } else if (sub_time$Var1[1]==2) {
    reordered$pop_hour[i] <- "2 am"
    } else if (sub_time$Var1[1]==3) {
    reordered$pop_hour[i] <- "3 am"
    } else if (sub_time$Var1[1]==4) {
    reordered$pop_hour[i] <- "4 am"
    } else if (sub_time$Var1[1]==5) {
    reordered$pop_hour[i] <- "5 am"
    } else if (sub_time$Var1[1]==6) {
    reordered$pop_hour[i] <- "6 am"
    } else if (sub_time$Var1[1]==7) {
    reordered$pop_hour[i] <- "7 am"
    } else if (sub_time$Var1[1]==8) {
    reordered$pop_hour[i] <- "8 am"
    } else if (sub_time$Var1[1]==9) {
    reordered$pop_hour[i] <- "9 am"
    } else if (sub_time$Var1[1]==10) {
    reordered$pop_hour[i] <- "10 am"
    } else if (sub_time$Var1[1]==11) {
    reordered$pop_hour[i] <- "11 am"
    } else if (sub_time$Var1[1]==12) {
    reordered$pop_hour[i] <- "12 pm"
    } else if (sub_time$Var1[1]==13) {
    reordered$pop_hour[i] <- "1 pm"
    } else if (sub_time$Var1[1]==14) {
    reordered$pop_hour[i] <- "2 pm"
    } else if (sub_time$Var1[1]==15) {
    reordered$pop_hour[i] <- "3 pm"
    } else if (sub_time$Var1[1]==16) {
    reordered$pop_hour[i] <- "4 pm"
    } else if (sub_time$Var1[1]==17) {
    reordered$pop_hour[i] <- "5 pm"
    } else if (sub_time$Var1[1]==18) {
    reordered$pop_hour[i] <- "6 pm"
    } else if (sub_time$Var1[1]==19) {
    reordered$pop_hour[i] <- "7 pm"
    } else if (sub_time$Var1[1]==20) {
    reordered$pop_hour[i] <- "8 pm"
    } else if (sub_time$Var1[1]==21) {
    reordered$pop_hour[i] <- "9 pm"
    } else if (sub_time$Var1[1]==22) {
    reordered$pop_hour[i] <- "10 pm"
    } else if (sub_time$Var1[1]==23) {
    reordered$pop_hour[i] <- "11 pm"
    } else if (sub_time$Var1[1]==24) {
    reordered$pop_hour[i] <- "12 am"
    }
  if (sub_time$Freq[1]==1) {
    reordered$pop_hour[i] <- "Not enough data"
  }
}

library(DT)
for_table <- reordered[c("name", "incidents", "sworn", "inc.rate", "first_most", "second_most", "pop_day", "pop_hour")]
colnames(for_table) <- c("Department", "Tickets", "Officers", "Tickets per officer", "Most common", "Second-most common", "Day", "Hour")
datatable(for_table)

There were 595951 traffic tickets total

Observations


Which departments hands out the most speeding tickets?

*Minus state police

library(knitr)
speeding <- subset(incidents, StatutoryReasonForStop=="Speed Related")
top_dept_speeding <- data.frame(sort(table(speeding$Department.Name), decreasing=TRUE))
top_dept_speeding <- head(top_dept_speeding)
colnames(top_dept_speeding) <- "speeding.tickets"
kable(head(top_dept_speeding))
speeding.tickets
State Police 75394
Newtown 4688
Ridgefield 3491
Southington 2854
New Milford 2551
Greenwich 2271
top_dept_speeding$department <- rownames(top_dept_speeding)
top_dept_speeding <- top_dept_speeding[-1,]
ggplot(top_dept_speeding, aes(x=department, y=speeding.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")

Observations: - Something - Something else


Which departments writes the most citations for ‘other’?

*Minus state police

other <- subset(incidents, StatutoryReasonForStop=="Other")
top_dept_other <- data.frame(sort(table(other$Department.Name), decreasing=TRUE))
top_dept_other <- head(top_dept_other)
colnames(top_dept_other) <- "other.tickets"
kable(head(top_dept_other))
other.tickets
State Police 61989
Norwalk 1672
New Haven 1503
Hartford 1336
Bridgeport 978
Greenwich 927
top_dept_other$department <- rownames(top_dept_other)
top_dept_other <- top_dept_other[-1,]
ggplot(top_dept_other, aes(x=department, y=other.tickets)) +
    geom_bar(colour="black", fill="red", stat="identity")