An analysis for the TrendCT story: What time of day drivers get ticketed the most in Connecticut

The data is from the Connecticut Racial Profiling Prohibition Project hosted at the CT Data Collaborative which collects traffic incident reports between October 2013 and September 2014.

# Some libraries
library(lubridate)
library(ggplot2)

# Bringing the data in. This is a large file. Brace yourself.
#incidents <- read.csv("http://ctrp3viz.s3.amazonaws.com/data/Connecticut_r1.csv", stringsAsFactors=FALSE)


# Cleaning up the dates and time
incidents$RealDate <- as.POSIXct(as.Date(incidents$InterventionDateTime, origin="1899-12-30"))
incidents$RealTime <- format(as.POSIXct((incidents$InterventionTime) * 86400, origin = "1970-01-01"), "%H:%M")
incidents$RealTime <- hm(incidents$RealTime)
incidents$Hour <- hour(incidents$RealTime)
incidents$Hour2 <- hour(incidents$RealDate)
incidents$Day.of.Week <- as.factor(incidents$Day.of.Week)
levels(incidents$Day.of.Week) <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

What times do drivers get ticketed?

c <- ggplot(incidents, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c

Observations


Connecticut State Police hand out nearly 1/3 of all traffic tickets.

Does the pattern change when their data is taken out?

no_state <- subset(incidents, Department.Name!="State Police")
c <- ggplot(no_state, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c

Answer: Nope. Looks about the samee as the chart above


What time during each day of the week do tickets get assigned?

c <- ggplot(incidents, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c + facet_grid(Day.of.Week ~ .)

Observations


Break it down by Department as a whole

# First, take out a few outliers
filtered <- subset(incidents, Department.Name!="State Police" & Department.Name!="MET DIST WATER AUTHORITY" & Department.Name!="MTA Stamford")

c <- ggplot(filtered, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c <- c + ggtitle("Frequency of tickets in towns by hour")
c <- c + ylab("Tickets")
department_histograms <- c + facet_wrap(~Department.Name, ncol=4)
department_histograms

Observations

Let’s take a look again with the y-scale relative to each town’s data

filtered <- subset(incidents, Department.Name!="State Police" & Department.Name!="MET DIST WATER AUTHORITY" & Department.Name!="MTA Stamford")

c <- ggplot(filtered, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c <- c + ggtitle("Frequency of tickets in towns by hour")
c <- c + ylab("Tickets")
department_histograms <- c + facet_wrap(~Department.Name, ncol=4, scales="free")
department_histograms