An analysis for the TrendCT story: What time of day drivers get ticketed the most in Connecticut
The data is from the Connecticut Racial Profiling Prohibition Project hosted at the CT Data Collaborative which collects traffic incident reports between October 2013 and September 2014.
# Some libraries
library(lubridate)
library(ggplot2)
# Bringing the data in. This is a large file. Brace yourself.
#incidents <- read.csv("http://ctrp3viz.s3.amazonaws.com/data/Connecticut_r1.csv", stringsAsFactors=FALSE)
# Cleaning up the dates and time
incidents$RealDate <- as.POSIXct(as.Date(incidents$InterventionDateTime, origin="1899-12-30"))
incidents$RealTime <- format(as.POSIXct((incidents$InterventionTime) * 86400, origin = "1970-01-01"), "%H:%M")
incidents$RealTime <- hm(incidents$RealTime)
incidents$Hour <- hour(incidents$RealTime)
incidents$Hour2 <- hour(incidents$RealDate)
incidents$Day.of.Week <- as.factor(incidents$Day.of.Week)
levels(incidents$Day.of.Week) <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
c <- ggplot(incidents, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c
Observations
no_state <- subset(incidents, Department.Name!="State Police")
c <- ggplot(no_state, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c
Answer: Nope. Looks about the samee as the chart above
c <- ggplot(incidents, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c + facet_grid(Day.of.Week ~ .)
Observations
# First, take out a few outliers
filtered <- subset(incidents, Department.Name!="State Police" & Department.Name!="MET DIST WATER AUTHORITY" & Department.Name!="MTA Stamford")
c <- ggplot(filtered, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c <- c + ggtitle("Frequency of tickets in towns by hour")
c <- c + ylab("Tickets")
department_histograms <- c + facet_wrap(~Department.Name, ncol=4)
department_histograms
Observations
filtered <- subset(incidents, Department.Name!="State Police" & Department.Name!="MET DIST WATER AUTHORITY" & Department.Name!="MTA Stamford")
c <- ggplot(filtered, aes(x=Hour2))
c <- c + geom_histogram(colour="darkred",fill="white", binwidth=1)
c <- c + ggtitle("Frequency of tickets in towns by hour")
c <- c + ylab("Tickets")
department_histograms <- c + facet_wrap(~Department.Name, ncol=4, scales="free")
department_histograms