Exploring different ways to visualize traffic data.

Driving-age residents versus traffic stops

## Warning: Removed 39 rows containing missing values (geom_point).

## Warning: Removed 39 rows containing missing values (geom_point).

## Warning: Removed 39 rows containing missing values (geom_point).

Estimated Driving Population

# Estimated driving population (EDP): take the Black / Hispanic / Minority
# columns from `dept` and stack them into long form, one row per department
# and source column.
stops_race1 <- dept %>%
  select(
    DepartmentName,
    edp_b_p, edp_b, edp_h_p, edp_h, edp_m_p, edp_m,
    edp_b_diff, edp_h_diff, edp_m_diff,
    edp_b_ratio, edp_h_ratio, edp_m_ratio
  ) %>%
  gather("Ethnicity", "Percent", 2:13)

#-----------

# Split every source column name into a measure (`type`) and a group
# (`Ethnicity`), widen to one row per department/group with one column per
# measure, and drop rows that have no Difference value.
stops_race <- local({
  long <- stops_race1
  source_col <- long$Ethnicity

  # measure: the "_p" columns are labelled traffic stops, everything else EDP,
  # then the "_ratio" and "_diff" columns override that default
  measure <- ifelse(grepl("_p", source_col), "Traffic stops", "Estimated Driving Population")
  measure[grepl("_ratio", source_col)] <- "Ratio"
  measure[grepl("_diff", source_col)] <- "Difference"

  # group: taken from the b / h / m infix of the column name
  group <- source_col
  group[grepl("_b", group)] <- "Black"
  group[grepl("_h", group)] <- "Hispanic"
  group[grepl("_m", group)] <- "Minority"

  long$type <- measure
  long$Ethnicity <- group

  wide <- spread(long, type, Percent)
  wide$type <- "blank"
  wide$DepartmentName <- as.character(wide$DepartmentName)

  subset(wide, !is.na(Difference))
})


# Scatter of traffic stops (x) against estimated driving population (y): one
# point per department and group, coloured by group, both axes fixed to 0-100.
# The reference line uses geom_abline's default slope with intercept 0.1.
ggplot(
  stops_race,
  aes(`Traffic stops`, `Estimated Driving Population`,
      group = DepartmentName, color = DepartmentName)
) +
  geom_point(aes(colour = as.factor(Ethnicity)), size = 2) +
  geom_abline(intercept = 0.1) +
  lims(x = c(0, 100), y = c(0, 100)) +
  labs(title = "Estimated driving population versus traffic stops") +
  theme_minimal()

## Minorities dumbbell chart

# Only department/group rows whose Difference exceeds 10 are charted.
stops_race_sub <- stops_race %>%
  subset(Difference > 10)

# Minority rows, largest Difference first. DepartmentName becomes a factor
# whose levels run from the smallest to the largest Difference, and the three
# numeric columns are divided by 100 and rounded to two decimals.
officers_m1 <- stops_race_sub %>%
  filter(Ethnicity == "Minority") %>%
  #  filter(DepartmentName!="Connecticut") %>%
  arrange(desc(Difference)) %>%
  mutate(
    DepartmentName = factor(DepartmentName, levels = DepartmentName[order(Difference)]),
    Difference = round(Difference / 100, 2),
    `Estimated Driving Population` = round(`Estimated Driving Population` / 100, 2),
    `Traffic stops` = round(`Traffic stops` / 100, 2)
  )
# gg <- ggplot(officers_m1, aes(y=reorder(DepartmentName, distance_between_stops_and_population), x= traffic_stops))

# Minorities

# Minorities: dumbbell chart of estimated driving population (tomato dot)
# versus traffic stops (steelblue dot) for every department in officers_m1,
# with the difference printed in a shaded column at the right-hand edge.
gg <- ggplot() +
  # thin guide line along each department row, ending before the DIFF column
  geom_segment(
    data = officers_m1,
    aes(y = DepartmentName, yend = DepartmentName, x = 0, xend = .655),
    color = "#b2b2b2", size = 0.15
  ) +
  geom_dumbbell(
    data = officers_m1,
    aes(y = DepartmentName, x = `Estimated Driving Population`, xend = `Traffic stops`),
    size = 1.5, color = "#b2b2b2", point.size.l = 3, point.size.r = 3,
    point.colour.l = "tomato", point.colour.r = "steelblue"
  ) +
  scale_x_continuous(
    expand = c(0, 0), labels = percent,
    breaks = seq(0, 1, by = 0.2), limits = c(0, .70)
  ) +
  # series names, drawn once above the dots of the Wethersfield row
  # (negative vjust pushes the text upwards)
  geom_text(
    data = filter(officers_m1, DepartmentName == "Wethersfield"),
    aes(x = `Estimated Driving Population`, y = DepartmentName, label = "Est. driving population"),
    color = "tomato", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Wethersfield"),
    aes(x = `Traffic stops`, y = DepartmentName, label = "Traffic stops"),
    color = "steelblue", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  # percentage values printed below each dot (vjust > 1 pushes the text down)
  geom_text(
    data = officers_m1,
    aes(x = `Estimated Driving Population`, y = DepartmentName,
        label = paste0(round(`Estimated Driving Population` * 100, 2), "%")),
    color = "tomato", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # Styling arguments appear exactly once: repeating color/size/vjust/family
  # in one call makes ggplot2 warn about duplicated aesthetics and leaves the
  # label colour ambiguous. Steelblue matches the traffic-stops dot.
  geom_text(
    data = officers_m1,
    aes(x = `Traffic stops`, y = DepartmentName,
        label = paste0(round(`Traffic stops` * 100, 2), "%")),
    color = "steelblue", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # difference column
  geom_rect(
    data = officers_m1,
    aes(xmin = .665, xmax = .7, ymin = -Inf, ymax = Inf),
    fill = "#efefe3"
  ) +
  geom_text(
    data = officers_m1,
    aes(label = round(Difference * 100, 2), y = DepartmentName, x = .68),
    fontface = "bold", size = 4, family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Wethersfield"),
    aes(x = .68, y = DepartmentName, label = "DIFF"),
    color = "#7a7d7e", size = 3.5, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  scale_y_discrete(expand = c(0.075, 0)) +
  labs(
    x = NULL, y = NULL,
    title = "Minorities stopped compared to estimated driving population",
    subtitle = "",
    caption = "CCSU"
  ) +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
gg

# Black

# Black rows, largest Difference first. DepartmentName becomes a factor whose
# levels run from the smallest to the largest Difference, and the three
# numeric columns are divided by 100 and rounded to two decimals.
officers_m1 <- stops_race_sub %>%
  filter(Ethnicity == "Black") %>%
  #  filter(DepartmentName!="Connecticut") %>%
  arrange(desc(Difference)) %>%
  mutate(
    DepartmentName = factor(DepartmentName, levels = DepartmentName[order(Difference)]),
    Difference = round(Difference / 100, 2),
    `Estimated Driving Population` = round(`Estimated Driving Population` / 100, 2),
    `Traffic stops` = round(`Traffic stops` / 100, 2)
  )

# Black: dumbbell chart of estimated driving population (tomato dot) versus
# traffic stops (steelblue dot) for every department in officers_m1, with the
# difference printed in a shaded column at the right-hand edge.
hh <- ggplot() +
  # thin guide line along each department row, ending before the DIFF column
  geom_segment(
    data = officers_m1,
    aes(y = DepartmentName, yend = DepartmentName, x = 0, xend = .655),
    color = "#b2b2b2", size = 0.15
  ) +
  geom_dumbbell(
    data = officers_m1,
    aes(y = DepartmentName, x = `Estimated Driving Population`, xend = `Traffic stops`),
    size = 1.5, color = "#b2b2b2", point.size.l = 3, point.size.r = 3,
    point.colour.l = "tomato", point.colour.r = "steelblue"
  ) +
  scale_x_continuous(
    expand = c(0, 0), labels = percent,
    breaks = seq(0, 1, by = 0.2), limits = c(0, .70)
  ) +
  # series names, drawn once above the dots of the East Hartford row
  # (negative vjust pushes the text upwards)
  geom_text(
    data = filter(officers_m1, DepartmentName == "East Hartford"),
    aes(x = `Estimated Driving Population`, y = DepartmentName, label = "Est. driving population"),
    color = "tomato", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "East Hartford"),
    aes(x = `Traffic stops`, y = DepartmentName, label = "Traffic stops"),
    color = "steelblue", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  # percentage values printed below each dot (vjust > 1 pushes the text down)
  geom_text(
    data = officers_m1,
    aes(x = `Estimated Driving Population`, y = DepartmentName,
        label = paste0(round(`Estimated Driving Population` * 100, 2), "%")),
    color = "tomato", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # Styling arguments appear exactly once: repeating color/size/vjust/family
  # in one call makes ggplot2 warn about duplicated aesthetics and leaves the
  # label colour ambiguous. Steelblue matches the traffic-stops dot.
  geom_text(
    data = officers_m1,
    aes(x = `Traffic stops`, y = DepartmentName,
        label = paste0(round(`Traffic stops` * 100, 2), "%")),
    color = "steelblue", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # difference column
  geom_rect(
    data = officers_m1,
    aes(xmin = .665, xmax = .7, ymin = -Inf, ymax = Inf),
    fill = "#efefe3"
  ) +
  geom_text(
    data = officers_m1,
    aes(label = round(Difference * 100, 2), y = DepartmentName, x = .68),
    fontface = "bold", size = 4, family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "East Hartford"),
    aes(x = .68, y = DepartmentName, label = "DIFF"),
    color = "#7a7d7e", size = 3.5, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  scale_y_discrete(expand = c(0.075, 0)) +
  labs(
    x = NULL, y = NULL,
    title = "Blacks stopped compared to estimated driving population",
    subtitle = "",
    caption = "CCSU"
  ) +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
hh

# Hispanic

# Hispanic rows, largest Difference first. DepartmentName becomes a factor
# whose levels run from the smallest to the largest Difference, and the three
# numeric columns are divided by 100 and rounded to two decimals.
officers_m1 <- stops_race_sub %>%
  filter(Ethnicity == "Hispanic") %>%
  #  filter(DepartmentName!="Connecticut") %>%
  arrange(desc(Difference)) %>%
  mutate(
    DepartmentName = factor(DepartmentName, levels = DepartmentName[order(Difference)]),
    Difference = round(Difference / 100, 2),
    `Estimated Driving Population` = round(`Estimated Driving Population` / 100, 2),
    `Traffic stops` = round(`Traffic stops` / 100, 2)
  )

# Hispanic: dumbbell chart of estimated driving population (tomato dot) versus
# traffic stops (steelblue dot) for every department in officers_m1, with the
# difference printed in a shaded column at the right-hand edge.
# NOTE(review): unlike the Minorities and Black charts, this one sets no
# scale_y_discrete(expand = c(0.075, 0)) and no subtitle/caption - confirm
# that is intentional.
ii <- ggplot() +
  # thin guide line along each department row, ending before the DIFF column
  geom_segment(
    data = officers_m1,
    aes(y = DepartmentName, yend = DepartmentName, x = 0, xend = .655),
    color = "#b2b2b2", size = 0.15
  ) +
  geom_dumbbell(
    data = officers_m1,
    aes(y = DepartmentName, x = `Estimated Driving Population`, xend = `Traffic stops`),
    size = 1.5, color = "#b2b2b2", point.size.l = 3, point.size.r = 3,
    point.colour.l = "tomato", point.colour.r = "steelblue"
  ) +
  scale_x_continuous(
    expand = c(0, 0), labels = percent,
    breaks = seq(0, 1, by = 0.2), limits = c(0, .70)
  ) +
  # series names, drawn once above the dots of the Wethersfield row
  # (negative vjust pushes the text upwards)
  geom_text(
    data = filter(officers_m1, DepartmentName == "Wethersfield"),
    aes(x = `Estimated Driving Population`, y = DepartmentName, label = "Est. driving population"),
    color = "tomato", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Wethersfield"),
    aes(x = `Traffic stops`, y = DepartmentName, label = "Traffic stops"),
    color = "steelblue", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  # percentage values printed below each dot (vjust > 1 pushes the text down)
  geom_text(
    data = officers_m1,
    aes(x = `Estimated Driving Population`, y = DepartmentName,
        label = paste0(round(`Estimated Driving Population` * 100, 2), "%")),
    color = "tomato", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # Styling arguments appear exactly once: repeating color/size/vjust/family
  # in one call makes ggplot2 warn about duplicated aesthetics and leaves the
  # label colour ambiguous. Steelblue matches the traffic-stops dot.
  geom_text(
    data = officers_m1,
    aes(x = `Traffic stops`, y = DepartmentName,
        label = paste0(round(`Traffic stops` * 100, 2), "%")),
    color = "steelblue", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # difference column
  geom_rect(
    data = officers_m1,
    aes(xmin = .665, xmax = .7, ymin = -Inf, ymax = Inf),
    fill = "#efefe3"
  ) +
  geom_text(
    data = officers_m1,
    aes(label = round(Difference * 100, 2), y = DepartmentName, x = .68),
    fontface = "bold", size = 4, family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Wethersfield"),
    aes(x = .68, y = DepartmentName, label = "DIFF"),
    color = "#7a7d7e", size = 3.5, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  labs(x = NULL, y = NULL, title = "Hispanics stopped compared to estimated driving population") +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
ii

# the three estimated-driving-population charts side by side
grid.arrange(gg, hh, ii, ncol = 3)

Residents

# Residents: take the resident-population, resident-stops and difference
# columns for each group from `dept` and stack them into long form, one row
# per department and source column.
stops_race1 <- dept %>%
  select(
    DepartmentName,
    m_res, m_res_stops, res_diff_m,
    b_res, b_res_stops, res_diff_b,
    h_res, h_res_stops, res_diff_h
  ) %>%
  gather("Ethnicity", "Percent", 2:10)

#-----------

# Split every source column name into a measure (`type`) and a group
# (`Ethnicity`), widen to one row per department/group with one column per
# measure, and drop rows that have no Difference value.
stops_race <- local({
  long <- stops_race1
  source_col <- long$Ethnicity

  # measure: "_res" marks resident population, the longer "_res_stops" then
  # overrides it for resident stops, and "_diff" marks the difference columns
  measure <- ifelse(grepl("_res", source_col), "Resident population", source_col)
  measure[grepl("_res_stops", source_col)] <- "Resident stops"
  measure[grepl("_diff", source_col)] <- "Difference"

  # group: the population/stops columns carry it as a prefix (b_, h_, m_),
  # the res_diff_* columns as a suffix (_b, _h, _m)
  group <- source_col
  group[grepl("b_", group)] <- "Black"
  group[grepl("h_", group)] <- "Hispanic"
  group[grepl("m_", group)] <- "Minority"
  group[grepl("_b", group)] <- "Black"
  group[grepl("_h", group)] <- "Hispanic"
  group[grepl("_m", group)] <- "Minority"

  long$type <- measure
  long$Ethnicity <- group

  wide <- spread(long, type, Percent)
  wide$type <- "blank"
  wide$DepartmentName <- as.character(wide$DepartmentName)

  subset(wide, !is.na(Difference))
})


# Scatter of resident stops (x) against resident population (y): one point per
# department and group, coloured by group, both axes fixed to 0-100. The
# reference line uses geom_abline's default slope with intercept 0.1.
ggplot(
  stops_race,
  aes(`Resident stops`, `Resident population`,
      group = DepartmentName, color = DepartmentName)
) +
  geom_point(aes(colour = as.factor(Ethnicity)), size = 2) +
  geom_abline(intercept = 0.1) +
  lims(x = c(0, 100), y = c(0, 100)) +
  labs(title = "Resident population versus Resident stops") +
  theme_minimal()

## Minorities dumbbell chart

# Only department/group rows whose Difference exceeds 10 are charted.
stops_race_sub <- stops_race %>%
  subset(Difference > 10)

# Minority rows, largest Difference first. DepartmentName becomes a factor
# whose levels run from the smallest to the largest Difference, and the three
# numeric columns are divided by 100 and rounded to two decimals.
officers_m1 <- stops_race_sub %>%
  filter(Ethnicity == "Minority") %>%
  #  filter(DepartmentName!="Connecticut") %>%
  arrange(desc(Difference)) %>%
  mutate(
    DepartmentName = factor(DepartmentName, levels = DepartmentName[order(Difference)]),
    Difference = round(Difference / 100, 2),
    `Resident population` = round(`Resident population` / 100, 2),
    `Resident stops` = round(`Resident stops` / 100, 2)
  )
# gg <- ggplot(officers_m1, aes(y=reorder(DepartmentName, distance_between_stops_and_population), x= traffic_stops))

# Minorities

# Minorities: dumbbell chart of resident population (tomato dot) versus
# resident stops (steelblue dot) for every department in officers_m1, with the
# difference printed in a shaded column at the right-hand edge.
gg <- ggplot() +
  # thin guide line along each department row
  geom_segment(
    data = officers_m1,
    aes(y = DepartmentName, yend = DepartmentName, x = 0, xend = .9),
    color = "#b2b2b2", size = 0.15
  ) +
  geom_dumbbell(
    data = officers_m1,
    aes(y = DepartmentName, x = `Resident population`, xend = `Resident stops`),
    size = 1.5, color = "#b2b2b2", point.size.l = 3, point.size.r = 3,
    point.colour.l = "tomato", point.colour.r = "steelblue"
  ) +
  scale_x_continuous(
    expand = c(0, 0), labels = percent,
    breaks = seq(0, 1, by = 0.2), limits = c(0, .9)
  ) +
  # series names, drawn once above the dots of the Meriden row
  # (negative vjust pushes the text upwards)
  geom_text(
    data = filter(officers_m1, DepartmentName == "Meriden"),
    aes(x = `Resident population`, y = DepartmentName, label = "Resident population"),
    color = "tomato", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Meriden"),
    aes(x = `Resident stops`, y = DepartmentName, label = "Resident stops"),
    color = "steelblue", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  # percentage values printed below each dot (vjust > 1 pushes the text down)
  geom_text(
    data = officers_m1,
    aes(x = `Resident population`, y = DepartmentName,
        label = paste0(round(`Resident population` * 100, 2), "%")),
    color = "tomato", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # Styling arguments appear exactly once: repeating color/size/vjust/family
  # in one call makes ggplot2 warn about duplicated aesthetics and leaves the
  # label colour ambiguous. Steelblue matches the resident-stops dot.
  geom_text(
    data = officers_m1,
    aes(x = `Resident stops`, y = DepartmentName,
        label = paste0(round(`Resident stops` * 100, 2), "%")),
    color = "steelblue", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # difference column (drawn after the guide lines, so it covers their ends)
  geom_rect(
    data = officers_m1,
    aes(xmin = .85, xmax = .9, ymin = -Inf, ymax = Inf),
    fill = "#efefe3"
  ) +
  geom_text(
    data = officers_m1,
    aes(label = round(Difference * 100, 2), y = DepartmentName, x = .875),
    fontface = "bold", size = 4, family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Meriden"),
    aes(x = .875, y = DepartmentName, label = "DIFF"),
    color = "#7a7d7e", size = 3.5, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  scale_y_discrete(expand = c(0.075, 0)) +
  labs(
    x = NULL, y = NULL,
    title = "Minorities stopped compared to resident population",
    subtitle = "",
    caption = "CCSU"
  ) +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
gg

# Black

# Black rows, largest Difference first. DepartmentName becomes a factor whose
# levels run from the smallest to the largest Difference, and the three
# numeric columns are divided by 100 and rounded to two decimals.
officers_m1 <- stops_race_sub %>%
  filter(Ethnicity == "Black") %>%
  #  filter(DepartmentName!="Connecticut") %>%
  arrange(desc(Difference)) %>%
  mutate(
    DepartmentName = factor(DepartmentName, levels = DepartmentName[order(Difference)]),
    Difference = round(Difference / 100, 2),
    `Resident population` = round(`Resident population` / 100, 2),
    `Resident stops` = round(`Resident stops` / 100, 2)
  )

# Black: dumbbell chart of resident population (tomato dot) versus resident
# stops (steelblue dot) for every department in officers_m1, with the
# difference printed in a shaded column at the right-hand edge.
hh <- ggplot() +
  # thin guide line along each department row
  geom_segment(
    data = officers_m1,
    aes(y = DepartmentName, yend = DepartmentName, x = 0, xend = .9),
    color = "#b2b2b2", size = 0.15
  ) +
  geom_dumbbell(
    data = officers_m1,
    aes(y = DepartmentName, x = `Resident population`, xend = `Resident stops`),
    size = 1.5, color = "#b2b2b2", point.size.l = 3, point.size.r = 3,
    point.colour.l = "tomato", point.colour.r = "steelblue"
  ) +
  scale_x_continuous(
    expand = c(0, 0), labels = percent,
    breaks = seq(0, 1, by = 0.2), limits = c(0, .90)
  ) +
  # series names, drawn once above the dots of the New Haven row (negative
  # vjust pushes the text upwards). The tomato label names the column that is
  # actually plotted there, `Resident population`.
  geom_text(
    data = filter(officers_m1, DepartmentName == "New Haven"),
    aes(x = `Resident population`, y = DepartmentName, label = "Resident population"),
    color = "tomato", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "New Haven"),
    aes(x = `Resident stops`, y = DepartmentName, label = "Resident stops"),
    color = "steelblue", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  # percentage values printed below each dot (vjust > 1 pushes the text down)
  geom_text(
    data = officers_m1,
    aes(x = `Resident population`, y = DepartmentName,
        label = paste0(round(`Resident population` * 100, 2), "%")),
    color = "tomato", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # Styling arguments appear exactly once: repeating color/size/vjust/family
  # in one call makes ggplot2 warn about duplicated aesthetics and leaves the
  # label colour ambiguous. Steelblue matches the resident-stops dot.
  geom_text(
    data = officers_m1,
    aes(x = `Resident stops`, y = DepartmentName,
        label = paste0(round(`Resident stops` * 100, 2), "%")),
    color = "steelblue", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # difference column (drawn after the guide lines, so it covers their ends)
  geom_rect(
    data = officers_m1,
    aes(xmin = .85, xmax = .9, ymin = -Inf, ymax = Inf),
    fill = "#efefe3"
  ) +
  geom_text(
    data = officers_m1,
    aes(label = round(Difference * 100, 2), y = DepartmentName, x = .875),
    fontface = "bold", size = 4, family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "New Haven"),
    aes(x = .875, y = DepartmentName, label = "DIFF"),
    color = "#7a7d7e", size = 3.5, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  scale_y_discrete(expand = c(0.075, 0)) +
  labs(
    x = NULL, y = NULL,
    title = "Blacks stopped compared to resident population",
    subtitle = "",
    caption = "CCSU"
  ) +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
hh

# Hispanic

# Hispanic rows, largest Difference first. DepartmentName becomes a factor
# whose levels run from the smallest to the largest Difference, and the three
# numeric columns are divided by 100 and rounded to two decimals.
officers_m1 <- stops_race_sub %>%
  filter(Ethnicity == "Hispanic") %>%
  #  filter(DepartmentName!="Connecticut") %>%
  arrange(desc(Difference)) %>%
  mutate(
    DepartmentName = factor(DepartmentName, levels = DepartmentName[order(Difference)]),
    Difference = round(Difference / 100, 2),
    `Resident population` = round(`Resident population` / 100, 2),
    `Resident stops` = round(`Resident stops` / 100, 2)
  )

# Hispanic: dumbbell chart of resident population (tomato dot) versus resident
# stops (steelblue dot) for every department in officers_m1, with the
# difference printed in a shaded column at the right-hand edge.
# NOTE(review): unlike the Minorities and Black charts, this one sets no
# scale_y_discrete(expand = c(0.075, 0)) and no subtitle/caption, and its
# guide lines stop at .85 rather than .9 - confirm that is intentional.
ii <- ggplot() +
  # thin guide line along each department row, ending where the DIFF column begins
  geom_segment(
    data = officers_m1,
    aes(y = DepartmentName, yend = DepartmentName, x = 0, xend = .85),
    color = "#b2b2b2", size = 0.15
  ) +
  geom_dumbbell(
    data = officers_m1,
    aes(y = DepartmentName, x = `Resident population`, xend = `Resident stops`),
    size = 1.5, color = "#b2b2b2", point.size.l = 3, point.size.r = 3,
    point.colour.l = "tomato", point.colour.r = "steelblue"
  ) +
  scale_x_continuous(
    expand = c(0, 0), labels = percent,
    breaks = seq(0, 1, by = 0.2), limits = c(0, .90)
  ) +
  # series names, drawn once above the dots of the Meriden row (negative
  # vjust pushes the text upwards). The tomato label names the column that is
  # actually plotted there, `Resident population`.
  geom_text(
    data = filter(officers_m1, DepartmentName == "Meriden"),
    aes(x = `Resident population`, y = DepartmentName, label = "Resident population"),
    color = "tomato", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Meriden"),
    aes(x = `Resident stops`, y = DepartmentName, label = "Resident stops"),
    color = "steelblue", size = 4, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  # percentage values printed below each dot (vjust > 1 pushes the text down)
  geom_text(
    data = officers_m1,
    aes(x = `Resident population`, y = DepartmentName,
        label = paste0(round(`Resident population` * 100, 2), "%")),
    color = "tomato", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # Styling arguments appear exactly once: repeating color/size/vjust/family
  # in one call makes ggplot2 warn about duplicated aesthetics and leaves the
  # label colour ambiguous. Steelblue matches the resident-stops dot.
  geom_text(
    data = officers_m1,
    aes(x = `Resident stops`, y = DepartmentName,
        label = paste0(round(`Resident stops` * 100, 2), "%")),
    color = "steelblue", size = 3.75, vjust = 2.5, family = "Calibri"
  ) +
  # difference column
  geom_rect(
    data = officers_m1,
    aes(xmin = .85, xmax = .9, ymin = -Inf, ymax = Inf),
    fill = "#efefe3"
  ) +
  geom_text(
    data = officers_m1,
    aes(label = round(Difference * 100, 2), y = DepartmentName, x = .875),
    fontface = "bold", size = 4, family = "Calibri"
  ) +
  geom_text(
    data = filter(officers_m1, DepartmentName == "Meriden"),
    aes(x = .875, y = DepartmentName, label = "DIFF"),
    color = "#7a7d7e", size = 3.5, vjust = -2, fontface = "bold", family = "Calibri"
  ) +
  labs(x = NULL, y = NULL, title = "Hispanics stopped compared to Resident population") +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
ii

# the three resident-population charts side by side
grid.arrange(gg, hh, ii, ncol = 3)

# Read the combined data set and score every row on nine disparity measures:
# three benchmarks (statewide average, estimated driving population, resident
# population) for each of Minority, Black and Hispanic.
pre_points <- read.csv("mega_df.csv")

# Points for one difference/ratio pair:
#   1    when the difference is above 10;
#   0.5  when the difference is above 5 and at most 10 and the ratio is above 1.75;
#   0    otherwise.
# The middle band includes 10 itself, so a difference of exactly 10 with a
# high ratio scores 0.5 instead of dropping to 0 between the two tiers.
# An NA difference gives NA; an NA ratio gives NA only inside the middle band.
.disparity_points <- function(diff, ratio) {
  points <- ifelse(diff > 10, 1, 0)
  ifelse(diff <= 10 & diff > 5 & ratio > 1.75, .5, points)
}

# statewide average: a single threshold on the *_distance columns
pre_points$points_sa_m <- ifelse(pre_points$m_distance > 10, 1, 0)
pre_points$points_sa_b <- ifelse(pre_points$b_distance > 10, 1, 0)
pre_points$points_sa_h <- ifelse(pre_points$h_distance > 10, 1, 0)

# estimated driving population
pre_points$points_edp_m <- .disparity_points(pre_points$edp_m_diff, pre_points$edp_m_ratio)
pre_points$points_edp_b <- .disparity_points(pre_points$edp_b_diff, pre_points$edp_b_ratio)
pre_points$points_edp_h <- .disparity_points(pre_points$edp_h_diff, pre_points$edp_h_ratio)

# resident population
pre_points$points_res_m <- .disparity_points(pre_points$res_diff_m, pre_points$res_ratio_m)
pre_points$points_res_b <- .disparity_points(pre_points$res_diff_b, pre_points$res_ratio_b)
pre_points$points_res_h <- .disparity_points(pre_points$res_diff_h, pre_points$res_ratio_h)

# Rows without an officer ID (NA ReportingOfficerIdentificationID), reduced to
# the department name and the nine point columns.
subset_test <- pre_points[
  is.na(pre_points$ReportingOfficerIdentificationID),
  c(
    "DepartmentName",
    paste0("points_", c("sa_m", "sa_b", "sa_h", "edp_m", "edp_b", "edp_h", "res_m", "res_b", "res_h"))
  )
]

# total across the nine point columns (missing scores are skipped), highest first
subset_test$points <- rowSums(subset_test[-1], na.rm = TRUE)
subset_test <- subset_test %>%
  arrange(desc(points))

# factor levels are the reverse of the sorted row order
subset_test$DepartmentName <- with(
  subset_test,
  factor(DepartmentName, levels = rev(DepartmentName))
)

# departments with no points are left out
subset_test <- subset_test[subset_test$points > 0, ]

# keep the totals in their own table and remove them from the per-measure table
subset_test_points <- subset_test[, c("DepartmentName", "points")]
subset_test <- subset_test[, names(subset_test) != "points"]

library(ggplot2)
library(dplyr)
library(tidyr)

# Readable label for each of the nine point columns.
disparity_labels <- c(
  points_edp_m = "Minorities - Estimated driving population",
  points_edp_b = "Black - Estimated driving population",
  points_edp_h = "Hispanic - Estimated driving population",
  points_res_m = "Minorities - Resident population",
  points_res_b = "Black - Resident population",
  points_res_h = "Hispanic - Resident population",
  points_sa_m = "Minorities - Statewide average",
  points_sa_b = "Black - Statewide average",
  points_sa_h = "Hispanic - Statewide average"
)

# Long form for plotting: one row per department and disparity measure.
subset_test_graph <- subset_test %>%
  gather("Disparity", "Points", 2:10)

# swap each column name for its label with a single lookup
subset_test_graph$Disparity <- unname(
  disparity_labels[as.character(subset_test_graph$Disparity)]
)

# Attach each department's total. The key is named explicitly so the join does
# not have to guess it and no "Joining by" message is emitted.
subset_test_graph <- left_join(subset_test_graph, subset_test_points, by = "DepartmentName")

#subset_test <- arrange(subset_test,-points)
  
#officers_m1$DepartmentName <- factor(officers_m1$DepartmentName, levels = officers_m1$DepartmentName[order(-officers_m1$Difference)])


# Nine fill colours, one per Disparity label (three greens, three purples,
# three reds); scale_fill_manual hands them out in the order of the fill levels.
cbPalette <- c("#006d2c", "#2ca25f", "#66c2a4", "#54278f", "#756bb1", "#9e9ac8", "#b30000", "#e34a33", "#fc8d59")

# Horizontal stacked bars of points per department, one segment per disparity
# measure, with the department total printed past the end of each bar.
jj <- ggplot(subset_test_graph, aes(x = DepartmentName, y = Points, fill = Disparity)) +
  scale_fill_manual(values = cbPalette) +
  geom_bar(stat = "identity") +
  # One label per department: the long data repeats each total on nine rows,
  # so the layer gets de-duplicated totals and does not inherit the bar
  # aesthetics. hjust is a constant, so it is set outside aes().
  geom_text(
    data = unique(subset_test_graph[c("DepartmentName", "points")]),
    aes(x = DepartmentName, y = points, label = points),
    inherit.aes = FALSE,
    hjust = -1, fontface = "bold", size = 4, family = "Calibri"
  ) +
  coord_flip() +
  labs(x = NULL, y = NULL, title = "Disparity points by Department") +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic", size = 9, margin = margin(b = 12)),
    plot.caption = element_text(size = 7, margin = margin(t = 12), color = "#7a7d7e")
  )
jj

## Warning: Removed 6 rows containing missing values (position_stack).

# Disparity points

story1b

Andrew Ba Tran

June 3, 2016

Exploring different ways to visualize traffic data.

Driving-age residents versus traffic stops

Estimated Driving Population

Residents