This is the methodology used for the Trend CT story "Who’s sitting in pretrial detention in Connecticut." These are exploratory tables and charts, some of which I did not include in the final story for various reasons.

Visit the repo for the data used in this analysis. (Also, check out the reproducible scripts and data behind many of our other stories in our central repo.)

Data for this analysis was provided by the Department of Corrections via the Connecticut Open Data Portal; the dataset is updated nightly.

What’s in this walkthrough

Several visualizations and tables exploring the data

library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(knitr)
library(ggalt)
library(extrafont)
library(grid)
library(gridExtra)
library(stringr)
library(DT)
# Download the inmate records from the Connecticut Open Data Portal.
# stringsAsFactors is pinned so that RACE and DOWNLOAD.DATE are plain
# character columns regardless of the R version's default.
update <- read.csv(
  "https://data.ct.gov/api/views/b674-jy6w/rows.csv",
  stringsAsFactors = FALSE
)

# Number of inmates per download date and race, restricted to Black,
# Hispanic and White inmates. The date strings are parsed (month-day-year)
# inside the pipeline and the result is ungrouped, so the date column is
# never overwritten while it is still acting as a grouping key.
race_total_date <- update %>%
  group_by(DOWNLOAD.DATE, RACE) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  filter(RACE %in% c("BLACK", "HISPANIC", "WHITE")) %>%
  mutate(DOWNLOAD.DATE = mdy(DOWNLOAD.DATE))

# Line chart of daily inmate counts, one line per race. The legend is hidden
# and each line is labelled directly instead; the right plot margin is
# widened to 5 lines to leave room for those labels.
# Grid lines, axis ticks and axis text are left at the theme_bw() defaults.
gg <- ggplot(
  race_total_date,
  aes(x = DOWNLOAD.DATE, y = total, group = RACE, color = RACE)
) +
  geom_line() +
  labs(
    x = NULL,
    y = "Inmates",
    title = "Accused pre-trial inmates in Connecticut jails",
    caption = "SOURCE: Department of Corrections\nAndrew Ba Tran/TrendCT.org"
  ) +
  theme_bw(base_family = "Calibri") +
  theme(
    panel.border = element_blank(),
    text = element_text(size = 10),
    plot.title = element_text(face = "bold", family = "Lato Black", size = 22),
    plot.subtitle = element_text(
      face = "italic", size = 9, margin = margin(b = 12)
    ),
    plot.caption = element_text(
      size = 12, margin = margin(t = 10, r = 80), color = "#7a7d7e"
    ),
    legend.position = "none",
    plot.margin = unit(c(1, 5, 1, 1), "lines")
  )

# Direct labels for the three lines. They live in their own three-row data
# frame so that each label is drawn exactly once; mapping constants against
# the full data set would draw one copy of every label per data row.
# NOTE: the date and counts are hard-coded for 2016-07-19 and are not
# recomputed from the downloaded data.
line_labels <- data.frame(
  x = ymd("2016-7-19"),
  y = c(1336, 849, 1126),
  label = c("Black (1,336)", "Hispanic (849)", "White (1,126)"),
  stringsAsFactors = FALSE
)

gg <- gg + geom_label(
  data = line_labels,
  aes(x = x, y = y, label = label),
  inherit.aes = FALSE, # the label data has no RACE column to group/colour by
  hjust = 0,
  family = "Helvetica",
  lineheight = 0.95,
  size = 4.5,
  label.size = 0,
  color = "black"
)
gg

# Convert the plot to a gtable and switch off clipping on the panel so that
# anything drawn past the panel edge can spill into the plot margin.
gb <- ggplot_build(gg)
gt <- ggplot_gtable(gb)

gt$layout$clip[gt$layout$name == "panel"] <- "off"

# grid.draw() adds to the current page, so start a fresh one first; otherwise
# the unclipped version is painted on top of whatever was plotted before.
grid.newpage()
grid.draw(gt)

# Save the unclipped gtable. Arguments are named in full rather than relying
# on `file` partially matching `filename`; `type` is passed on to the png
# device.
ggsave(
  filename = "race_totals_time.png",
  plot = gt,
  width = 8,
  height = 6,
  type = "cairo-png"
)

# Share of each race among inmates, per download date, in long format
# (one row per date and race, columns DOWNLOAD.DATE, Race, Percent).
# DOWNLOAD.DATE is kept as the original string here.
race_total_date <- update %>%
  group_by(DOWNLOAD.DATE, RACE) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  # fill = 0: a race with no inmates on a given date counts as zero instead
  # of NA, which would otherwise turn that date's total and percentages NA.
  spread(RACE, total, fill = 0) %>%
  mutate(total = `AMER IND` + ASIAN + BLACK + HISPANIC + WHITE) %>%
  mutate(
    American.Indian.per = round(`AMER IND` / total * 100, 2),
    Asian.per = round(ASIAN / total * 100, 2),
    Black.per = round(BLACK / total * 100, 2),
    Hispanic.per = round(HISPANIC / total * 100, 2),
    White.per = round(WHITE / total * 100, 2)
  ) %>%
  select(
    DOWNLOAD.DATE,
    American.Indian.per, Asian.per, Black.per, Hispanic.per, White.per
  ) %>%
  gather(Race, Percent, -DOWNLOAD.DATE)

# Strip the literal ".per" suffix from the race names. The dot is escaped and
# the pattern anchored so that only the trailing suffix can match.
race_total_date$Race <- sub("\\.per$", "", race_total_date$Race)

# Bar chart of each race's daily percentage, one facet column per race.
# The date strings are parsed (month-day-year) directly in the x mapping.
# Grid lines, axis ticks and axis text are left at the theme_bw() defaults.
gg <- ggplot(race_total_date, aes(x = mdy(DOWNLOAD.DATE), y = Percent)) +
  geom_bar(stat = "identity") +
  facet_grid(. ~ Race) +
  labs(
    x = NULL,
    y = "Percent",
    title = "Racial makeup of accused pre-trial inmates",
    caption = "SOURCE: Department of Corrections\nAndrew Ba Tran/TrendCT.org"
  ) +
  theme_bw(base_family = "Calibri") +
  theme(
    text = element_text(size = 16),
    panel.border = element_blank(),
    plot.title = element_text(face = "bold", family = "Lato Black", size = 22),
    plot.subtitle = element_text(
      face = "italic", size = 9, margin = margin(b = 12)
    ),
    plot.caption = element_text(
      size = 12, margin = margin(t = 10, r = 80), color = "#7a7d7e"
    ),
    plot.margin = unit(c(1, 1, 1, 1), "lines")
  )

gg