This is the methodology used behind the story: Analyzing Connecticut’s Democratic primary results.

Visit the repo for the data.

Who won the Democratic presidential primary election in Connecticut?

# Statewide vote totals, reshaped to one row per candidate
# (columns: candidate, votes).
zults <- gather(
  summarise(
    dem_results,
    Clinton = sum(clinton_count),
    Sanders = sum(sanders_count)
  ),
  key = "candidate",
  value = "votes",
  Clinton, Sanders
)

# Horizontal bars of the raw totals; each bar is labelled with its share of
# the combined Clinton + Sanders vote, rounded to a whole percent.
ggplot(zults, aes(x = candidate, y = votes, fill = candidate)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste0(round(votes / sum(votes) * 100, 0), "%")),
    hjust = 1
  ) +
  coord_flip()

Mapping the results

# Spatial tooling: rgdal supplies readOGR() for the town shapefile.
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.1-8, (SVN revision 616)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.0dev, released 2015/99/99
##  Path to GDAL shared files: /usr/local/share/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.2-3
# library() instead of require(): a missing package must stop the script here
# rather than return FALSE and fail later with a less obvious error.
# NOTE(review): maptools is presumably needed by fortify(..., region = ) below;
# confirm.
library(maptools)
## Loading required package: maptools
## Checking rgeos availability: TRUE
# Read the "ctgeo" layer from the maps/ directory.
town_shape <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
# Flatten the polygons into a plain data frame for ggplot, keyed by the
# NAME10 field of the shapefile.
town_shape_df <- fortify(town_shape, region="NAME10")

# Label every town with its 2016 winner.
# The comparison uses the raw vote counts, not clinton_per / sanders_per: the
# percentage columns appear to be stored with one decimal place (see the
# tables in this document), so a win by a handful of votes can look like a tie
# and would then fall through to "Sanders" regardless of who was ahead.
town_map <- dem_results %>%
  mutate(winner=ifelse(clinton_count>sanders_count, "Clinton", "Sanders"))

# The fortified shapefile keys its rows on `id`, so rename Town to match
# before joining.
names(town_map)[names(town_map) == 'Town'] <- 'id'

# Attach the results to every polygon vertex; towns without a result keep NA.
voters_map <- left_join(town_shape_df, town_map)
## Joining by: "id"
# Shared theme for the town maps: removes grid lines, panel chrome and every
# axis element, paints the plot background light grey-blue and enlarges the
# title. Wrapped in a list so it can be added to a plot with `+`.
theme_opts <- local({
  blank <- element_blank()
  list(
    theme(
      panel.grid.minor = blank,
      panel.grid.major = blank,
      panel.background = blank,
      plot.background = element_rect(fill = "#e6e8ed"),
      panel.border = blank,
      axis.line = blank,
      axis.text.x = blank,
      axis.text.y = blank,
      axis.ticks = blank,
      axis.title.x = blank,
      axis.title.y = blank,
      plot.title = element_text(size = 22)
    )
  )
})

# Choropleth of the winning candidate in each town, with white town borders.
ggplot(
  data = voters_map,
  mapping = aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(colour = "white") +
  ggtitle("Connecticut primary winner by town") +
  coord_fixed() +
  theme_opts


Towns with the closest margins

# Towns decided by fewer than six votes in either direction, sorted by
# vote_diff in ascending order.
total_margins <- dem_results[, c("Town", "clinton_count", "sanders_count", "vote_diff")] %>%
  arrange(vote_diff) %>%
  subset(abs(vote_diff) < 6)

kable(total_margins)
Town clinton_count sanders_count vote_diff
114 Killingworth 356 357 -1
115 Newington 1631 1632 -1
116 Sharon 188 185 3

Hillary Clinton needed just two more votes to beat Bernie Sanders in each of Killingworth and Newington.

Sanders was just three votes behind in Sharon.


Where was Hillary most successful against Bernie?

## Towns with the widest margins

# Percentage results per town, sorted by percent_diff in ascending order.
percent_margins <- dem_results[, c("Town", "clinton_per", "sanders_per", "percent_diff")] %>%
  arrange(percent_diff)

# The last five rows are the towns with the largest percent_diff.
clinton_lead <- percent_margins %>% tail(n = 5)
kable(clinton_lead)
Town clinton_per sanders_per percent_diff
165 Hartford 69.4 29.5 39.9
166 Weston 69.8 29.6 40.2
167 Westport 70.3 28.7 41.6
168 New Canaan 71.8 27.5 44.3
169 Bloomfield 74.6 24.9 49.7

Bloomfield, New Canaan, and Westport saw the biggest win margins for Hillary, with Bloomfield at about 3 to 1 votes.


Where did Hillary lose the most to Bernie?

# The first five rows of the ascending sort are the towns with the smallest
# (most negative) percent_diff.
sanders_lead <- percent_margins %>% head(n = 5)
kable(sanders_lead)
Town clinton_per sanders_per percent_diff
Voluntown 25.0 69.1 -44.1
Canaan 29.7 69.2 -39.5
Sterling 29.4 68.4 -39.0
Union 30.1 67.5 -37.4
Hartland 30.7 68.1 -37.4

Sanders did quite well in Voluntown, Canaan, and Sterling, winning each by roughly 40 percentage points.


How did Hillary do in this year’s primary election versus the 2008 primary?

# 2008 primary results, split into Clinton's vote totals and vote shares.
dem2008 <- read.csv(file = "data/dem2008.csv", stringsAsFactors = FALSE)
dem2008_total <- select(dem2008, Town, clinton_total_2008)
dem2008_percent <- select(dem2008, Town, clinton_per_2008)

# Clinton's 2016 share and count per town.
timechange <- select(dem_results, Town, clinton_per, clinton_count)

# 2008 vs. 2016 raw counts, one row per town in the 2008 file.
timechange_total <- dem2008_total %>%
  left_join(select(timechange, Town, clinton_count))
## Joining by: "Town"

# 2008 vs. 2016 shares, sorted by the 2016 share. Town becomes a factor whose
# levels follow that sort so the chart's y axis keeps the same order.
timechange_percent <- dem2008_percent %>%
  left_join(select(timechange, Town, clinton_per)) %>%
  arrange(clinton_per) %>%
  mutate(Town = factor(Town, levels = unique(Town)))
## Joining by: "Town"

library(tidyr)
library(scales)
library(ggplot2)
# ggalt provides geom_dumbbell(), used for the 2008 vs. 2016 chart below.
# Install from GitHub only when the package is missing: an unconditional
# install_github() needs network access on every run and silently replaces the
# installed version with whatever is on master that day.
if (!requireNamespace("ggalt", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/ggalt")
}
## Downloading GitHub repo hrbrmstr/ggalt@master
## from URL https://api.github.com/repos/hrbrmstr/ggalt/zipball/master
## Installing ggalt
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/7g/5fxswp_n6092hwr8n4wkqcfw0000gq/T/Rtmp8foaDp/devtools140617b48ceb/hrbrmstr-ggalt-111bdcc'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.2/Resources/library'  \
##   --install-tests
## 
## Reloading installed ggalt
library(ggalt)


# Dumbbell chart of Clinton's vote share per town: one end of each bar is the
# 2016 share (x), the other the 2008 share (xend). A vertical rule marks 50,
# and a hand-placed legend is drawn with annotate() at fixed x/y positions.
gg <- ggplot(
  timechange_percent,
  aes(x = clinton_per, xend = clinton_per_2008, y = Town, group = Town)
) +
  geom_dumbbell(color = "#a3c4dc", size = 0.5, point.colour.l = "#0e668b") +
  # scale_x_continuous(label=percent) +
  geom_vline(xintercept = 50) +
  annotate("text", x = 68, y = 140, label = "2008", size = 3, colour = "gray30") +
  annotate("text", x = 68, y = 143, label = "2016", size = 3, colour = "gray30") +
  annotate("point", x = 66, y = 140, colour = "#a3c4dc", size = 2) +
  annotate("point", x = 66, y = 143, colour = "#0e668b", size = 2) +
  labs(
    x = NULL,
    y = NULL,
    title = "Percentage of votes for Clinton by town, 2008 vs. 2016"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(
      family = "Trebuchet MS", color = "#666666", face = "bold", size = 6
    ),
    plot.background = element_rect(fill = "#f7f7f7"),
    panel.background = element_rect(fill = "#f7f7f7"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(),
    axis.ticks = element_blank(),
    axis.text = element_text(size = 7),
    legend.position = "top",
    panel.border = element_blank()
  )
gg
## Warning: Removed 1 rows containing missing values (geom_dumbbell).

Results in towns that Obama won in 2008

## How'd Bernie and Hillary do in Obama towns?

# Towns where Obama's 2008 share beat Clinton's, with the 2016 percentages
# attached and the key column renamed to `id` to match the fortified shapefile.
obama_towns <- dem2008 %>%
  subset(obama_per_2008 > clinton_per_2008) %>%
  left_join(percent_margins) %>%
  rename(id = Town)
## Joining by: "Town"

# 2016 winner in each of those towns, judged on the percentage columns.
obama_town_map <- mutate(
  obama_towns,
  winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders")
)

# Keep only the polygons that belong to a town with a known winner.
obama_voters_map <- town_shape_df %>%
  left_join(obama_town_map) %>%
  subset(!is.na(winner))
## Joining by: "id"

# Map of the 2016 winner, restricted to towns Obama carried in 2008.
ggplot(
  data = obama_voters_map,
  mapping = aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(colour = "white") +
  ggtitle("2016 winners in towns that Obama won in 2008") +
  coord_fixed() +
  theme_opts

# Number of 2008 Obama towns carried by each candidate in 2016.
o_town_results <- summarise(group_by(obama_town_map, winner), towns = n())
kable(o_town_results)
winner towns
Clinton 37
Sanders 52

In 2016, Clinton picked up 37 towns that Obama won in 2008.


Results in towns that Clinton won in 2008

# Towns where Clinton's 2008 share beat Obama's, with the 2016 percentages
# attached and the key column renamed to `id` to match the fortified shapefile.
clinton_towns <- dem2008 %>%
  subset(obama_per_2008 < clinton_per_2008) %>%
  left_join(percent_margins) %>%
  rename(id = Town)
## Joining by: "Town"

# 2016 winner in each of those towns, judged on the percentage columns.
clinton_town_map <- mutate(
  clinton_towns,
  winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders")
)

# Keep only the polygons that belong to a town with a known winner.
clinton_voters_map <- town_shape_df %>%
  left_join(clinton_town_map) %>%
  subset(!is.na(winner))
## Joining by: "id"

# Map of the 2016 winner, restricted to towns Clinton carried in 2008.
ggplot(
  data = clinton_voters_map,
  mapping = aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(colour = "white") +
  ggtitle("2016 winners in towns that Clinton won in 2008") +
  coord_fixed() +
  theme_opts

# Number of 2008 Clinton towns carried by each candidate in 2016
# (towns with no comparable 2016 percentages end up in an NA row).
c_town_results <- summarise(group_by(clinton_town_map, winner), towns = n())
kable(c_town_results)
winner towns
Clinton 17
Sanders 61
NA 1

Interestingly, Clinton lost 61 of the towns she won in 2008.

In 2016, she only won 17 of the towns she won in 2008.

But that didn’t make much of a difference because she still won the towns with larger populations.


# Town classification lookup, reduced to the town name (renamed from NAME10
# to Town so it joins against the results) and its Type.
urban <- read.csv("data/urban_rural.csv", stringsAsFactors = FALSE) %>%
  select(Town = NAME10, Type)

# 2016 results with each town's Type attached.
dem_results2 <- dem_results %>% left_join(urban)
## Joining by: "Town"

# Long format: one row per town and candidate, with the "_per" suffix
# stripped from the candidate label.
dem_percent <- dem_results2 %>%
  select(Town, Type, clinton_per, sanders_per) %>%
  gather(key = "candidate", value = "percent", clinton_per, sanders_per) %>%
  mutate(candidate = gsub("_per", "", candidate))

Votes by town category

Sanders has claimed most of his support comes from urban areas with young people and the less-affluent.

How did he do in those urban areas versus rural and mixed towns?

# Distribution of each candidate's town-level vote share, split by town type.
ggplot(data = dem_percent, mapping = aes(x = Type, y = percent, fill = candidate)) +
  geom_boxplot() +
  ggtitle("Candidate support winner by town type")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

Sanders had more support from rural towns than Clinton, as well as in towns with a mix of urban and rural areas.

Clinton edged out Sanders in support in more-populous, urban cities.

And that made the biggest difference.

# Total votes per town type and candidate. The "_count" suffix is stripped
# from the candidate label before grouping, so the summary already carries
# the clean names.
dem_total <- dem_results2 %>%
  select(Type, clinton_count, sanders_count) %>%
  gather(key = "candidate", value = "votes", clinton_count, sanders_count) %>%
  mutate(candidate = gsub("_count", "", candidate)) %>%
  group_by(Type, candidate) %>%
  summarise(votes = sum(votes))

# Horizontal stacked bars: each candidate's votes, broken down by town type.
ggplot(data = dem_total, mapping = aes(x = candidate, y = votes, fill = Type)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ggtitle("Primary votes by town type")

The problem with the primary elections

# Headline figures that put primary turnout in context.
count_head <- c("Population", "Registered.voters", "Voted.in.primaries")
count <- c(3597000, 559741, 231588)

# Build the data frame directly instead of going through cbind(), which
# coerced the numbers to character and relied on data.frame() turning strings
# into factors. On R >= 4.0 strings stay character, so levels(count$Type) was
# NULL and re-factoring with those levels turned every Type into NA.
# The levels are the reverse of count_head, matching the reversed alphabetical
# order the original code produced on older R versions.
count <- data.frame(
  Type = factor(count_head, levels = rev(count_head)),
  Count = count
)
# Party registration figures kept for reference (not used by the code here):
# Dem
# 731241

# Rep
# 415689 

# Unaf
# 94542

# Horizontal bars for each headline count, labelled with that count as a
# whole-number percentage of 3,597,000.
ggplot(count, aes(x = Type, y = Count, fill = Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste0(round(Count / 3597000 * 100, 0), "%")),
    hjust = 1
  ) +
  coord_flip() +
  ggtitle("Who actually voted in the CT primaries")

The number of residents who participated in Connecticut’s presidential primary is tiny compared to the rest of the state’s population. Six percent of the state helped decide the final candidates who will face off during the general election in November.