Analyzing Connecticut’s Democratic primary results

This is the methodology used behind the story: Analyzing Connecticut’s Democratic primary results.

Visit the repo for the data.

Who won the Democratic presidential primary election in Connecticut?

# Statewide vote totals per candidate, reshaped to one row per candidate.
zults <- dem_results %>%
  summarise(
    Clinton = sum(clinton_count),
    Sanders = sum(sanders_count)
  ) %>%
  gather("candidate", "votes", Clinton, Sanders)

# Horizontal bar chart of the totals. Each bar is labelled with that
# candidate's share of the combined vote, rounded to a whole percent.
ggplot(zults, aes(x = candidate, y = votes, fill = candidate)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste0(round(votes / sum(votes) * 100, 0), "%")),
    hjust = 1
  ) +
  coord_flip()

Mapping the results

library(rgdal)

## Loading required package: sp

## rgdal: version: 1.1-8, (SVN revision 616)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.0dev, released 2015/99/99
##  Path to GDAL shared files: /usr/local/share/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.2-3

require(maptools)

## Loading required package: maptools

## Checking rgeos availability: TRUE

# Read the "ctgeo" shapefile layer from the maps directory; the polygons are
# later keyed by their NAME10 attribute to match the per-town results.
town_shape <- readOGR(dsn="maps", layer="ctgeo")

## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields

# Flatten the polygons into a data frame of boundary points; the NAME10
# attribute of each polygon ends up in the `id` column.
town_shape_df <- fortify(town_shape, region = "NAME10")

# Label each town with the candidate holding the larger vote share (a tie
# falls to "Sanders" because the test is strictly greater-than), then rename
# Town to id so the results share a key with the polygon data.
town_map <- dem_results %>%
  mutate(winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders")) %>%
  rename(id = Town)

# Attach the results to every polygon point; the join key is the shared
# `id` column.
voters_map <- town_shape_df %>%
  left_join(town_map)

## Joining by: "id"

# Shared styling for the maps: no grid, no panel fill or frame, no axes, a
# "#e6e8ed" plot background and a large title.
theme_opts <- list(
  theme(
    # grid and panel
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    plot.background = element_rect(fill = "#e6e8ed"),
    panel.border = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # title
    plot.title = element_text(size = 22)
  )
)

# Map of the primary winner in each town: polygons filled by winner, with
# white outlines drawn on top.
ggplot(
  data = voters_map,
  mapping = aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(colour = "white") +
  labs(title = "Connecticut primary winner by town") +
  coord_equal() +
  theme_opts

Towns with the closest margins

# Towns where the candidates finished fewer than six votes apart, ordered by
# vote_diff. subset() is used for the row filter so the sorted row positions
# stay attached as row names in the printed table.
total_margins <- dem_results %>%
  select(Town, clinton_count, sanders_count, vote_diff) %>%
  arrange(vote_diff) %>%
  subset(vote_diff > -6 & vote_diff < 6)

kable(total_margins)

	Town	clinton_count	sanders_count	vote_diff
114	Killingworth	356	357	-1
115	Newington	1631	1632	-1
116	Sharon	188	185	3

Hillary Clinton needed just two votes to beat Bernie Sanders in Killingworth and Newington.

Sanders was just three votes behind in Sharon.

Where was Hillary most successful against Bernie?

## Towns with the widest margins

# Vote shares and their difference for every town, sorted ascending by
# percent_diff.
percent_margins <- dem_results %>%
  select(Town, clinton_per, sanders_per, percent_diff) %>%
  arrange(percent_diff)

# The last five rows of the ascending sort, i.e. the largest percent_diff
# values.
clinton_lead <- percent_margins %>% tail(5)
kable(clinton_lead)

	Town	clinton_per	sanders_per	percent_diff
165	Hartford	69.4	29.5	39.9
166	Weston	69.8	29.6	40.2
167	Westport	70.3	28.7	41.6
168	New Canaan	71.8	27.5	44.3
169	Bloomfield	74.6	24.9	49.7

Bloomfield, New Canaan, and Westport saw the biggest win margins for Hillary, at about 3 to 1 votes.

Where did Hillary lose the most to Bernie?

# The first five rows of percent_margins, i.e. the smallest (most negative)
# percent_diff values.
sanders_lead <- percent_margins %>% head(5)
kable(sanders_lead)

Town	clinton_per	sanders_per	percent_diff
Voluntown	25.0	69.1	-44.1
Canaan	29.7	69.2	-39.5
Sterling	29.4	68.4	-39.0
Union	30.1	67.5	-37.4
Hartland	30.7	68.1	-37.4

Sanders did quite well in Voluntown, Canaan, and Sterling, beating Clinton by roughly 40 percentage points in each town.

How did Hillary do in this year’s primary election versus the 2008 primary?

# Load the 2008 primary results and split out Clinton's vote total and vote
# share per town.
dem2008 <- read.csv("data/dem2008.csv", stringsAsFactors = FALSE)
dem2008_total <- dem2008 %>% select(Town, clinton_total_2008)
dem2008_percent <- dem2008 %>% select(Town, clinton_per_2008)

# The matching columns from the current results.
timechange <- dem_results %>% select(Town, clinton_per, clinton_count)
timechange_total <- timechange %>% select(Town, clinton_count)
timechange_percent <- timechange %>% select(Town, clinton_per)

# Pair the two sets of vote totals by town, keeping every row of the 2008
# table.
timechange_total <- dem2008_total %>%
  left_join(timechange_total)

## Joining by: "Town"

# Pair the two sets of vote shares by town, keeping every row of the 2008
# table.
timechange_percent <- dem2008_percent %>%
  left_join(timechange_percent)

## Joining by: "Town"

# Sort by clinton_per and freeze that order into the factor levels of Town,
# so a plot with Town on an axis follows the sort rather than the alphabet.
timechange_percent <- timechange_percent %>%
  arrange(clinton_per) %>%
  mutate(Town = factor(Town, levels = unique(Town)))

library(tidyr)
library(scales)
library(ggplot2)
# Install ggalt from GitHub only when it is needed: either the package is
# missing, or the installed copy does not provide geom_dumbbell(), which the
# chart below relies on. This avoids re-downloading and reinstalling the
# package every time the analysis is run.
if (!requireNamespace("ggalt", quietly = TRUE) ||
    !exists("geom_dumbbell", envir = asNamespace("ggalt"))) {
  devtools::install_github("hrbrmstr/ggalt")
}

## Downloading GitHub repo hrbrmstr/ggalt@master
## from URL https://api.github.com/repos/hrbrmstr/ggalt/zipball/master

## Installing ggalt

## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/7g/5fxswp_n6092hwr8n4wkqcfw0000gq/T/Rtmp8foaDp/devtools140617b48ceb/hrbrmstr-ggalt-111bdcc'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.2/Resources/library'  \
##   --install-tests

##

## Reloading installed ggalt

library(ggalt)


# Dumbbell chart of Clinton's vote share by town: x is clinton_per, xend is
# clinton_per_2008, one row per town. Layers are added in the same order as
# before (dumbbells, the vertical line at 50, then the four hand-placed
# annotations that act as a legend).
gg <- ggplot(
  timechange_percent,
  aes(x = clinton_per, xend = clinton_per_2008, y = Town, group = Town)
) +
  geom_dumbbell(color = "#a3c4dc", size = 0.5, point.colour.l = "#0e668b") +
  # scale_x_continuous(label = percent) +
  geom_vline(xintercept = 50) +
  # Hand-drawn legend: a label and a coloured point for each year.
  annotate("text", x = 68, y = 140, label = "2008", size = 3, colour = "gray30") +
  annotate("text", x = 68, y = 143, label = "2016", size = 3, colour = "gray30") +
  annotate("point", x = 66, y = 140, colour = "#a3c4dc", size = 2) +
  annotate("point", x = 66, y = 143, colour = "#0e668b", size = 2) +
  labs(
    x = NULL,
    y = NULL,
    title = "Percentage of votes for Clinton by town, 2008 vs. 2016"
  ) +
  # Start from theme_bw() and then override individual elements.
  theme_bw() +
  theme(
    axis.title = element_text(
      family = "Trebuchet MS", color = "#666666", face = "bold", size = 6
    ),
    plot.background = element_rect(fill = "#f7f7f7"),
    panel.background = element_rect(fill = "#f7f7f7"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(),
    axis.ticks = element_blank(),
    axis.text = element_text(size = 7),
    legend.position = "top",
    panel.border = element_blank()
  )
gg

## Warning: Removed 1 rows containing missing values (geom_dumbbell).

Results in towns that Obama won in 2008

## How'd Bernie and Hillary do in Obama towns?

# Towns where Obama's 2008 share was larger than Clinton's, with the columns
# of percent_margins attached by town.
obama_towns <- dem2008 %>%
  subset(obama_per_2008 > clinton_per_2008) %>%
  left_join(percent_margins)

## Joining by: "Town"

# Rename Town to id so the table shares a key with the polygon data.
obama_towns <- obama_towns %>% rename(id = Town)

# Label each town with the candidate holding the larger vote share (a tie
# falls to "Sanders"; missing shares give NA).
obama_town_map <- obama_towns %>%
  mutate(winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders"))

# Attach the labels to every polygon point.
obama_voters_map <- town_shape_df %>%
  left_join(obama_town_map)

## Joining by: "id"

# Keep only polygon points that received a winner from the join above.
obama_voters_map <- subset(obama_voters_map, !is.na(winner))

# Map of the winner in the remaining towns.
ggplot(
  data = obama_voters_map,
  mapping = aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(colour = "white") +
  labs(title = "2016 winners in towns that Obama won in 2008") +
  coord_equal() +
  theme_opts

# Number of these towns per winner.
o_town_results <- obama_town_map %>%
  group_by(winner) %>%
  summarise(towns = n())
kable(o_town_results)

winner	towns
Clinton	37
Sanders	52

In 2016, Clinton picked up 37 towns that Obama won in 2008.

Results in towns that Clinton won in 2008

# Towns where Clinton's 2008 share was larger than Obama's, with the columns
# of percent_margins attached by town.
clinton_towns <- dem2008 %>%
  subset(obama_per_2008 < clinton_per_2008) %>%
  left_join(percent_margins)

## Joining by: "Town"

# Rename Town to id so the table shares a key with the polygon data.
clinton_towns <- clinton_towns %>% rename(id = Town)

# Label each town with the candidate holding the larger vote share (a tie
# falls to "Sanders"; missing shares give NA).
clinton_town_map <- clinton_towns %>%
  mutate(winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders"))

# Attach the labels to every polygon point.
clinton_voters_map <- town_shape_df %>%
  left_join(clinton_town_map)

## Joining by: "id"

# Keep only polygon points that received a winner from the join above.
clinton_voters_map <- subset(clinton_voters_map, !is.na(winner))

# Map of the winner in the remaining towns.
ggplot(
  data = clinton_voters_map,
  mapping = aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(colour = "white") +
  labs(title = "2016 winners in towns that Clinton won in 2008") +
  coord_equal() +
  theme_opts

# Number of these towns per winner (rows without a winner are counted
# under NA).
c_town_results <- clinton_town_map %>%
  group_by(winner) %>%
  summarise(towns = n())
kable(c_town_results)

winner	towns
Clinton	17
Sanders	61
NA	1

Interestingly, Clinton lost 61 of the towns she won in 2008.

In 2016, she only won 17 of the towns she won in 2008.

But that didn’t make much of a difference because she still won the towns with larger populations.

# Town classification lookup: keep the NAME10 and Type columns, with NAME10
# renamed to Town so it lines up with the results table.
urban <- read.csv("data/urban_rural.csv", stringsAsFactors = FALSE) %>%
  select(Town = NAME10, Type)

# Results with each town's Type attached.
dem_results2 <- dem_results %>%
  left_join(urban)

## Joining by: "Town"

# One row per town and candidate: the two share columns are stacked into
# candidate/percent, and the "_per" suffix is stripped from the candidate
# labels.
dem_percent <- dem_results2 %>%
  select(Town, Type, clinton_per, sanders_per) %>%
  gather("candidate", "percent", clinton_per, sanders_per) %>%
  mutate(candidate = gsub("_per", "", candidate))

Votes by town category

Sanders has claimed most of his support comes from urban areas with young people and the less-affluent.

How did he do in those urban areas versus rural and mixed towns?

# Distribution of town-level vote share for each candidate, grouped by town
# type.
ggplot(dem_percent, aes(x = Type, y = percent, fill = candidate)) +
  geom_boxplot() +
  labs(title = "Candidate support winner by town type")

## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

Sanders had more support than Clinton in rural towns, as well as in towns with a mix of urban and rural areas.

Clinton edged out Sanders in support in more-populous, urban cities.

And that made the biggest difference.

# Total votes per town type and candidate: stack the two count columns, sum
# within each Type/candidate pair, and strip the "_count" suffix from the
# candidate labels.
dem_total <- dem_results2 %>%
  select(Type, clinton_count, sanders_count) %>%
  gather("candidate", "votes", clinton_count, sanders_count) %>%
  group_by(Type, candidate) %>%
  summarise(votes = sum(votes)) %>%
  mutate(candidate = gsub("_count", "", candidate))

# Horizontal stacked bars: one bar per candidate, segments coloured by town
# type.
ggplot(dem_total, aes(x = candidate, y = votes, fill = Type)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Primary votes by town type")

The problem with the primary elections

# Labels for the three headline figures in the turnout table.
count_head <- c("Population", "Registered.voters", "Voted.in.primaries")

# Build the table column by column. Going through cbind() would coerce the
# numbers to text, and reading the factor levels back off the Type column
# only works where data.frame() converts strings to factors (the default
# before R 4.0.0); on newer R that approach leaves Type entirely NA.
# The levels are the labels in reverse order, matching what the old
# reverse-alphabetical ordering produced for these three labels.
count <- data.frame(
  Type = factor(count_head, levels = rev(count_head)),
  Count = c(3597000, 559741, 231588)
)

# NOTE(review): the figures below are not used by the code; they look like
# registration counts by party -- confirm before relying on them.
# Dem
# 731241

# Rep
# 415689

# Unaf
# 94542

# Horizontal bars for the three headline figures. Each bar is labelled with
# its size as a whole-number percentage of 3,597,000 (the Population figure).
ggplot(count, aes(x = Type, y = Count, fill = Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste0(round(Count / 3597000 * 100, 0), "%")),
    hjust = 1
  ) +
  coord_flip() +
  labs(title = "Who actually voted in the CT primaries")

The number of residents who participated in Connecticut’s presidential primary is tiny compared to the rest of the state’s population. Six percent of the state helped decide the final candidates who will face off during the general election in November.