This is the methodology used behind the story: Analyzing Connecticut’s Democratic primary results.
Visit the repo for the data.
# Statewide vote totals, reshaped to one row per candidate
# (columns: candidate, votes).
zults <- gather(
  summarise(
    dem_results,
    Clinton = sum(clinton_count),
    Sanders = sum(sanders_count)
  ),
  "candidate", "votes", 1:2
)

# Horizontal bar per candidate; each bar is labelled with that candidate's
# share of the combined Clinton + Sanders vote, rounded to a whole percent.
ggplot(zults, aes(x = candidate, fill = candidate)) +
  geom_bar(aes(y = votes), stat = "identity", position = "dodge") +
  geom_text(
    aes(
      x = candidate,
      y = votes,
      label = paste0(as.character(round(votes / sum(votes) * 100, 0)), "%")
    ),
    hjust = 1
  ) +
  coord_flip()
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.1-8, (SVN revision 616)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.1.0dev, released 2015/99/99
## Path to GDAL shared files: /usr/local/share/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
## Path to PROJ.4 shared files: (autodetected)
## Linking to sp version: 1.2-3
require(maptools)
## Loading required package: maptools
## Checking rgeos availability: TRUE
# Read the "ctgeo" layer from the maps/ directory.
town_shape <- readOGR(
  dsn = "maps",
  layer = "ctgeo"
)
## OGR data source with driver: ESRI Shapefile
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
# Flatten the polygons into a plain data frame for ggplot, using the NAME10
# attribute as the region identifier.
town_shape_df <- fortify(
  town_shape,
  region = "NAME10"
)
# Tag every town with its 2016 winner. A town goes to Clinton only when her
# percentage is strictly higher; anything else is labelled "Sanders".
town_map <- mutate(
  dem_results,
  winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders")
)

# The join below matches on a column called `id`, so rename `Town` before
# attaching the results to the polygon data.
colnames(town_map)[colnames(town_map) == "Town"] <- "id"
voters_map <- town_shape_df %>%
  left_join(town_map)
## Joining by: "id"
# Shared map theme: strips gridlines, axes and panel chrome so that only the
# polygons, a light grey-blue plot background and a large title remain.
theme_opts <- list(
  theme(
    # backgrounds and border
    plot.background = element_rect(fill = "#e6e8ed"),
    panel.background = element_blank(),
    panel.border = element_blank(),
    # gridlines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # title
    plot.title = element_text(size = 22)
  )
)
# Choropleth of the 2016 winner in every town: filled polygons with white
# outlines, drawn with equal x/y scaling and the shared map theme.
ggplot(
  voters_map,
  aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(color = "white") +
  labs(title = "Connecticut primary winner by town") +
  coord_equal() +
  theme_opts
# Closest races: towns decided by fewer than six votes either way, ordered
# by vote_diff.
total_margins <- dem_results[, c("Town", "clinton_count", "sanders_count", "vote_diff")] %>%
  arrange(vote_diff) %>%
  subset(abs(vote_diff) < 6)
kable(total_margins)
| | Town | clinton_count | sanders_count | vote_diff |
---|---|---|---|---|
114 | Killingworth | 356 | 357 | -1 |
115 | Newington | 1631 | 1632 | -1 |
116 | Sharon | 188 | 185 | 3 |
Hillary Clinton needed just two votes to beat Bernie Sanders in Killingworth and Newington.
Sanders was just three votes behind in Sharon.
## Towns with the widest margins
# Per-town percentages sorted by percent_diff, ascending. The last five rows
# are therefore the towns with the largest percent_diff.
percent_margins <- dem_results[, c("Town", "clinton_per", "sanders_per", "percent_diff")] %>%
  arrange(percent_diff)
clinton_lead <- tail(percent_margins, n = 5)
kable(clinton_lead)
| | Town | clinton_per | sanders_per | percent_diff |
---|---|---|---|---|
165 | Hartford | 69.4 | 29.5 | 39.9 |
166 | Weston | 69.8 | 29.6 | 40.2 |
167 | Westport | 70.3 | 28.7 | 41.6 |
168 | New Canaan | 71.8 | 27.5 | 44.3 |
169 | Bloomfield | 74.6 | 24.9 | 49.7 |
Bloomfield, New Canaan, and Westport saw the biggest win margins for Hillary, at roughly 2.5 to 3 votes for every Sanders vote.
# percent_margins is sorted ascending by percent_diff, so its first five rows
# are the towns with the most negative percent_diff.
sanders_lead <- head(percent_margins, n = 5)
kable(sanders_lead)
Town | clinton_per | sanders_per | percent_diff |
---|---|---|---|
Voluntown | 25.0 | 69.1 | -44.1 |
Canaan | 29.7 | 69.2 | -39.5 |
Sterling | 29.4 | 68.4 | -39.0 |
Union | 30.1 | 67.5 | -37.4 |
Hartland | 30.7 | 68.1 | -37.4 |
Sanders did quite well in Voluntown, Canaan, and Sterling, with margins of roughly 40 percentage points over Clinton.
# 2008 primary results, split into Clinton's vote totals and percentages.
dem2008 <- read.csv("data/dem2008.csv", stringsAsFactors = FALSE)
dem2008_total <- dem2008[, c("Town", "clinton_total_2008")]
dem2008_percent <- dem2008[, c("Town", "clinton_per_2008")]

# Clinton's 2016 figures, attached to the 2008 rows so each town carries
# both years side by side.
timechange <- dem_results[, c("Town", "clinton_per", "clinton_count")]
timechange_total <- dem2008_total %>%
  left_join(timechange[, c("Town", "clinton_count")])
## Joining by: "Town"
timechange_percent <- dem2008_percent %>%
  left_join(timechange[, c("Town", "clinton_per")])
## Joining by: "Town"

# Sort by the 2016 percentage and freeze that order into the factor levels,
# so the plot lists towns in sorted order rather than alphabetically.
timechange_percent <- arrange(timechange_percent, clinton_per)
timechange_percent$Town <- with(
  timechange_percent,
  factor(Town, levels = unique(Town))
)
library(tidyr)
library(scales)
library(ggplot2)
# Install ggalt from GitHub only when it is not already available, so that
# re-running the analysis does not download and rebuild the package each time.
if (!requireNamespace("ggalt", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/ggalt")
}
## Downloading GitHub repo hrbrmstr/ggalt@master
## from URL https://api.github.com/repos/hrbrmstr/ggalt/zipball/master
## Installing ggalt
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file \
## --no-environ --no-save --no-restore --quiet CMD INSTALL \
## '/private/var/folders/7g/5fxswp_n6092hwr8n4wkqcfw0000gq/T/Rtmp8foaDp/devtools140617b48ceb/hrbrmstr-ggalt-111bdcc' \
## --library='/Library/Frameworks/R.framework/Versions/3.2/Resources/library' \
## --install-tests
##
## Reloading installed ggalt
library(ggalt)
# Dumbbell chart with one row per town, spanning Clinton's 2016 percentage
# (x) to her 2008 percentage (xend). A vertical rule marks 50, and a small
# hand-drawn key near x = 66-68 stands in for a legend: the light dot is
# labelled 2008 and the dark dot 2016.
gg <- ggplot(
  timechange_percent,
  aes(x = clinton_per, xend = clinton_per_2008, y = Town, group = Town)
) +
  geom_dumbbell(color = "#a3c4dc", size = 0.5, point.colour.l = "#0e668b") +
  # scale_x_continuous(label = percent) +
  geom_vline(xintercept = 50) +
  annotate("text", x = 68, y = 140, label = "2008", size = 3, colour = "gray30") +
  annotate("text", x = 68, y = 143, label = "2016", size = 3, colour = "gray30") +
  annotate("point", x = 66, y = 140, colour = "#a3c4dc", size = 2) +
  annotate("point", x = 66, y = 143, colour = "#0e668b", size = 2) +
  labs(
    x = NULL,
    y = NULL,
    title = "Percentage of votes for Clinton by town, 2008 vs. 2016"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(
      family = "Trebuchet MS",
      color = "#666666",
      face = "bold",
      size = 6
    ),
    axis.text = element_text(size = 7),
    axis.ticks = element_blank(),
    plot.background = element_rect(fill = "#f7f7f7"),
    panel.background = element_rect(fill = "#f7f7f7"),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(),
    legend.position = "top"
  )
gg
## Warning: Removed 1 rows containing missing values (geom_dumbbell).
## How'd Bernie and Hillary do in Obama towns?
# Towns where Obama's 2008 percentage beat Clinton's, with the 2016
# percentages attached.
obama_towns <- dem2008 %>%
  subset(obama_per_2008 > clinton_per_2008) %>%
  left_join(percent_margins)
## Joining by: "Town"

# Rename the key to `id` for the polygon join and label the 2016 winner.
colnames(obama_towns)[colnames(obama_towns) == "Town"] <- "id"
obama_town_map <- mutate(
  obama_towns,
  winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders")
)
obama_voters_map <- town_shape_df %>%
  left_join(obama_town_map)
## Joining by: "id"

# Keep only polygons with a winner, i.e. drop every town outside this subset.
obama_voters_map <- subset(obama_voters_map, !is.na(winner))
# Map of the 2016 winner, restricted to the towns Obama carried in 2008.
ggplot(
  obama_voters_map,
  aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(color = "white") +
  labs(title = "2016 winners in towns that Obama won in 2008") +
  coord_equal() +
  theme_opts
# Number of 2008 Obama towns carried by each candidate in 2016.
o_town_results <- summarise(
  group_by(obama_town_map, winner),
  towns = n()
)
kable(o_town_results)
winner | towns |
---|---|
Clinton | 37 |
Sanders | 52 |
In 2016, Clinton picked up 37 towns that Obama won in 2008.
# Towns where Clinton's 2008 percentage beat Obama's, with the 2016
# percentages attached.
clinton_towns <- dem2008 %>%
  subset(obama_per_2008 < clinton_per_2008) %>%
  left_join(percent_margins)
## Joining by: "Town"

# Rename the key to `id` for the polygon join and label the 2016 winner.
colnames(clinton_towns)[colnames(clinton_towns) == "Town"] <- "id"
clinton_town_map <- mutate(
  clinton_towns,
  winner = ifelse(clinton_per > sanders_per, "Clinton", "Sanders")
)
clinton_voters_map <- town_shape_df %>%
  left_join(clinton_town_map)
## Joining by: "id"

# Keep only polygons with a winner, i.e. drop every town outside this subset.
clinton_voters_map <- subset(clinton_voters_map, !is.na(winner))
# Map of the 2016 winner, restricted to the towns Clinton carried in 2008.
ggplot(
  clinton_voters_map,
  aes(x = long, y = lat, group = group, fill = winner)
) +
  geom_polygon() +
  geom_path(color = "white") +
  labs(title = "2016 winners in towns that Clinton won in 2008") +
  coord_equal() +
  theme_opts
# Number of 2008 Clinton towns carried by each candidate in 2016. Towns with
# no 2016 percentages show up under an NA winner.
c_town_results <- summarise(
  group_by(clinton_town_map, winner),
  towns = n()
)
kable(c_town_results)
winner | towns |
---|---|
Clinton | 17 |
Sanders | 61 |
NA | 1 |
Interestingly, Clinton lost 61 of the 79 towns she won in 2008.
In 2016, she held on to only 17 of them (one town could not be matched to a 2016 result).
But that didn’t make much of a difference because she still won the towns with larger populations.
# Town classification lookup, reduced to the town name and its Type and
# renamed so it joins to the results on `Town`.
urban <- read.csv("data/urban_rural.csv", stringsAsFactors = FALSE)
urban <- setNames(urban[c("NAME10", "Type")], c("Town", "Type"))
dem_results2 <- dem_results %>%
  left_join(urban)
## Joining by: "Town"

# Long format: one row per town and candidate, holding that candidate's
# percentage. The "_per" suffix is stripped from the candidate labels.
dem_percent <- gather(
  select(dem_results2, Town, Type, clinton_per, sanders_per),
  "candidate", "percent", 3:4
)
dem_percent$candidate <- gsub("_per", "", dem_percent$candidate)
Sanders has claimed most of his support comes from urban areas with young people and the less-affluent.
How did he do in those urban areas versus rural and mixed towns?
# Distribution of each candidate's town-level percentage, one pair of boxes
# per town type.
ggplot(
  dem_percent,
  aes(x = Type, y = percent, fill = candidate)
) +
  geom_boxplot() +
  labs(title = "Candidate support winner by town type")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
Sanders had more support from rural towns than Clinton, as well as in towns with a mix of urban and rural areas.
Clinton edged out Sanders in support in more-populous, urban cities.
And that made the biggest difference.
# Total votes per candidate within each town type, in long format.
dem_total <- summarise(
  group_by(
    gather(
      select(dem_results2, Type, clinton_count, sanders_count),
      "candidate", "votes", 2:3
    ),
    Type, candidate
  ),
  votes = sum(votes)
)
dem_total$candidate <- gsub("_count", "", dem_total$candidate)

# One horizontal bar per candidate, stacked by town type.
ggplot(dem_total, aes(x = candidate, y = votes, fill = Type)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Primary votes by town type")
# Headline figures for the turnout chart, one row per category.
count_head <- c("Population", "Registered.voters", "Voted.in.primaries")
# Build the frame directly so Count stays numeric (no cbind() round-trip
# through character) and Type starts out as plain character.
count <- data.frame(
  Type = count_head,
  Count = c(3597000, 559741, 231588),
  stringsAsFactors = FALSE
)
# Dem
# 731241
# Rep
# 415689
# Unaf
# 94542
# Reverse the category order with explicit levels. The previous
# rev(levels(count$Type)) only worked when data.frame() converted strings to
# factors; with character columns levels() is NULL and every Type became NA.
count$Type <- factor(count$Type, levels = rev(count_head))
# Horizontal bars for population, registered voters and primary voters.
# Each label is the bar's share of the Population row, taken from the data
# itself so the percentages stay correct if the population figure is updated.
ggplot(count, aes(x = Type, fill = Type)) +
  geom_bar(aes(y = Count), stat = "identity", position = "dodge") +
  geom_text(
    aes(
      x = Type,
      y = Count,
      label = paste0(
        as.character(round(Count / Count[Type == "Population"] * 100, 0)),
        "%"
      )
    ),
    hjust = 1
  ) +
  coord_flip() +
  labs(title = "Who actually voted in the CT primaries")
The number of residents who participated in Connecticut’s presidential primary is tiny compared to the rest of the state’s population. Six percent of the state helped decide the final candidates who will face off during the general election in November.