This analysis accompanies the Trend CT story
This analysis is based off data aggregated by the Sunlight Foundation’s PoliticalAdSleuth.com.
ad_extractor.R
pulled the dataframe from politicaladsleuth.com and PDFs from the FCCparser_totals.R
extracted data from the converted PDFs and pulled it into a dataframeLet’s load up the packages we’ll need
library(RCurl)
library(dplyr)
library(lubridate)
library(stringr)
library(ggplot2)
library(tidyr)
library(DT)
ad_extractor.R
and parser_totals.R
and joining them.url <- "http://trendct.github.io/data/2016/04/ad-buys-ct/data/ad_buys.csv"
the_csv <- getURL(url)
totals <- read.csv(textConnection(the_csv), stringsAsFactors=FALSE)
# totals <- read.csv("data/ad_buys.csv", stringsAsFactors=FALSE)
totals$gross.amount <- gsub("\\$", "", totals$gross.amount)
totals$gross.amount <- gsub(",", "", totals$gross.amount)
totals$gross.amount <- as.numeric(totals$gross.amount)
totals$net.amount <- gsub("\\$", "", totals$net.amount)
totals$net.amount <- gsub(",", "", totals$net.amount)
totals$net.amount <- as.numeric(totals$net.amount)
totals$agency.commission <- totals$gross.amount - totals$net.amount
# making the dates recongized by R
totals$time.period.start <- mdy(totals$time.period.start)
totals$time.period.end <- mdy(totals$time.period.end)
totals$sheet <- gsub("spreadsheets/", "", totals$sheet)
totals$sheet <- gsub(".xlsx", "", totals$sheet)
# joining the data with the original dataframe from ad_extractor.R
# this will add more details like station and advertiser
url <- "http://trendct.github.io/data/2016/04/ad-buys-ct/data/ads_dataframe.csv"
the_csv <- getURL(url)
presi_table <- read.csv(textConnection(the_csv), stringsAsFactors=FALSE)
# presi_table <- read.csv("data/ads_dataframe.csv", stringsAsFactors=FALSE)
totals <- left_join(totals, presi_table, by="sheet")
# create a new column identifying the candidate based on what group bought the ad
totals$candidate <- ""
for (i in 1:nrow(totals)) {
info <- str_to_upper(totals$Advertiser.File.Info[i])
if (grepl("BERNIE", info)) {
totals$candidate[i] <- "Bernie Sanders"
} else if (grepl("HILLARY", info)) {
totals$candidate[i] <- "Hillary Clinton"
} else {
totals$candidate[i] <- "Other"
}
}
# cleaning up the column names to eliminate spaces
colnames(totals) <- c("row", "time.period.start", "time.period.end", "spots", "gross.amount", "agency.commission", "net.amount", "sheet", "row2", "tv.station", "market", "date", "type", "status", "spots2", "cost2", "advertiser", "link", "doc", "candidate")
totals <- totals[c("candidate", "tv.station", "market", "date", "time.period.start", "time.period.end", "spots", "gross.amount", "agency.commission", "net.amount", "sheet", "advertiser", "link", "doc")]
# create a new column identifying the group
totals$group <- ""
for (i in 1:nrow(totals)) {
info <- str_to_upper(totals$advertiser[i])
if (grepl("HILLARY 2016", info)) {
totals$group[i] <- "Hillary 2016"
} else if (grepl("BERNIE SANDERS FOR PRESIDENT", info)) {
totals$group[i] <- "Bernie Sanders for President"
} else if (grepl("HILLARY CLINTON", info)) {
totals$group[i] <- "Hillary Clinton"
} else if (grepl("BERNIE SANDERS", info)) {
totals$group[i] <- "Bernie Sanders"
} else if (grepl("BERNIE 2016", info)) {
totals$group[i] <- "Bernie 2016"
} else if (grepl("HILLARY FOR AMERICA", info)) {
totals$group[i] <- "Hillary for America"
}
}
# Picking out the most-recent contracts, discarding the invoices with no pricing data
check <- totals %>%
group_by(tv.station, time.period.start, time.period.end, candidate) %>%
arrange(-spots)
check2 <- totals
check2$mega <- paste(check2$tv.station, check2$time.period.start,check2$time.period.end,check2$candidate)
check3 <- check2 %>%
group_by(mega) %>%
top_n(n = 1, wt=spots)
# Making sure there are no duplicate rows
check3 <- unique(check3)
# Now check to see if there are any overlapping time periods
check3$interval <- interval(check3$time.period.start, check3$time.period.end)
check3$overlaps <- ""
rows <- nrow(check3)-1
for (i in 1:rows) {
if ((check3$tv.station[i]==check3$tv.station[i+1]) && (check3$group[i]==check3$group[i+1])) {
if (int_overlaps(check3$interval[i], check3$interval[i+1])) {
check3$overlaps[i] <- "yes"
check3$overlaps[i+1] <- "yes" }
else {
check3$overlaps[i] <- "no"
}
} else {
check3$overlaps[i] <- "no"
}
}
# Alright, just a handful of overlapping contact periods
# It looks like there was one Hillary contract that was revised to expand the time period and increase ad buys
# Will take out the older contract
check3 <- subset(check3, sheet!="04_11_16WFSB14603982219469_38")
# How much did each candidate spend on ad buys in CT?
tapply(check3$gross.amount, check3$candidate, sum)
## Bernie Sanders Hillary Clinton
## 722190 171720
# How many ads did they buy each?
tapply(check3$spots, check3$candidate, sum)
## Bernie Sanders Hillary Clinton
## 1864 784
## How many ads by station?
tapply(check3$spots, check3$tv.station, sum)
## WCCT-TV WCTX WFSB WTIC-TV WTNH WVIT
## 405 415 431 335 551 511
# How many by ad period start date?
tapply(check3$spots, check3$time.period.start, sum)
## 2016-03-28 2016-04-25
## 2446 202
# How many ads did each group purchase?
tapply(check3$spots, check3$group, sum)
## Bernie 2016 Bernie Sanders
## 936 532
## Bernie Sanders for President Hillary 2016
## 396 389
## Hillary Clinton Hillary for America
## 208 187
# How much was spent by each group?
tapply(check3$gross.amount, check3$group, sum)
## Bernie 2016 Bernie Sanders
## 344060 79645
## Bernie Sanders for President Hillary 2016
## 298485 60550
## Hillary Clinton Hillary for America
## 26830 84340
# How much was spent total by date of FCC filing?
tapply(check3$gross.amount, check3$date, sum)
## 04/08/16 04/11/16 04/12/16 04/13/16 04/14/16 04/15/16
## 23405 183810 94195 296785 109805 185910
## Money spent by FCC filing date
sum_date <- check3 %>%
group_by(date, candidate, tv.station) %>%
summarise(total=sum(gross.amount))
sum_date_only <- check3 %>%
group_by(date, candidate) %>%
summarise(total=sum(gross.amount))
ggplot(data=sum_date_only, aes(x=date, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) + ggtitle("Total spent on ad buys by day")
ggplot(data=sum_date, aes(x=date, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge())+ facet_wrap(~tv.station) +
ggtitle("Total spent on ad buys by day and station")
## Money spent by start period
sum_period_start <- check3 %>%
group_by(time.period.start, candidate, tv.station) %>%
summarise(total=sum(gross.amount))
ggplot(data=sum_period_start, aes(x=time.period.start, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) +
ggtitle("Money spent by start period")
ggplot(data=sum_period_start, aes(x=time.period.start, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge())+ facet_wrap(~tv.station) +
ggtitle("Money spent by start period and station")
## Money spent by group
sum_group <- check3 %>%
group_by(group, candidate, tv.station) %>%
summarise(total=sum(gross.amount))
ggplot(data=sum_group, aes(x=group, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) +
ggtitle("Money spent by group")
ggplot(data=sum_group, aes(x=group, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge())+ facet_wrap(~tv.station) + coord_flip() +
ggtitle("Money spent by group and station")
## Spots by FCC filing date
spots_date <- check3 %>%
group_by(date, candidate, tv.station) %>%
summarise(spots=sum(spots))
spots_date2 <- check3 %>%
group_by(date, candidate) %>%
summarise(spots=sum(spots))
ggplot(data=spots_date2, aes(x=date, y=spots, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) +
ggtitle("Total ads purchased by day")
ggplot(data=spots_date, aes(x=date, y=spots, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge())+ facet_wrap(~tv.station) +
ggtitle("Total ads purchased by day and station")
## spots by start period
spot_period_start <- check3 %>%
group_by(time.period.start, candidate, tv.station) %>%
summarise(spots=sum(spots))
ggplot(data=spot_period_start, aes(x=time.period.start, y=spots, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) +
ggtitle("Spots by start period")
ggplot(data=spot_period_start, aes(x=time.period.start, y=spots, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) + facet_wrap(~tv.station) + theme_minimal() +
ggtitle("Spots by start period and station")
# Spots and Candidates and Station
scs <- check3 %>%
group_by(candidate, tv.station) %>%
summarise(spots=sum(spots))
ggplot(data=scs, aes(x=tv.station, y=spots, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) + theme_minimal() +
ggtitle("Spots and candidates and station")
## for datatables
for_dt <- check3
for_dt <- for_dt[c("candidate", "group", "tv.station", "date", "spots", "net.amount", "agency.commission", "gross.amount", "time.period.start", "time.period.end")]
colnames(for_dt) <- c("Candidate", "Group", "Station", "Date", "Spots", "Net amount", "Commission", "Gross amount", "Ad start", "Ad end")
for_dt <- data.frame(for_dt)
datatable(for_dt)
Read the story in TrendCT.org:
Bernie Sanders, Hillary Clinton campaigns increasing ad buys before CT primaries