This analysis accompanies the Trend CT story

This analysis is based off data aggregated by the Sunlight Foundationâ€™s PoliticalAdSleuth.com.

`ad_extractor.R`

pulled the dataframe from politicaladsleuth.com and PDFs from the FCC`parser_totals.R`

extracted data from the converted PDFs and pulled it into a dataframe- Data is only as recent as 4/15/16. Primary election day is 4/26/16, so expect more filings between then.
- Check out the repo with all scripts and data

Letâ€™s load up the packages weâ€™ll need

```
library(RCurl)
library(dplyr)
library(lubridate)
library(stringr)
library(ggplot2)
library(tidyr)
library(DT)
```

`ad_extractor.R`

and `parser_totals.R`

and joining them.```
url <- "http://trendct.github.io/data/2016/04/ad-buys-ct/data/ad_buys.csv"
the_csv <- getURL(url)
totals <- read.csv(textConnection(the_csv), stringsAsFactors=FALSE)
# totals <- read.csv("data/ad_buys.csv", stringsAsFactors=FALSE)
totals$gross.amount <- gsub("\\$", "", totals$gross.amount)
totals$gross.amount <- gsub(",", "", totals$gross.amount)
totals$gross.amount <- as.numeric(totals$gross.amount)
totals$net.amount <- gsub("\\$", "", totals$net.amount)
totals$net.amount <- gsub(",", "", totals$net.amount)
totals$net.amount <- as.numeric(totals$net.amount)
totals$agency.commission <- totals$gross.amount - totals$net.amount
# making the dates recongized by R
totals$time.period.start <- mdy(totals$time.period.start)
totals$time.period.end <- mdy(totals$time.period.end)
totals$sheet <- gsub("spreadsheets/", "", totals$sheet)
totals$sheet <- gsub(".xlsx", "", totals$sheet)
# joining the data with the original dataframe from ad_extractor.R
# this will add more details like station and advertiser
url <- "http://trendct.github.io/data/2016/04/ad-buys-ct/data/ads_dataframe.csv"
the_csv <- getURL(url)
presi_table <- read.csv(textConnection(the_csv), stringsAsFactors=FALSE)
# presi_table <- read.csv("data/ads_dataframe.csv", stringsAsFactors=FALSE)
totals <- left_join(totals, presi_table, by="sheet")
# create a new column identifying the candidate based on what group bought the ad
totals$candidate <- ""
for (i in 1:nrow(totals)) {
info <- str_to_upper(totals$Advertiser.File.Info[i])
if (grepl("BERNIE", info)) {
totals$candidate[i] <- "Bernie Sanders"
} else if (grepl("HILLARY", info)) {
totals$candidate[i] <- "Hillary Clinton"
} else {
totals$candidate[i] <- "Other"
}
}
# cleaning up the column names to eliminate spaces
colnames(totals) <- c("row", "time.period.start", "time.period.end", "spots", "gross.amount", "agency.commission", "net.amount", "sheet", "row2", "tv.station", "market", "date", "type", "status", "spots2", "cost2", "advertiser", "link", "doc", "candidate")
totals <- totals[c("candidate", "tv.station", "market", "date", "time.period.start", "time.period.end", "spots", "gross.amount", "agency.commission", "net.amount", "sheet", "advertiser", "link", "doc")]
```

- Determining the group purchasing the ads
- Discarding out-of-date contracts
- Discarding invoices

```
# create a new column identifying the group
totals$group <- ""
for (i in 1:nrow(totals)) {
info <- str_to_upper(totals$advertiser[i])
if (grepl("HILLARY 2016", info)) {
totals$group[i] <- "Hillary 2016"
} else if (grepl("BERNIE SANDERS FOR PRESIDENT", info)) {
totals$group[i] <- "Bernie Sanders for President"
} else if (grepl("HILLARY CLINTON", info)) {
totals$group[i] <- "Hillary Clinton"
} else if (grepl("BERNIE SANDERS", info)) {
totals$group[i] <- "Bernie Sanders"
} else if (grepl("BERNIE 2016", info)) {
totals$group[i] <- "Bernie 2016"
} else if (grepl("HILLARY FOR AMERICA", info)) {
totals$group[i] <- "Hillary for America"
}
}
# Picking out the most-recent contracts, discarding the invoices with no pricing data
check <- totals %>%
group_by(tv.station, time.period.start, time.period.end, candidate) %>%
arrange(-spots)
check2 <- totals
check2$mega <- paste(check2$tv.station, check2$time.period.start,check2$time.period.end,check2$candidate)
check3 <- check2 %>%
group_by(mega) %>%
top_n(n = 1, wt=spots)
# Making sure there are no duplicate rows
check3 <- unique(check3)
# Now check to see if there are any overlapping time periods
check3$interval <- interval(check3$time.period.start, check3$time.period.end)
check3$overlaps <- ""
rows <- nrow(check3)-1
for (i in 1:rows) {
if ((check3$tv.station[i]==check3$tv.station[i+1]) && (check3$group[i]==check3$group[i+1])) {
if (int_overlaps(check3$interval[i], check3$interval[i+1])) {
check3$overlaps[i] <- "yes"
check3$overlaps[i+1] <- "yes" }
else {
check3$overlaps[i] <- "no"
}
} else {
check3$overlaps[i] <- "no"
}
}
# Alright, just a handful of overlapping contact periods
# It looks like there was one Hillary contract that was revised to expand the time period and increase ad buys
# Will take out the older contract
check3 <- subset(check3, sheet!="04_11_16WFSB14603982219469_38")
```

```
# How much did each candidate spend on ad buys in CT?
tapply(check3$gross.amount, check3$candidate, sum)
```

```
## Bernie Sanders Hillary Clinton
## 722190 171720
```

```
# How many ads did they buy each?
tapply(check3$spots, check3$candidate, sum)
```

```
## Bernie Sanders Hillary Clinton
## 1864 784
```

```
## How many ads by station?
tapply(check3$spots, check3$tv.station, sum)
```

```
## WCCT-TV WCTX WFSB WTIC-TV WTNH WVIT
## 405 415 431 335 551 511
```

```
# How many by ad period start date?
tapply(check3$spots, check3$time.period.start, sum)
```

```
## 2016-03-28 2016-04-25
## 2446 202
```

```
# How many ads did each group purchase?
tapply(check3$spots, check3$group, sum)
```

```
## Bernie 2016 Bernie Sanders
## 936 532
## Bernie Sanders for President Hillary 2016
## 396 389
## Hillary Clinton Hillary for America
## 208 187
```

```
# How much was spent by each group?
tapply(check3$gross.amount, check3$group, sum)
```

```
## Bernie 2016 Bernie Sanders
## 344060 79645
## Bernie Sanders for President Hillary 2016
## 298485 60550
## Hillary Clinton Hillary for America
## 26830 84340
```

```
# How much was spent total by date of FCC filing?
tapply(check3$gross.amount, check3$date, sum)
```

```
## 04/08/16 04/11/16 04/12/16 04/13/16 04/14/16 04/15/16
## 23405 183810 94195 296785 109805 185910
```

```
## Money spent by FCC filing date
sum_date <- check3 %>%
group_by(date, candidate, tv.station) %>%
summarise(total=sum(gross.amount))
sum_date_only <- check3 %>%
group_by(date, candidate) %>%
summarise(total=sum(gross.amount))
ggplot(data=sum_date_only, aes(x=date, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge()) + ggtitle("Total spent on ad buys by day")
```

```
ggplot(data=sum_date, aes(x=date, y=total, fill=candidate)) +
geom_bar(stat="identity", position=position_dodge())+ facet_wrap(~tv.station) +
ggtitle("Total spent on ad buys by day and station")
```