An analysis for the TrendCT story: Increase in gun purchases triggered by Connecticut state legislation

The data is/are from the Buzzfeed repo that scraped the FBI NICS PDFs on the number of firearm checks by month, state, and type.

Check the Trend CT repo for the scripts and data sets that were used and generated. Just want the seasonally adjusted state per capita data? Here’s the csv.

There are many caveats to the data, but the basic thing to take away is that a background check does not necessarily mean a gun sale. The Buzzfeed repo has more details.

This analysis will go over the process used to put together the story. In short, the background checks were normalized for annual population by state, adjusted for seasonality, and then charted.

Population estimates

The historical population data was gathered from Census.gov.

# Packages we'll need
library(knitr)
library(stringr)
library(ggplot2)
library(lubridate)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(seasonal)

us <- read.csv("data/historicalpop-US.csv", stringsAsFactors=FALSE)
states <- read.csv("data/historicalpop-StatesT.csv", stringsAsFactors=FALSE)
kable(head(us))
Year US
1998 275854104
1999 279040168
2000 282162411
2001 284968955
2002 287625193
2003 290107933
kable(head(states[,1:5]))
Year Alabama Alaska Arizona Arkansas
1998 4404701 619932 4883342 2626289
1999 4430141 624779 5023823 2651860
2000 4452173 627963 5160586 2678588
2001 4467634 633714 5273477 2691571
2002 4480089 642337 5396255 2705927
2003 4503491 648414 5510364 2724816

Bringing in the data from the Buzzfeed repo.

nics <- read.csv("data/nics-firearm-background-checks.csv", stringsAsFactors=FALSE)
# Quick trimming up of leading and trailing white spaces
# Using the stringr package
nics$state <- str_trim(nics$state)
kable(head(nics[,1:5]))
month state permit handgun long_gun
2015-11 Alabama 18870 23022 22650
2015-11 Alaska 209 3062 3209
2015-11 Arizona 2303 12382 9041
2015-11 Arkansas 3298 6359 11611
2015-11 California 98452 41181 35007
2015-11 Colorado 4144 19784 16082

Preliminary annual US analysis

# Adding a column for year in the dataframe
# But first have to convert the data into a date-friendly format

nics$date <- as.Date(paste(nics$month,"-01",sep=""))
nics$year <- year(nics$date)

# Creating a new dataframe with the total background checks in the country by year
annual <- data.frame(tapply(nics$totals, nics$year, sum))

# Cleaning up the new dataframe
annual$year <- rownames(annual)
rownames(annual) <- NULL
colnames(annual) <- c("Total", "Year")
annual <- annual[c("Year", "Total")]

# Merging background checks to population dataframe
annual <- merge(annual, us)

# Calculating per capita (1,000 residents)
annual$percapita <- round((annual$Total/annual$US)*1000,2)

# 1998 is an incomplete year, so let's take that out
annual <- annual[-1,]

kable(head(annual))
Year Total US percapita
2 1999 9043747 279040168 32.41
3 2000 8427096 282162411 29.87
4 2001 8820045 284968955 30.95
5 2002 8367069 287625193 29.09
6 2003 8402244 290107933 28.96
7 2004 8579891 292805298 29.30

Charting the total background checks by year

ggplot(data=annual, aes(x=Year,y=Total, group=1)) +
  geom_line() +
  ggtitle("Background checks for firearms in the US") +
  labs(x="Year", y="Total")

Charting the total background checks by year per capita

ggplot(data=annual, aes(x=Year,y=percapita, group=1)) +
  geom_line() +
  ggtitle("Background checks per capita for firearms in the US") +
  labs(x="Year", y="Per 1,000 residents")

Not much difference, right.

Monthly totals for the US

monthly <-data.frame(tapply(nics$totals, nics$date, sum))
monthly$date <- rownames(monthly)
rownames(monthly) <- NULL
colnames(monthly) <- c("Total", "Month")
monthly<- monthly[c("Month", "Total")]
kable(head(monthly))
Month Total
1998-11-01 21176
1998-12-01 870722
1999-01-01 585974
1999-02-01 690215
1999-03-01 741687
1999-04-01 638666

Monthly background checks for the US per capita

monthly$Month <- ymd(monthly$Month)
monthly$Year <- year(monthly$Month)

# Join by annual population from the Census
monthly <- left_join(monthly, us)
## Joining by: "Year"
monthly$percapita <- round((monthly$Total/monthly$US)*1000,2)

us_month <- monthly[c("Month", "percapita")]
colnames(us_month) <- c("Month", "US")
kable(head(us_month))
Month US
1998-11-01 0.08
1998-12-01 3.16
1999-01-01 2.10
1999-02-01 2.47
1999-03-01 2.66
1999-04-01 2.29

Monthly totals for Connecticut

ct <- subset(nics, state=="Connecticut")
ctpop  <- read.csv("data/historicalpop-CT.csv", stringsAsFactors=FALSE)
ct_monthly<-data.frame(tapply(ct$totals, ct$month, sum))
ct_monthly$date <- rownames(ct_monthly)
rownames(ct_monthly) <- NULL
colnames(ct_monthly) <- c("Total", "Month")
ct_monthly<- ct_monthly[c("Month", "Total")]
kable(head(ct_monthly))
Month Total
1998-11 80
1998-12 6790
1999-01 6265
1999-02 8069
1999-03 7877
1999-04 9111

Monthly checks per capita: US vs CT

ct_monthly$Month <- as.Date(paste(ct_monthly$Month, "-01", sep=""))
ct_monthly$Month <- ymd(ct_monthly$Month)
ct_monthly$Year <- year(ct_monthly$Month)

ct_monthly <- left_join(ct_monthly, ctpop)
## Joining by: "Year"
ct_monthly$percapita <- round((ct_monthly$Total/ct_monthly$CT)*1000,2)

ct_month <- ct_monthly[c("Month", "percapita")]
colnames(ct_month) <- c("Month", "CT")

ct_us_month <- left_join(us_month, ct_month)
## Joining by: "Month"
# Prepping the dataframe for ggplot
ct_us_month_gg <- gather(ct_us_month, "State", "Per.Capita", 2:3)
## Warning: attributes are not identical across measure variables; they will
## be dropped
ggplot(data=ct_us_month_gg, aes(x=Month,y=Per.Capita, group=State, colour=State)) +
  geom_line() +
  ggtitle("Background checks per capita for firearms in the US and CT") +
  labs(x="Year", y="Per 1,000 residents")