Replicating the Vox lead risk map for CT

This is an exploratory map going over the steps described by Sarah Frostenson at Vox and Rad Cunningham from the Washington State Department of Health that estimates a neighborhood’s risk for lead exposure. It inspired the TrendCT.org story Which neighborhoods have the highest risk of lead poisoning in CT?

Check out the Vox story this was based on: The risk of lead poisoning isn’t just in Flint. So we mapped the risk in every neighborhood in America.

Overview

What’s going on here

Pulling housing and poverty data by census tract from the Census via the excellent censusapi R package
Calculating a housing score based on weighted categorizations of home ages
Determining percent of those living beneath the poverty line by census tract
Getting a Z score for housing and poverty
Creating an overall score by combining both poverty and housing Z scores (after adjusting them a bit)
Splitting the range of results between 1 and 10, creating an overall score
Joining the results to a census tract shape file and mapping it
Evolution of this methodology: STATA script via WA-DOH -> Python script via Vox -> R script via TrendCT.org

First, load a bunch of packages.

library(dplyr)
library(scales)
require(rgdal)
require(ggmap)
require(Cairo)
require(maptools)
library(ggplot2)
#devtools::install_github("hrecht/censusapi")
library(censusapi)
source("keys.R")
library(DT)

Then, pull the data via the Census API - censusapi R package.

And assess the lead risk based on a formula by WA-DOH.

housing <- getCensus(name="acs5", 
                      vintage=2014,
                      key=censuskey, 
                      vars=c("NAME", "B25034_001E", "B25034_002E", "B25034_003E", 
                             "B25034_004E","B25034_005E","B25034_006E","B25034_007E",
                             "B25034_008E","B25034_009E","B25034_010E", "B17004_001E"), 
                      region="tract:*", regionin="state:09") 

# Change state FIPS code above for a tract data for a different state-- but you'll need to bring in the matching shape file

# You can only pull one state at a time but you can loop the function to get tracts for all states with this code

# tracts <- NULL
## For all states in the fips list
# for (f in fips) {
    ## Define what state to get
    # stateget <- paste("state:", f, sep="")
    ## Get data for all tracts within that state
    # temp <- getCensus(name="acs5", vintage=2014, 
    # key=censuskey, 
    # vars=c("NAME", "B25034_001E", "B25034_002E", "B25034_003E", 
    #                         "B25034_004E","B25034_005E","B25034_006E","B25034_007E",
    #                         "B25034_008E","B25034_009E","B25034_010E", "B17004_001E"), 
    # region="tract:*", regionin=stateget)
    ## Bind to existing data
    # tracts <- rbind(tracts, temp)
# }
## You can apply this to poverty below, too

old <- housing

old$age_39 <- old$B25034_010E * .68
old$age40_59 <- (old$B25034_009E + old$B25034_008E) * .43
old$age60_79 <- (old$B25034_007E + old$B25034_006E) * .08
old$age80_99 <- (old$B25034_005E + old$B25034_004E) * .03
old$age00_10 <- (old$B25034_003E + old$B25034_002E) * 0
old$total <-  old$B25034_001E

old$risk_sum <- old$age_39 + old$age40_59 + old$age60_79 + old$age80_99 + old$age80_99 + old$age00_10
old$risk <- old$risk_sum/old$total*100
old <- subset(old, total>0)


## Displaying the new columns of the downloaded dataframe

datatable(head(old[c("NAME", "total", "risk_sum", "risk")]))

S1701 (part of the original Vox methodology) is not accessible via the Census API but I found the equivalent in B06012.

poverty <- getCensus(name="acs5", 
                      vintage=2014,
                      key=censuskey, 
                      vars=c("NAME", "B06012_001E", "B06012_002E"
                      ), 
                      region="tract:*", regionin="state:09")

poverty$percent_poverty <- poverty$B06012_002E/poverty$B06012_001E * 100
poverty <- subset(poverty, B06012_001E>0)

## Displaying the first few rows of the downloaded dataframe

datatable(head(poverty[c("NAME", "B06012_001E", "B06012_002E", "percent_poverty")]))

Determining the Z score now.

# (x-mean(x))/sd(x)

poverty$z_poverty <- (poverty$percent_poverty - mean(poverty$percent_poverty))/sd(poverty$percent_poverty)
old$z_housing <-  (old$risk - mean(old$risk))/sd(old$risk)

# Calcuating weighted figures

poverty$weighted_poverty <- poverty$z_poverty * .42
old$weighted_housing <- old$z_housing * .58

# join dataframes

poverty$tract_id <- paste0(poverty$state, poverty$county, poverty$tract)
old$tract_id <- paste0(old$state, old$county, old$tract)

old <- old[c("tract_id", "risk", "z_housing", "weighted_housing")]

df <- left_join(poverty, old)

## Joining by: "tract_id"

df$riskscore_raw <- df$weighted_housing + df$weighted_poverty

# calculating deciles on risk score

df <- mutate(df, quantile = ntile(riskscore_raw, 10))
df$id <- df$tract

## Displaying the new columns of the downloaded dataframe
datatable(head(df[c("id", "z_poverty", "weighted_poverty", "risk", "z_housing", "weighted_housing", "riskscore_raw", "quantile")]))

Generating the map

Download the census blocks for other states from the Census.

# Setting up the environment to handle spatial information
gpclibPermit()

## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release

## [1] TRUE

gpclibPermitStatus()

## [1] TRUE

# Bringing in the shapefiles in a folder downloaded from the Census
# Find other states here https://www.census.gov/geo/maps-data/data/cbf/cbf_tracts.html

censustracts <- readOGR(dsn="maps/census_tracts/wgs84", layer="tractct_37800_0000_2010_s100_census_1_shp_wgs84")

## OGR data source with driver: ESRI Shapefile 
## Source: "maps/census_tracts/wgs84", layer: "tractct_37800_0000_2010_s100_census_1_shp_wgs84"
## with 829 features
## It has 14 fields

censustracts <- fortify(censustracts, region="TRACTCE10")

risk_map <- left_join(censustracts, df)

## Joining by: "id"

ggplot() +
  geom_polygon(data = risk_map, aes(x=long, y=lat, group=group, fill=quantile), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Estimated lead risk in Connecticut", fill="")

Replicating the Vox lead risk map for CT

Andrew Ba Tran

April 6, 2016

Overview

Generating the map