This is an exploratory map going over the steps described by Sarah Frostenson at Vox and Rad Cunningham from the Washington State Department of Health that estimates a neighborhood’s risk for lead exposure. It inspired the TrendCT.org story Which neighborhoods have the highest risk of lead poisoning in CT?

Check out the Vox story this was based on: The risk of lead poisoning isn’t just in Flint. So we mapped the risk in every neighborhood in America.

Overview

What’s going on here


First, load a bunch of packages.

library(dplyr)
library(scales)
require(rgdal)
require(ggmap)
require(Cairo)
require(maptools)
library(ggplot2)
#devtools::install_github("hrecht/censusapi")
library(censusapi)
source("keys.R")
library(DT)

Then, pull the data via the Census API - censusapi R package.

And assess the lead risk based on a formula by WA-DOH.

housing <- getCensus(name="acs5", 
                      vintage=2014,
                      key=censuskey, 
                      vars=c("NAME", "B25034_001E", "B25034_002E", "B25034_003E", 
                             "B25034_004E","B25034_005E","B25034_006E","B25034_007E",
                             "B25034_008E","B25034_009E","B25034_010E", "B17004_001E"), 
                      region="tract:*", regionin="state:09") 

# Change state FIPS code above for a tract data for a different state-- but you'll need to bring in the matching shape file

# You can only pull one state at a time but you can loop the function to get tracts for all states with this code

# tracts <- NULL
## For all states in the fips list
# for (f in fips) {
    ## Define what state to get
    # stateget <- paste("state:", f, sep="")
    ## Get data for all tracts within that state
    # temp <- getCensus(name="acs5", vintage=2014, 
    # key=censuskey, 
    # vars=c("NAME", "B25034_001E", "B25034_002E", "B25034_003E", 
    #                         "B25034_004E","B25034_005E","B25034_006E","B25034_007E",
    #                         "B25034_008E","B25034_009E","B25034_010E", "B17004_001E"), 
    # region="tract:*", regionin=stateget)
    ## Bind to existing data
    # tracts <- rbind(tracts, temp)
# }
## You can apply this to poverty below, too

old <- housing

old$age_39 <- old$B25034_010E * .68
old$age40_59 <- (old$B25034_009E + old$B25034_008E) * .43
old$age60_79 <- (old$B25034_007E + old$B25034_006E) * .08
old$age80_99 <- (old$B25034_005E + old$B25034_004E) * .03
old$age00_10 <- (old$B25034_003E + old$B25034_002E) * 0
old$total <-  old$B25034_001E

old$risk_sum <- old$age_39 + old$age40_59 + old$age60_79 + old$age80_99 + old$age80_99 + old$age00_10
old$risk <- old$risk_sum/old$total*100
old <- subset(old, total>0)


## Displaying the new columns of the downloaded dataframe

datatable(head(old[c("NAME", "total", "risk_sum", "risk")]))


S1701 (part of the original Vox methodology) is not accessible via the Census API but I found the equivalent in B06012.

poverty <- getCensus(name="acs5", 
                      vintage=2014,
                      key=censuskey, 
                      vars=c("NAME", "B06012_001E", "B06012_002E"
                      ), 
                      region="tract:*", regionin="state:09")

poverty$percent_poverty <- poverty$B06012_002E/poverty$B06012_001E * 100
poverty <- subset(poverty, B06012_001E>0)

## Displaying the first few rows of the downloaded dataframe

datatable(head(poverty[c("NAME", "B06012_001E", "B06012_002E", "percent_poverty")]))


Determining the Z score now.

# (x-mean(x))/sd(x)

poverty$z_poverty <- (poverty$percent_poverty - mean(poverty$percent_poverty))/sd(poverty$percent_poverty)
old$z_housing <-  (old$risk - mean(old$risk))/sd(old$risk)

# Calcuating weighted figures

poverty$weighted_poverty <- poverty$z_poverty * .42
old$weighted_housing <- old$z_housing * .58

# join dataframes

poverty$tract_id <- paste0(poverty$state, poverty$county, poverty$tract)
old$tract_id <- paste0(old$state, old$county, old$tract)

old <- old[c("tract_id", "risk", "z_housing", "weighted_housing")]

df <- left_join(poverty, old)
## Joining by: "tract_id"
df$riskscore_raw <- df$weighted_housing + df$weighted_poverty

# calculating deciles on risk score

df <- mutate(df, quantile = ntile(riskscore_raw, 10))
df$id <- df$tract

## Displaying the new columns of the downloaded dataframe
datatable(head(df[c("id", "z_poverty", "weighted_poverty", "risk", "z_housing", "weighted_housing", "riskscore_raw", "quantile")]))


Generating the map

Download the census blocks for other states from the Census.

# Setting up the environment to handle spatial information
gpclibPermit()
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
gpclibPermitStatus()
## [1] TRUE
# Bringing in the shapefiles in a folder downloaded from the Census
# Find other states here https://www.census.gov/geo/maps-data/data/cbf/cbf_tracts.html

censustracts <- readOGR(dsn="maps/census_tracts/wgs84", layer="tractct_37800_0000_2010_s100_census_1_shp_wgs84")
## OGR data source with driver: ESRI Shapefile 
## Source: "maps/census_tracts/wgs84", layer: "tractct_37800_0000_2010_s100_census_1_shp_wgs84"
## with 829 features
## It has 14 fields
censustracts <- fortify(censustracts, region="TRACTCE10")

risk_map <- left_join(censustracts, df)
## Joining by: "id"
ggplot() +
  geom_polygon(data = risk_map, aes(x=long, y=lat, group=group, fill=quantile), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Estimated lead risk in Connecticut", fill="")