This is the analysis to go with the following stories:

The data is from the U.S. Environmental Protection Agency’s Safe Drinking Water Information System (SDWIS) as of March 2016.

library(dplyr)
library(ctnamecleaner)
## Warning: replacing previous import 'dplyr::rename' by 'reshape::rename'
## when loading 'ctnamecleaner'
library(ggmap)
library(leaflet)
library(stringr)
library(lubridate)
library(DT)

Bring in the initial Lead ALE dataset filtered to Connecticut water systems.

Instructions on how to get this data is in the repo’s readme.

ct <- read.csv("data/lead_ale_samples_ct.csv", stringsAsFactors=FALSE)
head(ct[,1:6])
##   EPA.Region Submission.Year Submission.Quarter    PWS.ID
## 1          1            2015                  4 CT0012011
## 2          1            2015                  4 CT0012011
## 3          1            2015                  4 CT0030011
## 4          1            2015                  4 CT0030011
## 5          1            2015                  4 CT0030011
## 6          1            2015                  4 CT0030061
##                   PWS.Name Primacy.Agency.Code
## 1          HOP RIVER HOMES                  CT
## 2          HOP RIVER HOMES                  CT
## 3 ASHFORD HILLS APARTMENTS                  CT
## 4 ASHFORD HILLS APARTMENTS                  CT
## 5 ASHFORD HILLS APARTMENTS                  CT
## 6        MAR-LEA PARK APTS                  CT

Bring in data on individual water systems in Connecticut, which includes approximate address and other details not included in the other data set.

actions <- read.csv("data/actions_count.csv", stringsAsFactors=FALSE)
head(actions[,1:6])
##   X                      name           address        phone
## 1 1 166 & 180 BOSTON TURNPIKE BOLTON, CT  06043 860-649-2871
## 2 2 166 & 180 BOSTON TURNPIKE BOLTON, CT  06043 860-649-2871
## 3 3 166 & 180 BOSTON TURNPIKE BOLTON, CT  06043 860-649-2871
## 4 4 166 & 180 BOSTON TURNPIKE BOLTON, CT  06043 860-649-2871
## 5 5 166 & 180 BOSTON TURNPIKE BOLTON, CT  06043 860-649-2871
## 6 6 166 & 180 BOSTON TURNPIKE BOLTON, CT  06043 860-649-2871
##                         violation compliance.begin
## 1             Monitoring, Regular      APR-01-2009
## 2             Monitoring, Regular      APR-01-2009
## 3             Monitoring, Regular      APR-01-2009
## 4 Monitoring, Routine Major (TCR)      JAN-01-2009
## 5             Monitoring, Regular      OCT-01-2008
## 6             Monitoring, Regular      OCT-01-2008

Cleaning and prepping the data for analysis.

all <- actions
all$PWS.ID <- all$water.system.id
all <- all[!duplicated(all$PWS.ID),]

ct_all <- left_join(ct, all, by="PWS.ID")

ct_all_current <- ct_all[!is.na(ct_all$X),]
ct_all_current <- ctnamecleaner(city.s.served, ct_all_current)
## Your file with fixed town names has been exported. 
## Unfortunately, no matches were found for  8  They can be found in your folder. The file is called no_matches.csv
names(ct_all_current)[names(ct_all_current) == 'real.town.name'] <- 'cities.served'
ct_all_current <- ctnamecleaner(city.s.served, ct_all_current)
## Your file with fixed town names has been exported. 
## Unfortunately, no matches were found for  8  They can be found in your folder. The file is called no_matches.csv
ct_all_current$town <-  sub(",.*","",ct_all_current$address)
ct_all_current <- ctnamecleaner(town, ct_all_current)
## Your file with fixed town names has been exported. 
## Unfortunately, no matches were found for  8  They can be found in your folder. The file is called no_matches.csv
ct_all_current$Sampling.Start.Date <- mdy(ct_all_current$Sampling.Start.Date)
ct_all_current$year.start <- year(ct_all_current$Sampling.Start.Date)

ct_all_current$Sampling.End.Date <- mdy(ct_all_current$Sampling.End.Date)
ct_all_current$year.end <- year(ct_all_current$Sampling.End.Date)

out_of_state <- ct_all_current[is.na(ct_all_current$real.town.name),]
ct_all_current <- ct_all_current[!is.na(ct_all_current$real.town.name),]

ct_all_current$name_address <- paste(ct_all_current$name, ct_all_current$address, sep=", ")
ct_all_current$ppb <- ct_all_current$Sample.Measure..mg.L.*1000

Geocoding the addresses.

# This function geocodes a location (find latitude and longitude) using the Google Maps API
geo <- geocode(location = ct_all_current$name_address, output="latlon", source="google")

# add those coordinates to our dataset
ct_all_current$lon <- geo$lon
ct_all_current$lat <- geo$lat

Prepping the dataframes for mapping.

ct_all_current_df <- ct_all_current %>%
  group_by(real.town.name) %>%
  summarise(Count=n())

ct_all_current_df <- data.frame(ct_all_current_df)

ct_all_current_unique <- ct_all_current[!duplicated(ct_all_current$PWS.Name),]

ct_all_current_u_df <- ct_all_current_unique %>%
  group_by(real.town.name) %>%
  summarise(Count=n(), Population=sum(Population.Served.Count))
ct_all_current_u_df <- data.frame(ct_all_current_u_df)

ct_all_current_unique$name_address <- paste(ct_all_current_unique$name, ct_all_current_unique$address, sep=", ")

ct_all_current_for_map <- ct_all_current[!is.na(ct_all_current$lon),]
ct_all_current_for_map$name <- str_to_title(ct_all_current_for_map$name)
ct_all_current_for_map$Is.School.Or.Daycare.Ind <- gsub("Y", "School/Daycare", ct_all_current_for_map$Is.School.Or.Daycare.Ind)
ct_all_current_for_map$Is.School.Or.Daycare.Ind <- gsub("N", "Non-School", ct_all_current_for_map$Is.School.Or.Daycare.Ind)

pal <- colorFactor(c("navy", "red"), domain = c("Non-School", "School/Daycare"))

Population served by water systems where lead was detected

#Population served
pop <- ct_all_current_for_map %>%
  group_by(name, address, lat, lon, city.s.served, Is.School.Or.Daycare.Ind) %>%
  summarize(Pop=mean(Population.Served.Count), Year=max(year.end))

pop <- pop[order(-pop$Pop),] 

pop_popup <- paste0("<b>",pop$name, "</b><br/>",
                     pop$address, "<br />
                    Town served: ", pop$city.s.served, 
                     "<br />Population served: ", pop$Pop,
                     "<br />Latest year violation occurred: ", pop$Year)


m <- 
  
  leaflet(pop) %>% addTiles('http://{s}.tiles.wmflabs.org/bw-mapnik/{z}/{x}/{y}.png', attribution='&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% 
  setView(-72.690940, 41.651426, zoom = 8) %>% 
  addCircles(~lon, ~lat, weight = 1, popup=pop_popup, radius=pop$Pop*10, 
             color= ~pal(Is.School.Or.Daycare.Ind),, stroke = TRUE, fillOpacity = .2) %>%
  addLegend("bottomright", pal=pal, values=~Is.School.Or.Daycare.Ind, labels="Type'", title="Type")

m


Largest amount of lead detected in drinking water

#Biggest violations
  most <- ct_all_current_for_map %>%
  group_by(name) %>%
  filter(ppb == max(ppb)) %>% 
  filter(1:n() == 1)
  
most <- most[order(-most$ppb),] 

most_popup <- paste0("<b>",most$name, "</b><br/>",
                      most$address, "<br />
                      Town served: ", most$city.s.served, 
                      "<br />Highest ppb detected: ",most$ppb,
                      "<br />Year: ", most$year.end)


m <- 
  
  leaflet(most) %>% addTiles('http://{s}.tiles.wmflabs.org/bw-mapnik/{z}/{x}/{y}.png', attribution='&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% 
  setView(-72.690940, 41.651426, zoom = 8) %>% 
  addCircles(~lon, ~lat, weight = 1, popup=most_popup, radius=most$ppb*25, 
           color= ~pal(Is.School.Or.Daycare.Ind),, stroke = TRUE, fillOpacity = .2) %>%
  addLegend("bottomright", pal=pal, values=~Is.School.Or.Daycare.Ind, labels="Type'", title="Type")


m


How often lead was detected in drinking water

#Total violations

count <- ct_all_current_for_map %>%
  group_by(name, address, lat, lon, city.s.served, Is.School.Or.Daycare.Ind) %>%
  summarize(Count=n()) 

count <- count[order(-count$Count),] 

count_popup <- paste0("<b>",count$name, "</b><br/>",
                      count$address, "<br />
                      Town served: ", count$city.s.served, 
                      "<br />Lead violations: ",count$Count)

m <- 
  
  leaflet(count) %>% addTiles('http://{s}.tiles.wmflabs.org/bw-mapnik/{z}/{x}/{y}.png', attribution='&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% 
  setView(-72.690940, 41.651426, zoom = 8) %>% 
  addCircles(~lon, ~lat, weight = 1, popup=count_popup, radius=count$Count*1000, 
           color= ~pal(Is.School.Or.Daycare.Ind),, stroke = TRUE, fillOpacity = .2) %>%
  addLegend("bottomright", pal=pal, values=~Is.School.Or.Daycare.Ind, labels="Type'", title="Type")

The searchable list.

ct_all_current_for_map$name_link <- paste0("<a href='", ct_all_current_for_map$link, "'>", ct_all_current_for_map$name, "</a>")

list <- ct_all_current_for_map %>%
  select(name_link, ppb, year.end, Population.Served.Count,   Is.School.Or.Daycare.Ind, address, city.s.served, primary.water.source.type, Owner.Type.Description)

list <- data.frame(list)
datatable(list)