This is the analysis to go with the following stories:
The data is from the U.S. Environmental Protection Agency’s Safe Drinking Water Information System (SDWIS) as of March 2016.
library(dplyr)
library(ctnamecleaner)
## Warning: replacing previous import 'dplyr::rename' by 'reshape::rename'
## when loading 'ctnamecleaner'
library(ggmap)
library(leaflet)
library(stringr)
library(lubridate)
library(DT)
Bring in the initial Lead ALE dataset filtered to Connecticut water systems.
Instructions on how to get this data is in the repo’s readme.
ct <- read.csv("data/lead_ale_samples_ct.csv", stringsAsFactors=FALSE)
head(ct[,1:6])
## EPA.Region Submission.Year Submission.Quarter PWS.ID
## 1 1 2015 4 CT0012011
## 2 1 2015 4 CT0012011
## 3 1 2015 4 CT0030011
## 4 1 2015 4 CT0030011
## 5 1 2015 4 CT0030011
## 6 1 2015 4 CT0030061
## PWS.Name Primacy.Agency.Code
## 1 HOP RIVER HOMES CT
## 2 HOP RIVER HOMES CT
## 3 ASHFORD HILLS APARTMENTS CT
## 4 ASHFORD HILLS APARTMENTS CT
## 5 ASHFORD HILLS APARTMENTS CT
## 6 MAR-LEA PARK APTS CT
Bring in data on individual water systems in Connecticut, which includes approximate address and other details not included in the other data set.
actions <- read.csv("data/actions_count.csv", stringsAsFactors=FALSE)
head(actions[,1:6])
## X name address phone
## 1 1 166 & 180 BOSTON TURNPIKE BOLTON, CT 06043 860-649-2871
## 2 2 166 & 180 BOSTON TURNPIKE BOLTON, CT 06043 860-649-2871
## 3 3 166 & 180 BOSTON TURNPIKE BOLTON, CT 06043 860-649-2871
## 4 4 166 & 180 BOSTON TURNPIKE BOLTON, CT 06043 860-649-2871
## 5 5 166 & 180 BOSTON TURNPIKE BOLTON, CT 06043 860-649-2871
## 6 6 166 & 180 BOSTON TURNPIKE BOLTON, CT 06043 860-649-2871
## violation compliance.begin
## 1 Monitoring, Regular APR-01-2009
## 2 Monitoring, Regular APR-01-2009
## 3 Monitoring, Regular APR-01-2009
## 4 Monitoring, Routine Major (TCR) JAN-01-2009
## 5 Monitoring, Regular OCT-01-2008
## 6 Monitoring, Regular OCT-01-2008
Cleaning and prepping the data for analysis.
all <- actions
all$PWS.ID <- all$water.system.id
all <- all[!duplicated(all$PWS.ID),]
ct_all <- left_join(ct, all, by="PWS.ID")
ct_all_current <- ct_all[!is.na(ct_all$X),]
ct_all_current <- ctnamecleaner(city.s.served, ct_all_current)
## Your file with fixed town names has been exported.
## Unfortunately, no matches were found for 8 They can be found in your folder. The file is called no_matches.csv
names(ct_all_current)[names(ct_all_current) == 'real.town.name'] <- 'cities.served'
ct_all_current <- ctnamecleaner(city.s.served, ct_all_current)
## Your file with fixed town names has been exported.
## Unfortunately, no matches were found for 8 They can be found in your folder. The file is called no_matches.csv
ct_all_current$town <- sub(",.*","",ct_all_current$address)
ct_all_current <- ctnamecleaner(town, ct_all_current)
## Your file with fixed town names has been exported.
## Unfortunately, no matches were found for 8 They can be found in your folder. The file is called no_matches.csv
ct_all_current$Sampling.Start.Date <- mdy(ct_all_current$Sampling.Start.Date)
ct_all_current$year.start <- year(ct_all_current$Sampling.Start.Date)
ct_all_current$Sampling.End.Date <- mdy(ct_all_current$Sampling.End.Date)
ct_all_current$year.end <- year(ct_all_current$Sampling.End.Date)
out_of_state <- ct_all_current[is.na(ct_all_current$real.town.name),]
ct_all_current <- ct_all_current[!is.na(ct_all_current$real.town.name),]
ct_all_current$name_address <- paste(ct_all_current$name, ct_all_current$address, sep=", ")
ct_all_current$ppb <- ct_all_current$Sample.Measure..mg.L.*1000
Geocoding the addresses.
# This function geocodes a location (find latitude and longitude) using the Google Maps API
geo <- geocode(location = ct_all_current$name_address, output="latlon", source="google")
# add those coordinates to our dataset
ct_all_current$lon <- geo$lon
ct_all_current$lat <- geo$lat
ct_all_current_df <- ct_all_current %>%
group_by(real.town.name) %>%
summarise(Count=n())
ct_all_current_df <- data.frame(ct_all_current_df)
ct_all_current_unique <- ct_all_current[!duplicated(ct_all_current$PWS.Name),]
ct_all_current_u_df <- ct_all_current_unique %>%
group_by(real.town.name) %>%
summarise(Count=n(), Population=sum(Population.Served.Count))
ct_all_current_u_df <- data.frame(ct_all_current_u_df)
ct_all_current_unique$name_address <- paste(ct_all_current_unique$name, ct_all_current_unique$address, sep=", ")
ct_all_current_for_map <- ct_all_current[!is.na(ct_all_current$lon),]
ct_all_current_for_map$name <- str_to_title(ct_all_current_for_map$name)
ct_all_current_for_map$Is.School.Or.Daycare.Ind <- gsub("Y", "School/Daycare", ct_all_current_for_map$Is.School.Or.Daycare.Ind)
ct_all_current_for_map$Is.School.Or.Daycare.Ind <- gsub("N", "Non-School", ct_all_current_for_map$Is.School.Or.Daycare.Ind)
pal <- colorFactor(c("navy", "red"), domain = c("Non-School", "School/Daycare"))
#Population served
pop <- ct_all_current_for_map %>%
group_by(name, address, lat, lon, city.s.served, Is.School.Or.Daycare.Ind) %>%
summarize(Pop=mean(Population.Served.Count), Year=max(year.end))
pop <- pop[order(-pop$Pop),]
pop_popup <- paste0("<b>",pop$name, "</b><br/>",
pop$address, "<br />
Town served: ", pop$city.s.served,
"<br />Population served: ", pop$Pop,
"<br />Latest year violation occurred: ", pop$Year)
m <-
leaflet(pop) %>% addTiles('http://{s}.tiles.wmflabs.org/bw-mapnik/{z}/{x}/{y}.png', attribution='© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>%
setView(-72.690940, 41.651426, zoom = 8) %>%
addCircles(~lon, ~lat, weight = 1, popup=pop_popup, radius=pop$Pop*10,
color= ~pal(Is.School.Or.Daycare.Ind),, stroke = TRUE, fillOpacity = .2) %>%
addLegend("bottomright", pal=pal, values=~Is.School.Or.Daycare.Ind, labels="Type'", title="Type")
m
#Biggest violations
most <- ct_all_current_for_map %>%
group_by(name) %>%
filter(ppb == max(ppb)) %>%
filter(1:n() == 1)
most <- most[order(-most$ppb),]
most_popup <- paste0("<b>",most$name, "</b><br/>",
most$address, "<br />
Town served: ", most$city.s.served,
"<br />Highest ppb detected: ",most$ppb,
"<br />Year: ", most$year.end)
m <-
leaflet(most) %>% addTiles('http://{s}.tiles.wmflabs.org/bw-mapnik/{z}/{x}/{y}.png', attribution='© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>%
setView(-72.690940, 41.651426, zoom = 8) %>%
addCircles(~lon, ~lat, weight = 1, popup=most_popup, radius=most$ppb*25,
color= ~pal(Is.School.Or.Daycare.Ind),, stroke = TRUE, fillOpacity = .2) %>%
addLegend("bottomright", pal=pal, values=~Is.School.Or.Daycare.Ind, labels="Type'", title="Type")
m
#Total violations
count <- ct_all_current_for_map %>%
group_by(name, address, lat, lon, city.s.served, Is.School.Or.Daycare.Ind) %>%
summarize(Count=n())
count <- count[order(-count$Count),]
count_popup <- paste0("<b>",count$name, "</b><br/>",
count$address, "<br />
Town served: ", count$city.s.served,
"<br />Lead violations: ",count$Count)
m <-
leaflet(count) %>% addTiles('http://{s}.tiles.wmflabs.org/bw-mapnik/{z}/{x}/{y}.png', attribution='© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>%
setView(-72.690940, 41.651426, zoom = 8) %>%
addCircles(~lon, ~lat, weight = 1, popup=count_popup, radius=count$Count*1000,
color= ~pal(Is.School.Or.Daycare.Ind),, stroke = TRUE, fillOpacity = .2) %>%
addLegend("bottomright", pal=pal, values=~Is.School.Or.Daycare.Ind, labels="Type'", title="Type")
ct_all_current_for_map$name_link <- paste0("<a href='", ct_all_current_for_map$link, "'>", ct_all_current_for_map$name, "</a>")
list <- ct_all_current_for_map %>%
select(name_link, ppb, year.end, Population.Served.Count, Is.School.Or.Daycare.Ind, address, city.s.served, primary.water.source.type, Owner.Type.Description)
list <- data.frame(list)
datatable(list)