This is the methodology used for the story: The impact of HIV in Connecticut and across the country
Visit the repo for the data used in this analysis.
Data for this analysis was provided by AIDSVu.org, compiled from the Centers for Disease Control & Prevention, the U.S. Census, and local health departments.
library(readxl)
library(corrplot)
library(dplyr)
library(stringr)
library(dplyr)
library(tidyr)
library(rgeos)
library(maptools)
library(ggplot2) # devtools::install_github("hadley/ggplot2") only if you want subtitles/captions
library(ggalt)
library(ggthemes)
library(albersusa) # devtools::install_github("hrbrmstr/albersusa")
library(viridis)
library(scales)
library(knitr)
y2013 <- read_excel("data/AIDSVu_County_Prev_2013.xlsx", sheet=1, skip=2)
y2014 <- read_excel("data/AIDSVu_County_NewDX_2014.xlsx", sheet=1, skip=2)
colnames(y2013) <- make.names(colnames(y2013))
colnames(y2014) <- make.names(colnames(y2014))
y2013$GEO.ID <- ifelse(nchar(y2013$GEO.ID) == 4, paste0("0", y2013$GEO.ID), y2013$GEO.ID)
y2014$GEO.ID <- ifelse(nchar(y2014$GEO.ID) == 4, paste0("0", y2014$GEO.ID), y2014$GEO.ID)
y2013nums <- y2013
y2013nums$State.Abbreviation <- NULL
y2013nums$State <- NULL
y2013nums$County.Name <- NULL
y2013nums$County.Rate.Stability <- NULL
y2013nums$Male.Rate.Stability <- NULL
y2013nums$Female.Rate.Stability <- NULL
y2013nums$Black.Rate.Stability <- NULL
y2013nums$Male.Rate.Stability <- NULL
y2013nums$White.Rate.Stability <- NULL
y2013nums$Hispanic.Rate.Stability <- NULL
y2013nums$Age.13.24.Rate.Stability <- NULL
y2013nums$Age.25.34.Rate.Stability <- NULL
y2013nums$Age.35.44.Rate.Stability <- NULL
y2013nums$Age.45.54.Rate.Stability <- NULL
y2013nums$Age.55..Rate.Stability <- NULL
y2013b <- y2013
state_sum <- y2013 %>%
group_by(State) %>%
summarise(Rate=mean(County.Rate))
colnames(state_sum) <- c("County", "Overall.Rate")
state_black <- y2013 %>%
group_by(State) %>%
summarise(Rate=mean(Black.Rate))
colnames(state_black) <- c("County", "Black.Rate")
state_hispanic <- y2013 %>%
group_by(State) %>%
summarise(Rate=mean(Hispanic.Rate))
colnames(state_hispanic) <- c("County", "Hispanic.Rate")
state_white <- y2013 %>%
group_by(State) %>%
summarise(Rate=mean(White.Rate))
colnames(state_white) <- c("County", "White.Rate")
state_all <- left_join(state_sum, state_black)
## Joining by: "County"
state_all <- left_join(state_all, state_hispanic)
## Joining by: "County"
state_all <- left_join(state_all, state_white)
## Joining by: "County"
kable(state_all)
County | Overall.Rate | Black.Rate | Hispanic.Rate | White.Rate |
---|---|---|---|---|
Alabama | 215.208955 | 459.686567 | 88.358209 | 75.835821 |
Alaska | -4.000000 | -4.000000 | -4.000000 | -4.000000 |
Arizona | 133.800000 | 270.933333 | 155.800000 | 94.066667 |
Arkansas | 138.226667 | 252.973333 | 77.973333 | 70.013333 |
California | 185.448276 | 599.224138 | 188.586207 | 191.344828 |
Colorado | 88.250000 | 207.109375 | 70.734375 | 73.187500 |
Connecticut | 245.125000 | 992.000000 | 616.250000 | 129.875000 |
Delaware | 333.000000 | -2.000000 | -2.000000 | -2.000000 |
Florida | 460.283582 | 1713.343284 | 452.328358 | 212.059701 |
Georgia | 314.672956 | 803.433962 | 167.075472 | 90.088050 |
Hawaii | 121.000000 | 319.200000 | 140.000000 | 230.800000 |
Idaho | 24.045455 | -2.000000 | -2.000000 | -2.000000 |
Illinois | 112.500000 | 26.029412 | 7.794118 | 2.931373 |
Indiana | 76.902174 | 7.695652 | 3.000000 | 2.141304 |
Iowa | 28.101010 | 54.242424 | 17.191919 | 19.383838 |
Kansas | 30.257143 | 33.304762 | 12.857143 | 6.752381 |
Kentucky | 73.941667 | 60.608333 | 31.025000 | 11.325000 |
Louisiana | 332.359375 | 700.687500 | 622.109375 | 131.109375 |
Maine | 70.625000 | 243.562500 | 95.750000 | 62.375000 |
Maryland | 367.958333 | 1027.375000 | 277.791667 | 135.125000 |
Massachusetts | 274.071429 | 1077.357143 | 775.214286 | 176.642857 |
Michigan | 61.289157 | 221.289157 | 55.361446 | 36.722892 |
Minnesota | 38.770115 | 208.747126 | 129.850575 | 20.701149 |
Mississippi | 289.695122 | 536.329268 | 95.865854 | 68.670732 |
Missouri | 78.956522 | 255.634783 | 50.269565 | 54.521739 |
Montana | 9.107143 | -1.000000 | 1.964286 | 7.250000 |
Nebraska | 16.569893 | 21.559140 | 5.129032 | 1.462366 |
Nevada | 86.647059 | 414.764706 | 72.764706 | 74.529412 |
New Hampshire | 82.600000 | -2.000000 | -2.000000 | -2.000000 |
New Jersey | 428.476190 | 1522.761905 | 663.714286 | 174.047619 |
New Mexico | 96.151515 | 276.969697 | 93.181818 | 70.545455 |
New York | 344.322581 | 1509.048387 | 1895.822581 | 136.274193 |
North Carolina | 242.230000 | 669.300000 | 170.500000 | 87.260000 |
North Dakota | 3.792453 | -1.000000 | -1.000000 | -1.000000 |
Ohio | 96.761364 | 464.920455 | 98.772727 | 65.238636 |
Oklahoma | 74.649351 | 131.883117 | 38.675325 | 58.311688 |
Oregon | 67.638889 | 130.916667 | 75.944444 | 60.333333 |
Pennsylvania | 129.477612 | 645.462687 | 397.194030 | 72.164179 |
Puerto Rico | -4.000000 | -4.000000 | -4.000000 | -4.000000 |
Rhode Island | 140.600000 | 541.400000 | 235.400000 | 102.800000 |
South Carolina | 408.934783 | 867.152174 | 251.934783 | 112.021739 |
South Dakota | -2.000000 | -2.000000 | -2.000000 | -2.000000 |
Tennessee | 118.305263 | 313.463158 | 77.789474 | 75.031579 |
Texas | 137.102362 | 317.090551 | 104.023622 | 61.023622 |
Utah | 25.172414 | 135.551724 | 26.000000 | 20.655172 |
Vermont | 58.285714 | 157.785714 | 33.071429 | 51.428571 |
Virginia | 231.917293 | 659.383459 | 109.022556 | 98.646616 |
Washington | 77.538461 | 243.923077 | 89.256410 | 64.256410 |
West Virginia | 77.290909 | 264.854545 | 46.109091 | 45.236364 |
Wisconsin | 48.666667 | 184.152778 | 44.597222 | 29.791667 |
Wyoming | 32.478261 | -2.000000 | -2.000000 | -2.000000 |
NA | NA | NA | NA | NA |
Can swap out state abbrevation to get the data for any state.
ct_2013 <- subset(y2013, State.Abbreviation=="CT")
ct_2013 <- ct_2013 %>%
select(County.Name, County.Rate, Black.Rate, White.Rate, Hispanic.Rate)
ct_2013$County.Name <- gsub(" County", "", ct_2013$County.Name)
kable(ct_2013)
County.Name | County.Rate | Black.Rate | White.Rate | Hispanic.Rate |
---|---|---|---|---|
Fairfield | 343 | 1228 | 143 | 632 |
Hartford | 406 | 966 | 157 | 1156 |
Litchfield | 112 | 769 | 97 | 221 |
Middlesex | 176 | 920 | 118 | 646 |
New Haven | 422 | 1300 | 195 | 847 |
New London | 220 | 1015 | 140 | 507 |
Tolland | 92 | 365 | 78 | 204 |
Windham | 190 | 1373 | 111 | 717 |
cmap <- fortify(counties_composite(), region="fips")
y2013 <- subset(y2013b, County.Rate>1)
y2013$breaks <- "NA"
y2013$breaks <- ifelse(y2013$County.Rate < 41, "0-40", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 41 & y2013$County.Rate <= 60, "41-60", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 61 & y2013$County.Rate <= 70, "61-70", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 71 & y2013$County.Rate <= 90, "71-90", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 91 & y2013$County.Rate <= 110, "91-110", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 111 & y2013$County.Rate <= 140, "111-140", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 141 & y2013$County.Rate <= 190, "141-190", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 191 & y2013$County.Rate <= 260, "191-260", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 261 & y2013$County.Rate <= 410, "260-410", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Rate >= 411, "411+", y2013$breaks)
y2013$breaks <- factor(y2013$breaks, levels=c("0-40", "41-60", "61-70", "71-90", "91-110", "111-140", "141-190", "191-260", "260-410","411+"))
gg <- ggplot()
gg <- gg + geom_map(data=cmap, map=cmap,
aes(x=long, y=lat, map_id=id),
color="#2b2b2b", size=0.05, fill=NA)
gg <- gg + geom_map(data=y2013, map=cmap,
aes(fill=breaks, map_id=GEO.ID),
color="#2b2b2b", size=0.05, na.rm=TRUE)
gg <- gg + scale_fill_manual(values = c('#f7fcf0','#e0f3db','#ccebc5','#a8ddb5','#7bccc4','#4eb3d3','#2b8cbe','#0868ac','#084081', "#081d58"), name="Rate")
gg <- gg + labs(title="Rates of persons living with diagnosed HIV",
subtitle="In 2013 (per 100,000 residents)",
caption="TrendCT.org\nSOURCE: AIDSVu")
gg <- gg + coord_proj(us_laea_proj)
gg <- gg + theme_map(base_family="Arial Narrow")
gg <- gg + theme(legend.position=c(0.8, 0.25))
gg <- gg + theme(plot.title=element_text(face="bold", size=14, margin=margin(b=6)))
gg <- gg + theme(plot.subtitle=element_text(size=10, margin=margin(b=-14)))
gg
y2013 <- subset(y2013b, County.Cases>=1)
y2013$breaks <- "NA"
y2013$breaks <- ifelse(y2013$County.Cases >= 5 & y2013$County.Cases <=8 , "5-8", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 9 & y2013$County.Cases <= 12, "9-12", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 13 & y2013$County.Cases <= 18, "13-18", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 19 & y2013$County.Cases <= 24, "19-24", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 25 & y2013$County.Cases <= 36, "25-36", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 37 & y2013$County.Cases <= 56, "37-56", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 57 & y2013$County.Cases <= 96, "57-96", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 97 & y2013$County.Cases <= 166, "97-166", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 167 & y2013$County.Cases <= 518, "167-518", y2013$breaks)
y2013$breaks <- ifelse(y2013$County.Cases >= 519, "519+", y2013$breaks)
y2013$breaks <- factor(y2013$breaks, levels = c("5-8", "9-12", "13-18", "19-24", "25-36", "37-56", "57-96", "97-166", "167-518","519+"))
gg <- ggplot()
gg <- gg + geom_map(data=cmap, map=cmap,
aes(x=long, y=lat, map_id=id),
color="#2b2b2b", size=0.05, fill=NA)
gg <- gg + geom_map(data=y2013, map=cmap,
aes(fill=breaks, map_id=GEO.ID),
color="#2b2b2b", size=0.05)
gg <- gg + scale_fill_manual(values = c('#f7fcf0','#e0f3db','#ccebc5','#a8ddb5','#7bccc4','#4eb3d3','#2b8cbe','#0868ac','#084081', "#081d58"), name="Cases")
gg <- gg + labs(title="Number of persons living with diagnosed HIV in 2013",
caption="TrendCT.org\nSOURCE: AIDSVu")
gg <- gg + coord_proj(us_laea_proj)
gg <- gg + theme_map(base_family="Arial Narrow")
gg <- gg + theme(legend.position=c(0.8, 0.25))
gg <- gg + theme(plot.title=element_text(face="bold", size=14, margin=margin(b=6)))
gg <- gg + theme(plot.subtitle=element_text(size=10, margin=margin(b=-14)))
gg
y2014b <- y2014
y2014 <- subset(y2014b, New.Diagnoses.Rate>1)
y2014$breaks <- "NA"
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate < 6, "0-6", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 7 & y2014$New.Diagnoses.Rate <= 8, "7-8", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 9 & y2014$New.Diagnoses.Rate <= 9, "9-9", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 10 & y2014$New.Diagnoses.Rate <= 11, "10-11", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 12 & y2014$New.Diagnoses.Rate <= 14, "12-14", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 15 & y2014$New.Diagnoses.Rate <= 17, "15-17", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 18 & y2014$New.Diagnoses.Rate <= 21, "18-21", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 22 & y2014$New.Diagnoses.Rate <= 28, "22-28", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 29 & y2014$New.Diagnoses.Rate <= 40, "29-40", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Rate >= 41, "41+", y2014$breaks)
y2014$breaks <- factor(y2014$breaks, levels=c("0-6", "7-8", "9-9", "10-11", "12-14", "15-17", "18-21", "22-28", "29-40","41+"))
gg <- ggplot()
gg <- gg + geom_map(data=cmap, map=cmap,
aes(x=long, y=lat, map_id=id),
color="#2b2b2b", size=0.05, fill=NA)
gg <- gg + geom_map(data=y2014, map=cmap,
aes(fill=breaks, map_id=GEO.ID),
color="#2b2b2b", size=0.05)
gg <- gg + scale_fill_manual(values = c('#fff7ec','#fee8c8','#fdd49e','#fdbb84','#fc8d59','#ef6548','#d7301f','#b30000','#7f0000', '#67000d'), name="Rate")
gg <- gg + labs(title="Rates of persons newly diagnosed with HIV",
subtitle="In 2014 (per 100,000)",
caption="TrendCT.org\nSOURCE: AIDSVu")
gg <- gg + coord_proj(us_laea_proj)
gg <- gg + theme_map(base_family="Arial Narrow")
gg <- gg + theme(legend.position=c(0.8, 0.25))
gg <- gg + theme(plot.title=element_text(face="bold", size=14, margin=margin(b=6)))
gg <- gg + theme(plot.subtitle=element_text(size=10, margin=margin(b=-14)))
gg
y2014 <- subset(y2014b, New.Diagnoses.Cases>1)
y2014$breaks <- "NA"
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases == 5, "5", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases == 6, "6", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 7 & y2014$New.Diagnoses.Cases <= 8, "7-8", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 9 & y2014$New.Diagnoses.Cases <= 10, "9-10", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 11 & y2014$New.Diagnoses.Cases <= 13, "11-13", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 14 & y2014$New.Diagnoses.Cases <= 19, "14-19", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 20 & y2014$New.Diagnoses.Cases <= 28, "20-28", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 29 & y2014$New.Diagnoses.Cases <= 49, "29-49", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 50 & y2014$New.Diagnoses.Cases <= 111, "50-111", y2014$breaks)
y2014$breaks <- ifelse(y2014$New.Diagnoses.Cases >= 112, "112+", y2014$breaks)
y2014$breaks <- factor(y2014$breaks, levels=c("5", "6", "7-8", "9-10", "11-13", "14-19", "20-28", "29-49", "50-111","112+"))
gg <- ggplot()
gg <- gg + geom_map(data=cmap, map=cmap,
aes(x=long, y=lat, map_id=id),
color="#2b2b2b", size=0.05, fill=NA)
gg <- gg + geom_map(data=y2014, map=cmap,
aes(fill=breaks, map_id=GEO.ID),
color="#2b2b2b", size=0.05)
gg <- gg + scale_fill_manual(values = c('#fff7ec','#fee8c8','#fdd49e','#fdbb84','#fc8d59','#ef6548','#d7301f','#b30000','#7f0000', '#67000d'), name="Cases")
gg <- gg + labs(title="Number of persons newly diagnosed with HIV in 2014",
caption="TrendCT.org\nSOURCE: AIDSVu")
gg <- gg + coord_proj(us_laea_proj)
gg <- gg + theme_map(base_family="Arial Narrow")
gg <- gg + theme(legend.position=c(0.8, 0.25))
gg <- gg + theme(plot.title=element_text(face="bold", size=14, margin=margin(b=6)))
gg <- gg + theme(plot.subtitle=element_text(size=10, margin=margin(b=-14)))
gg
ct_2014 <- subset(y2014, State.Abbreviation=="CT")
ct_2014 <- ct_2014 %>%
select(County.Name, New.Diagnoses.Rate, New.Diagnoses.Cases)
ct_2014$County.Name <- gsub(" County", "", ct_2014$County.Name)
kable(ct_2014)
County.Name | New.Diagnoses.Rate | New.Diagnoses.Cases |
---|---|---|
Fairfield | 13 | 101 |
Hartford | 11 | 81 |
Middlesex | 7 | 10 |
New Haven | 12 | 86 |
New London | 7 | 16 |
Tolland | 4 | 5 |