This is a walkthrough to accompany the TrendCT story: How hot has it been in Connecticut this summer?
These visualizations would not have been possible without the excellent work from Bradley Boehmke’s Dayton’s Weather in 2014 and Randy Olson’s The New York Times weather chart redux.
Before we begin, you need a dataframe of historical temperature data. Please refer to the wunderground_scraper.R script on how to collect the appropriate data. This is how the data should look.
library(knitr)
airp <- read.csv("KHFD.csv", stringsAsFactors=FALSE)
# Label the town for the chart title
town <- "Hartford"
kable(head(airp))
row | date | actual_mean_temp | actual_min_temp | actual_max_temp | average_min_temp | average_max_temp | record_min_temp | record_max_temp | record_min_temp_year | record_max_temp_year |
---|---|---|---|---|---|---|---|---|---|---|
1 | 6/1/14 | 60 | 45 | 77 | 54 | 75 | 39 | 95 | 1967 | 1937 |
2 | 6/2/14 | 67 | 54 | 80 | 53 | 76 | 43 | 89 | 1946 | 1934 |
3 | 6/3/14 | 70 | 57 | 84 | 53 | 76 | 37 | 97 | 1986 | 1919 |
4 | 6/4/14 | 68 | 62 | 75 | 53 | 75 | 41 | 97 | 1984 | 1925 |
5 | 6/5/14 | 65 | 61 | 70 | 53 | 75 | 44 | 96 | 1944 | 1925 |
6 | 6/6/14 | 66 | 59 | 75 | 53 | 76 | 40 | 96 | 1964 | 1925 |
It has max and min average, actual, and record temperatures for each day.
Now let’s load the packages to work the data into something to visualize with
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
Restructuring the data so it plays nice with ggplot2
airp2 <- gather(airp, "type", "temp", 4:11)
Function to turn y-axis labels into degree formatted values and set the range from -20 to 100.
dgr_fmt <- function(x, ...) {
parse(text = paste(x, "*degree", sep = ""))
}
a <- dgr_fmt(seq(-20,100, by=10))
Bringing in a package that will allow use of Google fonts to match TrendCT style.
library(extrafont)
## Registering fonts with R
Now plotting the chart layer by layer with geom_linerange()
p <- ggplot(airp, aes(row, average_min_temp)) +
theme(plot.background = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
geom_linerange(airp, mapping=aes(x=row, ymin=record_min_temp, ymax=record_max_temp), colour = "sienna", alpha=.5)
p <- p +
geom_linerange(airp, mapping=aes(x=row, ymin=average_min_temp, ymax=average_max_temp), colour = "sienna1", alpha=.8)
p <- p +
geom_linerange(airp, mapping=aes(x=row, ymin=actual_min_temp, ymax=actual_max_temp), colour = "sienna4") +
geom_vline(xintercept = 0, colour = "sienna4", linetype=1, size=1)
# The colors used in the chart layers above can be replaced with any from
# http://sape.inf.usi.ch/quick-reference/ggplot2/colour
# Make the grid look pretty
p <- p +
geom_hline(yintercept = -20, colour = "white", linetype=1) +
geom_hline(yintercept = -10, colour = "white", linetype=1) +
geom_hline(yintercept = 0, colour = "white", linetype=1) +
geom_hline(yintercept = 10, colour = "white", linetype=1) +
geom_hline(yintercept = 20, colour = "white", linetype=1) +
geom_hline(yintercept = 30, colour = "white", linetype=1) +
geom_hline(yintercept = 40, colour = "white", linetype=1) +
geom_hline(yintercept = 50, colour = "white", linetype=1) +
geom_hline(yintercept = 60, colour = "white", linetype=1) +
geom_hline(yintercept = 70, colour = "white", linetype=1) +
geom_hline(yintercept = 80, colour = "white", linetype=1) +
geom_hline(yintercept = 90, colour = "white", linetype=1) +
geom_hline(yintercept = 100, colour = "white", linetype=1)
# Identifying the months based on number of days
p <- p +
#June - 30
geom_vline(xintercept = 30, colour = "wheat4", linetype=3, size=.5) +
#July - 31
geom_vline(xintercept = 61, colour = "wheat4", linetype=3, size=.5) +
# August - 31
geom_vline(xintercept = 92, colour = "wheat4", linetype=3, size=.5) +
# September - 30
geom_vline(xintercept = 122, colour = "wheat4", linetype=3, size=.5) +
# October 31
geom_vline(xintercept = 153, colour = "wheat4", linetype=3, size=.5) +
# November - 30
geom_vline(xintercept = 183, colour = "wheat4", linetype=3, size=.5) +
# December - 31
geom_vline(xintercept = 214, colour = "wheat4", linetype=3, size=.5) +
# January - 31
geom_vline(xintercept = 245, colour = "wheat4", linetype=3, size=.5) +
# February - 28
geom_vline(xintercept = 273, colour = "wheat4", linetype=3, size=.5) +
# March - 31
geom_vline(xintercept = 304, colour = "wheat4", linetype=3, size=.5) +
# April - 30
geom_vline(xintercept = 334, colour = "wheat4", linetype=3, size=.5) +
# May - 31
geom_vline(xintercept = 365, colour = "wheat4", linetype=3, size=.5) +
# June - 30
geom_vline(xintercept = 396, colour = "wheat4", linetype=3, size=.5) +
# July - 31
geom_vline(xintercept = 427, colour = "wheat4", linetype=3, size=.5)
# August - 31 (19 so far)
# Establishing the x axis
p <- p +
coord_cartesian(ylim = c(-20,100)) +
scale_y_continuous(breaks = seq(-20,100, by=10), labels = a) +
scale_x_continuous(expand = c(0, 0),
breaks = c(15,45,75,105,135,165,195,228,258,288,320,350, 380, 410, 440),
labels = c("JUN", "JUL", "AUG", "SEP", "OCT",
"NOV", "DEC", "JAN", "FEB", "MAR",
"APR", "MAY", "JUN", "JUL", "AUG"))
# Identifying the record-breaking days by comparing actual vs record
rlow3 <- airp[airp$actual_min_temp<=airp$record_min_temp,]
rhigh3 <- airp[airp$actual_max_temp>=airp$record_max_temp,]
# Adding them to the chart with specific colors
p <- p +
geom_point(data=rlow3, aes(x=row, y=record_min_temp), colour="blue3") +
geom_point(data=rhigh3, aes(x=row, y=record_max_temp), colour="red3")
# Adding a title based on the variable set above
title <- paste0(town, "'s weather since summer 2014")
# Setting the title
p <- p +
ggtitle(title) +
theme(plot.title=element_text(face="bold",hjust=.012,vjust=.8,colour="#3C3C3C",size=20, family="Lato")) +
annotate("text", x = 28, y = 98, label = "Temperature", size=4, fontface="bold", family="Lato Black")
# Now for the legend
p <- p +
annotate("segment", x = 65, xend = 65, y = -6, yend = 16, colour = "sienna", , alpha=.5, size=3) +
annotate("segment", x = 65, xend = 65, y = 0, yend = 10, colour = "sienna1", , alpha=.8, size=3) +
annotate("segment", x = 65, xend = 65, y = 2, yend = 8, colour = "sienna4", size=3) +
annotate("segment", x = 58, xend = 62, y = 10, yend = 10, colour = "gray30", size=.5) +
annotate("segment", x = 58, xend = 62, y = 0, yend = 0, colour = "gray30", size=.5) +
annotate("segment", x = 60, xend = 60, y = 10, yend = 0, colour = "gray30", size=.5) +
annotate("text", x = 32, y = 5, label = "AVERAGE RANGE", size=3, colour="gray30") +
annotate("segment", x = 68, xend = 72, y = 8, yend = 8, colour = "gray30", size=.5) +
annotate("segment", x = 68, xend = 72, y = 2, yend = 2, colour = "gray30", size=.5) +
annotate("segment", x = 70, xend = 70, y = 8, yend = 2, colour = "gray30", size=.5) +
annotate("text", x = 104, y = 5, label = "2014 - 2015 RANGE", size=3.5, colour="gray30") +
annotate("text", x = 42, y = 13, label = "RECORD HIGH", size=3, colour="gray30") +
annotate("text", x = 43, y = -3, label = "RECORD LOW", size=3, colour="gray30") +
annotate("segment", x = 67, xend = 76, y = 17, yend = 17, colour = "gray30", size=.5) +
annotate("segment", x = 67, xend = 76, y = -7, yend = -7, colour = "gray30", size=.5) +
annotate("point", x = 65, y = 17, colour = "red", size = 2) +
annotate("point", x = 65, y = -7, colour = "blue", size = 2) +
annotate("text", x = 106, y = 17, label = "NEW RECORD HIGH", size=3, colour="gray30") +
annotate("text", x = 106, y = -7, label = "NEW RECORD LOW", size=3, colour="gray30") +
annotate("text", x = 390, y = -15, label = "Source: Weather Underground", size=4, fontface="italic", colour="gray30")
print(p)
Pretty cool. To generate a new visualization, just rename the CSV and town name in the first chunk of code above.