The dataset was proposed by Jimmy Ng; the details can be found here.
For the state of New York, the average maximum temperature peaks at about 33.5°C in July, while the average minimum temperature bottoms out at about -21.7°C in January.
suppressMessages(library(xml2))
suppressMessages(library(rvest))
suppressMessages(library(tidyverse))
# Scrape the climate table: fetch the Wikipedia article, collect every
# <table> node on the page, keep the third one and parse it into a data frame.
df <- "https://en.wikipedia.org/wiki/Climate_of_New_York" %>%
  read_html() %>%
  html_nodes("table") %>%
  `[[`(3) %>%                # third table on the page
  html_table(trim = TRUE)
# Reshape the scraped climate table into long form: one row per
# measure x month x temperature scale, with columns Measure, Month, Scale, Temp.
#
# - The first row of `df` carries the real column labels ("Month", the twelve
#   month abbreviations, "Year"), so it is promoted to the header.
# - Only rows whose label contains the literal unit "(°C)" are kept, which
#   also drops the promoted header row itself.
# - Each cell is split at "(" into an F part and a C part.
# - Columns are selected by name rather than by position so that a change in
#   the table layout fails loudly instead of silently reshaping the wrong
#   columns.
#
# gather() stacks column by column, so rows come out month by month (every
# measure for Jan, then every measure for Feb, ...), which is the order the
# head(df2) preview shows. Temp is left as text here.
df2 <- df[1:15, ] %>%
  setNames(as.character(df[1, ])) %>%            # first row -> column names
  rename(Measure = Month) %>%                     # label column names the measure
  filter(str_detect(Measure, fixed("(°C)"))) %>%  # literal match, not a regex group
  gather("Month", "Temp", Jan:Dec) %>%
  separate(Temp, into = c("F", "C"), sep = "\\(") %>%
  mutate(
    C = str_replace(C, "\\)", ""),
    # Strip the unit suffix together with the whitespace in front of it so the
    # label does not keep a trailing space.
    Measure = str_replace(Measure, "\\s*°F \\(°C\\)", "")
  ) %>%
  gather("Scale", "Temp", c("F", "C")) %>%
  select(-Year)
head(df2)
## Measure Month Scale Temp
## 1 Record high Jan F 71
## 2 Mean maximum Jan F 52.8
## 3 Average high Jan F 30.6
## 4 Daily mean Jan F 22.6
## 5 Average low Jan F 14.5
## 6 Mean minimum Jan F -7.1
#' Extract the first number embedded in each string
#'
#' Adapted from: Extraction of numbers from a character string. (n.d.).
#' Retrieved March 10, 2019, from
#' http://stla.github.io/stlapblog/posts/Numextract.html
#'
#' The pattern accepts at most one leading ASCII hyphen and at most one
#' decimal point followed by digits, so the extracted token is always a
#' well-formed number (inputs such as "--7" or "3..4" yield "-7" and "3"
#' rather than a token that `as.numeric()` cannot parse). Only the ASCII
#' hyphen is treated as a sign; any other dash-like character in front of the
#' digits is ignored and the unsigned digits are returned.
#'
#' @param string Character vector to search.
#' @return Character vector the same length as `string` holding the first
#'   numeric token of each element, or `NA` where no digits are found.
numextract <- function(string) {
  str_extract(string, "-?\\d+(?:\\.\\d+)?")
}
# Build the plotting data: the monthly "Mean maximum" and "Mean minimum"
# rows on the Celsius scale, with Temp converted from text to numeric.
#
# numextract() only recognises an ASCII hyphen as a sign, while negative
# readings scraped from the page may carry a typographic minus (presumably
# U+2212 -- confirm against the page). Rather than copying the sign character
# out of one particular cell that happens to be negative, every dash-like
# character is normalised to "-" before the number is extracted, so the sign
# is parsed directly and no row position is relied upon.
minus_like <- "[\u2010-\u2015\u2212]"  # hyphens, dashes and the minus sign
df3 <- df2 %>%
  filter(
    str_detect(Measure, "Mean maximum|Mean minimum"),
    Scale == "C"
  ) %>%
  mutate(
    Temp = as.numeric(numextract(str_replace_all(Temp, minus_like, "-")))
  )
df3
## Measure Month Scale Temp
## 1 Mean maximum Jan C 11.6
## 2 Mean minimum Jan C -21.7
## 3 Mean maximum Feb C 11.9
## 4 Mean minimum Feb C -19.3
## 5 Mean maximum Mar C 20.1
## 6 Mean minimum Mar C -14.2
## 7 Mean maximum Apr C 27.6
## 8 Mean minimum Apr C -4.9
## 9 Mean maximum May C 30.4
## 10 Mean minimum May C 0.4
## 11 Mean maximum Jun C 32.8
## 12 Mean minimum Jun C 5.6
## 13 Mean maximum Jul C 33.5
## 14 Mean minimum Jul C 9.7
## 15 Mean maximum Aug C 32.9
## 16 Mean minimum Aug C 8.1
## 17 Mean maximum Sep C 30.0
## 18 Mean minimum Sep C 2.3
## 19 Mean maximum Oct C 24.9
## 20 Mean minimum Oct C -3.4
## 21 Mean maximum Nov C 20.1
## 22 Mean minimum Nov C -8.9
## 23 Mean maximum Dec C 12.7
## 24 Mean minimum Dec C -16.9
# Line chart of df3: one coloured line per Measure across the twelve months.
# The legend is switched off.
ggplot(df3, aes(x = Month, y = Temp, group = Measure)) +
  geom_line(aes(colour = Measure)) +       # group is inherited from ggplot()
  scale_x_discrete(limits = month.abb) +   # Jan..Dec order on the x axis
  labs(
    x = "Month",
    y = "Average Temperature (°C)",
    title = "Temperatures of the State of New York",
    subtitle = "Average Maximum & Minimum (°C)",
    caption = "Source: Wikipedia contributors"
  ) +
  theme(legend.position = "none")
Wikipedia contributors. “Climate of New York.” Wikipedia, The Free Encyclopedia. Wikipedia, The Free Encyclopedia, 8 Mar. 2019. Web. 10 Mar. 2019.