Snowfall data for Boston, Chicago, and NYC: seasonal totals in inches for winters 1940-41 through 2015-16.
library(tidyverse)
library(gganimate)
library(lubridate)
library(png)
library(grid)
library(RCurl)
# Download the seasonal snowfall table (one row per winter, one column per
# city) and preview its first six rows.
snowfall_url <- "https://raw.githubusercontent.com/shahnp/data/master/Bos_NYC_Chicago_snowfall.txt"
snowfall_data <- rio::import(file = snowfall_url)
head(snowfall_data, n = 6L)
## Winter Boston Chicago NYC
## 1 1940-1941 47.8 52.5 39.0
## 2 1941-1942 23.9 29.8 11.3
## 3 1942-1943 45.7 45.2 29.5
## 4 1943-1944 27.7 24.0 23.8
## 5 1944-1945 59.2 34.9 27.1
## 6 1945-1946 50.8 23.9 31.4
# Show the last six winters to confirm the series runs through 2015-2016.
tail(snowfall_data)
## Winter Boston Chicago NYC
## 71 2010-2011 81.0 57.9 61.9
## 72 2011-2012 9.3 19.8 7.4
## 73 2012-2013 63.4 30.1 26.1
## 74 2013-2014 58.9 82.0 57.4
## 75 2014-2015 110.6 50.7 50.3
## 76 2015-2016 36.2 31.2 32.1
Let’s find Minimum Snowfall for all three cities.
# Smallest seasonal snowfall total on record for each city.
min(snowfall_data[["Boston"]])
## [1] 9.3
min(snowfall_data[["Chicago"]])
## [1] 14.3
min(snowfall_data[["NYC"]])
## [1] 2.8
Let’s find Maximum Snowfall for all three cities.
# Largest seasonal snowfall total on record for each city.
max(snowfall_data[["Boston"]])
## [1] 110.6
max(snowfall_data[["Chicago"]])
## [1] 89.7
max(snowfall_data[["NYC"]])
## [1] 75.6
Find the minimum and maximum snowfall for each city.
# Min and max snowfall per city (rows = cities, columns = min, max).
# Only the numeric city columns are summarised: apply() on the whole data
# frame coerces it to a character matrix, so range() would compare the values
# as padded strings and also report the non-snowfall Winter label.
t(vapply(Filter(is.numeric, snowfall_data), range, numeric(2), na.rm = TRUE))
## [,1] [,2]
## Boston 9.3 110.6
## Chicago 14.3 89.7
## NYC 2.8 75.6
Maximum Snowfall
# For each city, pull the full record of its snowiest winter, then stack the
# three records into one table (original row numbers are kept as row names).
bos_max <- snowfall_data[with(snowfall_data, which.max(Boston)), ]
chic_max <- snowfall_data[with(snowfall_data, which.max(Chicago)), ]
nyc_max <- snowfall_data[with(snowfall_data, which.max(NYC)), ]
max_snowfall <- rbind(bos_max, chic_max, nyc_max)
max_snowfall
## Winter Boston Chicago NYC
## 75 2014-2015 110.6 50.7 50.3
## 39 1978-1979 27.5 89.7 29.4
## 56 1995-1996 107.6 23.9 75.6
Minimum Snowfall
# For each city, pull the full record of its least snowy winter, then stack
# the three records into one table (original row numbers are kept as row names).
bos_min <- snowfall_data[with(snowfall_data, which.min(Boston)), ]
chic_min <- snowfall_data[with(snowfall_data, which.min(Chicago)), ]
nyc_min <- snowfall_data[with(snowfall_data, which.min(NYC)), ]
min_snowfall <- rbind(bos_min, chic_min, nyc_min)
min_snowfall
## Winter Boston Chicago NYC
## 72 2011-2012 9.3 19.8 7.4
## 9 1948-1949 37.1 14.3 46.6
## 33 1972-1973 10.3 32.9 2.8
# Reshape to long form (one row per winter/city pair), keeping NA values, and
# group the result by winter.
long_snowfall <- snowfall_data %>%
  gather(key = "City", value = "total_snowfall", -Winter, na.rm = FALSE) %>%
  group_by(Winter)
head(long_snowfall, n = 20)
## # A tibble: 20 x 3
## # Groups: Winter [20]
## Winter City total_snowfall
## <chr> <chr> <dbl>
## 1 1940-1941 Boston 47.8
## 2 1941-1942 Boston 23.9
## 3 1942-1943 Boston 45.7
## 4 1943-1944 Boston 27.7
## 5 1944-1945 Boston 59.2
## 6 1945-1946 Boston 50.8
## 7 1946-1947 Boston 19.4
## 8 1947-1948 Boston 89.2
## 9 1948-1949 Boston 37.1
## 10 1949-1950 Boston 32
## 11 1950-1951 Boston 29.7
## 12 1951-1952 Boston 31.9
## 13 1952-1953 Boston 29.8
## 14 1953-1954 Boston 23.6
## 15 1954-1955 Boston 25.1
## 16 1955-1956 Boston 60.9
## 17 1956-1957 Boston 52
## 18 1957-1958 Boston 44.7
## 19 1958-1959 Boston 34.1
## 20 1959-1960 Boston 40.9
# Last 20 rows of the long table: the NYC rows for winters 1996-1997 onward.
tail(long_snowfall,20)
## # A tibble: 20 x 3
## # Groups: Winter [20]
## Winter City total_snowfall
## <chr> <chr> <dbl>
## 1 1996-1997 NYC 10
## 2 1997-1998 NYC 5.5
## 3 1998-1999 NYC 12.7
## 4 1999-2000 NYC 16.3
## 5 2000-2001 NYC 35
## 6 2001-2002 NYC 3.5
## 7 2002-2003 NYC 49.3
## 8 2003-2004 NYC 42.6
## 9 2004-2005 NYC 41
## 10 2005-2006 NYC 40
## 11 2006-2007 NYC 12.4
## 12 2007-2008 NYC 11.9
## 13 2008-2009 NYC 27.6
## 14 2009-2010 NYC 51.4
## 15 2010-2011 NYC 61.9
## 16 2011-2012 NYC 7.4
## 17 2012-2013 NYC 26.1
## 18 2013-2014 NYC 57.4
## 19 2014-2015 NYC 50.3
## 20 2015-2016 NYC 32.1
Let's set up a static plot using the first three winters (1940-41 through 1942-43).
# Subset for the static plot: the three earliest winters in the data.
first_winters <- c("1940-1941", "1941-1942", "1942-1943")
long_snowfall_data_1940_41 <- filter(long_snowfall, Winter %in% first_winters)
head(long_snowfall_data_1940_41)
## # A tibble: 6 x 3
## # Groups: Winter [3]
## Winter City total_snowfall
## <chr> <chr> <dbl>
## 1 1940-1941 Boston 47.8
## 2 1941-1942 Boston 23.9
## 3 1942-1943 Boston 45.7
## 4 1940-1941 Chicago 52.5
## 5 1941-1942 Chicago 29.8
## 6 1942-1943 Chicago 45.2
# Static lollipop chart of total snowfall per city for the early-1940s subset.
# The subset built just before this plot is used rather than the full
# 76-winter table, which would stack every winter's point on the same three
# stems and leave the subset unused.
ggplot(long_snowfall_data_1940_41,
       aes(x = City, y = total_snowfall, label = City, colour = City)) +
  geom_point(size = 3.5) +
  # Stems start at 2.8, the lowest total in the data (NYC, 1972-1973).
  geom_segment(aes(x = City,
                   y = 2.8,
                   xend = City,
                   yend = total_snowfall)) +
  # geom_text(color = "black", size = 3) +
  coord_flip() + # cities on the vertical axis, snowfall on the horizontal
  theme(legend.position = "none")
# Peek at the first two rows of the subset, then of the original wide table.
head(long_snowfall_data_1940_41, n = 2)
## # A tibble: 2 x 3
## # Groups: Winter [2]
## Winter City total_snowfall
## <chr> <chr> <dbl>
## 1 1940-1941 Boston 47.8
## 2 1941-1942 Boston 23.9
head(snowfall_data, n = 2)
## Winter Boston Chicago NYC
## 1 1940-1941 47.8 52.5 39.0
## 2 1941-1942 23.9 29.8 11.3
# Split the "YYYY-YYYY" winter label into its start and end years.
snowfall_data <- separate(
  snowfall_data,
  col = Winter,
  into = c("begin", "end"),
  sep = "-"
)
head(snowfall_data)
## begin end Boston Chicago NYC
## 1 1940 1941 47.8 52.5 39.0
## 2 1941 1942 23.9 29.8 11.3
## 3 1942 1943 45.7 45.2 29.5
## 4 1943 1944 27.7 24.0 23.8
## 5 1944 1945 59.2 34.9 27.1
## 6 1945 1946 50.8 23.9 31.4
# Convert the year strings to Dates. `truncated = 2L` lets ymd() accept a bare
# year with the month and day missing; each value becomes January 1st of
# that year.
year_cols <- c("begin", "end")
snowfall_data[year_cols] <- lapply(
  snowfall_data[year_cols],
  lubridate::ymd,
  truncated = 2L
)
glimpse(snowfall_data)
## Observations: 76
## Variables: 5
## $ begin <date> 1940-01-01, 1941-01-01, 1942-01-01, 1943-01-01, 1944-01…
## $ end <date> 1941-01-01, 1942-01-01, 1943-01-01, 1944-01-01, 1945-01…
## $ Boston <dbl> 47.8, 23.9, 45.7, 27.7, 59.2, 50.8, 19.4, 89.2, 37.1, 32…
## $ Chicago <dbl> 52.5, 29.8, 45.2, 24.0, 34.9, 23.9, 34.1, 38.1, 14.3, 33…
## $ NYC <dbl> 39.0, 11.3, 29.5, 23.8, 27.1, 31.4, 30.6, 63.2, 46.6, 13…
# Long form of the dated table: one row per (winter, city) with its snowfall.
long_DF <- gather(
  snowfall_data,
  key = "City",
  value = "total_snowfall",
  Boston:NYC
)
# Only the first 24 rows are shown for brevity; rows are ordered by city, so
# these are the Boston totals for the winters beginning 1940 through 1963.
head(long_DF, n = 24)
## begin end City total_snowfall
## 1 1940-01-01 1941-01-01 Boston 47.8
## 2 1941-01-01 1942-01-01 Boston 23.9
## 3 1942-01-01 1943-01-01 Boston 45.7
## 4 1943-01-01 1944-01-01 Boston 27.7
## 5 1944-01-01 1945-01-01 Boston 59.2
## 6 1945-01-01 1946-01-01 Boston 50.8
## 7 1946-01-01 1947-01-01 Boston 19.4
## 8 1947-01-01 1948-01-01 Boston 89.2
## 9 1948-01-01 1949-01-01 Boston 37.1
## 10 1949-01-01 1950-01-01 Boston 32.0
## 11 1950-01-01 1951-01-01 Boston 29.7
## 12 1951-01-01 1952-01-01 Boston 31.9
## 13 1952-01-01 1953-01-01 Boston 29.8
## 14 1953-01-01 1954-01-01 Boston 23.6
## 15 1954-01-01 1955-01-01 Boston 25.1
## 16 1955-01-01 1956-01-01 Boston 60.9
## 17 1956-01-01 1957-01-01 Boston 52.0
## 18 1957-01-01 1958-01-01 Boston 44.7
## 19 1958-01-01 1959-01-01 Boston 34.1
## 20 1959-01-01 1960-01-01 Boston 40.9
## 21 1960-01-01 1961-01-01 Boston 61.5
## 22 1961-01-01 1962-01-01 Boston 44.7
## 23 1962-01-01 1963-01-01 Boston 30.9
## 24 1963-01-01 1964-01-01 Boston 63.0
# Check the last rows of the wide table after the begin/end date conversion.
tail(snowfall_data)
## begin end Boston Chicago NYC
## 71 2010-01-01 2011-01-01 81.0 57.9 61.9
## 72 2011-01-01 2012-01-01 9.3 19.8 7.4
## 73 2012-01-01 2013-01-01 63.4 30.1 26.1
## 74 2013-01-01 2014-01-01 58.9 82.0 57.4
## 75 2014-01-01 2015-01-01 110.6 50.7 50.3
## 76 2015-01-01 2016-01-01 36.2 31.2 32.1
# Fetch the background image over HTTP and decode it as a PNG.
myurl <- "https://raw.githubusercontent.com/shahnp/data/master/Image/snowfall.png"
snowfall_png_content <- getURLContent(myurl)
snowfall_background <- readPNG(snowfall_png_content)
# Animated lollipop chart: each city's seasonal snowfall drawn over the
# background image, with frames driven by the `begin` date of each winter.
p <- ggplot(long_DF,
            aes(x = City, y = total_snowfall, label = City, colour = City)) +
  # Stretch the background picture across the whole panel.
  annotation_custom(rasterGrob(snowfall_background,
                               width = unit(1, "npc"),
                               height = unit(1, "npc")),
                    -Inf, Inf, -Inf, Inf) +
  geom_point(size = 15) +
  # Reference line; 37.47 is presumably the overall mean snowfall -- TODO
  # confirm against the data.
  geom_hline(yintercept = 37.47, linetype = "dotted",
             color = "Red", size = 1.5) +
  # geom_text() has no `text` argument (it was ignored with a warning), so the
  # intended 18 pt label size is passed through `size`, converted from points
  # to the millimetres geom_text() expects.
  geom_text(aes(x = 1.5, label = "Mean Snowfall", y = 45),
            colour = "blue", vjust = 1.2, size = 18 / .pt) +
  # Stems start at 2.8, the lowest total in the data (NYC, 1972-1973).
  geom_segment(aes(x = City,
                   y = 2.8,
                   xend = City,
                   yend = total_snowfall)) +
  # City name drawn on top of each point.
  geom_text(color = "black", size = 3) +
  theme(legend.position = "none") +
  theme(plot.title = element_text(hjust = .5), # centre the plot title
        axis.ticks = element_blank()) +
  ylim(0, 120) +
  labs(title = "Snowfall Change {frame_time}",
       x = "City", y = "Total Snow (inches)") +
  transition_time(begin) +
  ease_aes("linear") # linear easing between successive time points
# animate(p, nframes = 300, fps = 5, end_pause = 20)