Part 1

Question 1

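The outlier is a wind speed of 1048, recorded at EWR on 2013-02-12 at hour 3. It is most likely a recording error, for example a broken recorder or an instrument that tracked the wrong object, because a value this high is not plausible.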

# Show the observation(s) whose wind speed exceeds 250.
filter(weather, wind_speed > 250)
## # A tibble: 1 × 15
##   origin  year month   day  hour  temp  dewp humid wind_dir wind_speed wind_gust
##   <chr>  <int> <int> <int> <int> <dbl> <dbl> <dbl>    <dbl>      <dbl>     <dbl>
## 1 EWR     2013     2    12     3  39.0  27.0  61.6      260      1048.        NA
## # ℹ 4 more variables: precip <dbl>, pressure <dbl>, visib <dbl>,
## #   time_hour <dttm>
# Scatter plot of wind speed against time for the EWR rows only.
weather %>%
  filter(origin == "EWR") %>%
  ggplot(aes(x = time_hour, y = wind_speed)) +
  geom_point(alpha = 1, color = "black")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Question 2

No, North West is the most frequent wind direction.

# Bin wind direction (degrees) into the eight compass points and count how
# often each one occurs. Rows with a missing `wind_dir` match no condition and
# therefore stay NA. The results are stored as `wind_by_dir` / `dir_counts` so
# that base R's `return()` and `plot()` are not masked.
compass_levels <- c("N", "NE", "E", "SE", "S", "SW", "W", "NW")

wind_by_dir <- weather %>%
  mutate(
    var = case_when(
      wind_dir >= 337.5 | wind_dir < 22.5 ~ "N",
      wind_dir >= 22.5 & wind_dir < 67.5 ~ "NE",
      wind_dir >= 67.5 & wind_dir < 112.5 ~ "E",
      wind_dir >= 112.5 & wind_dir < 157.5 ~ "SE",
      wind_dir >= 157.5 & wind_dir < 202.5 ~ "S",
      wind_dir >= 202.5 & wind_dir < 247.5 ~ "SW",
      wind_dir >= 247.5 & wind_dir < 292.5 ~ "W",
      wind_dir >= 292.5 & wind_dir < 337.5 ~ "NW"
    ),
    # Order the bars by compass position instead of alphabetically.
    var = factor(var, levels = compass_levels)
  )

dir_counts <- wind_by_dir %>% count(var)

# One bar per compass point; bar height is the number of observations.
ggplot(dir_counts, aes(x = var, y = n)) +
  geom_col()

Question 3

The most frequent wind direction in each season is: Fall – North, Spring – North West, Summer – South West, Winter – West.

# Count wind directions (eight compass points) within each season and draw
# them as dodged bars.
weather %>%
  mutate(
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      TRUE ~ "Fall"
    ),
    wind_compass = case_when(
      # A missing direction must stay missing: without this guard every NA
      # falls through all comparisons and is labelled "NW" by the catch-all.
      is.na(wind_dir) ~ NA_character_,
      wind_dir >= 337.5 | wind_dir < 22.5 ~ "N",
      wind_dir < 67.5 ~ "NE",
      wind_dir < 112.5 ~ "E",
      wind_dir < 157.5 ~ "SE",
      wind_dir < 202.5 ~ "S",
      wind_dir < 247.5 ~ "SW",
      wind_dir < 292.5 ~ "W",
      TRUE ~ "NW"
    )
  ) %>%
  ggplot(aes(x = season, fill = wind_compass)) +
  geom_bar(position = "dodge")

Question 4

No, the temperature does not look normally distributed: its histogram and density curve do not follow the bell curve of the fitted normal distribution. The Q-Q plot confirms this.

# Histogram of temperature on the density scale, overlaid with a normal curve
# using the sample mean and standard deviation (red) and a kernel density
# estimate (blue). Uses `after_stat(density)` and `linewidth`, which replace
# the `..density..` notation and the `size` argument for lines that were
# deprecated in ggplot2 3.4.0.
ggplot(weather, aes(x = temp)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(weather$temp, na.rm = TRUE),
      sd = sd(weather$temp, na.rm = TRUE)
    ),
    color = "red",
    linewidth = 1
  ) +
  geom_density(color = "blue", linewidth = 1)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).

# Normal Q-Q plot of temperature (no title) with a black reference line.
temp_values <- weather$temp
qqnorm(temp_values, main = "")
qqline(temp_values, col = "black", lwd = 2)

Question 5

# Box plot of temperature per season, annotated with pairwise comparisons for
# Summer vs Winter and Spring vs Fall.
weather %>%
  mutate(
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      TRUE ~ "Fall"
    )
  ) %>%
  ggplot(aes(x = season, y = temp, fill = season)) +
  geom_boxplot() +
  stat_compare_means(
    comparisons = list(c("Summer", "Winter"), c("Spring", "Fall"))
  )
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_signif()`).

Part 2

Question 6

# Daily minimum and maximum temperature, labelled with the season of each day.
# `.groups = "drop"` returns an ungrouped tibble, and the result is stored as
# `daily_temps` so that base R's `return()` is not masked.
daily_temps <- weather %>%
  group_by(year, month, day) %>%
  summarise(
    min_temp = min(temp, na.rm = TRUE),
    max_temp = max(temp, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      TRUE ~ "Fall"
    )
  )

# Daily maximum against daily minimum, coloured by season, with a linear fit
# drawn in black.
ggplot(daily_temps, aes(x = min_temp, y = max_temp, color = season)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "black")
## `geom_smooth()` using formula = 'y ~ x'

Question 7

# Daily mean temperature and mean wind speed, labelled with the season of each
# day. `.groups = "drop"` returns an ungrouped tibble.
daily_weather <- weather %>%
  group_by(year, month, day) %>%
  summarise(
    mean_temp = mean(temp, na.rm = TRUE),
    mean_wind = mean(wind_speed, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      TRUE ~ "Fall"
    )
  )

# Mean wind speed against mean temperature, coloured by season, with a
# smoother and no confidence band.
ggplot(daily_weather, aes(x = mean_temp, y = mean_wind, color = season)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Question 8

No, I don’t think so, primarily because of the large outlier in Spring.

# Per-origin daily maximum and minimum temperature, plus their ratio and
# difference, labelled by season. `.groups = "drop"` replaces the separate
# `ungroup()` step.
daily <- weather %>%
  group_by(origin, year, month, day) %>%
  summarise(
    tmax = max(temp, na.rm = TRUE),
    tmin = min(temp, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(9, 10, 11) ~ "Fall"
    ),
    ratio = tmax / tmin,
    diff = tmax - tmin
  )

# Left: box plot of the max/min ratio per season.
p1 <- ggplot(daily, aes(x = season, y = ratio)) +
  geom_boxplot()

# Right: histogram of the max - min difference, one panel per season.
p2 <- ggplot(daily, aes(x = diff)) +
  geom_histogram() +
  facet_wrap(~season, nrow = 2)

# Combine the two plots with `+`.
p1 + p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Question 9

# Daily mean, minimum and maximum temperature in long format (one row per
# date and statistic). Computed once and shared by the interactive and the
# animated plot instead of repeating the whole pipeline for each.
daily_long <- weather %>%
  group_by(year, month, day) %>%
  summarise(
    mean_temp = mean(temp, na.rm = TRUE),
    min_temp = min(temp, na.rm = TRUE),
    max_temp = max(temp, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>%
  pivot_longer(
    cols = c(mean_temp, min_temp, max_temp),
    names_to = "type",
    values_to = "temperature"
  )

# Static line plot: one line per statistic.
p <- ggplot(daily_long, aes(x = date, y = temperature, color = type)) +
  geom_line()

# Interactive version with date, temperature and statistic in the tooltip.
ggplotly(p, tooltip = c("x", "y", "color"))

# Animated version that reveals the same lines along the date axis. Stored as
# `temp_animation` so that base R's `return()` is not masked.
temp_animation <- p + transition_reveal(date)
animate(temp_animation, nframes = 200, fps = 20)
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Question 10

# Monthly maximum and minimum temperature in long format (one row per month
# and statistic). Stored as `monthly_range` so that base R's `return()` is not
# masked.
monthly_range <- weather %>%
  group_by(month) %>%
  summarise(
    tmax = max(temp, na.rm = TRUE),
    tmin = min(temp, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(tmin, tmax),
    names_to = "type",
    values_to = "temperature"
  )

# Stacked bars in polar coordinates, one bar per month.
# NOTE(review): with position = "stack" the total bar height is tmin + tmax;
# confirm that stacking (rather than overlaying or dodging) is intended.
ggplot(monthly_range, aes(x = factor(month), y = temperature, fill = type)) +
  geom_col(position = "stack") +
  coord_polar(start = 0)