library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the saved R workspace containing the Olympia weather data.
# NOTE(review): presumably this is what provides the `olywthr` data frame used
# below — confirm. The absolute path only resolves where /cloud/project exists.
load("/cloud/project/olympiaWeather.rdata")
# Issue Description: Correlation between weather and the 'Misery Index'.
# Questions: Define at least two specific questions you would like to attempt to answer.
# Data Source: Identify the data source(s) you will use for your analysis. Provide a URL if possible.
# Documentation: Provide a link to the documentation for the data or the documentation itself. Is there a data dictionary?
# Form of the Data: What type of data file(s) will you need to import into R?
# Get and modify the data
# Draw a per-month box plot (with overlaid dots) of a daily 'Misery Index'
# computed from the olywthr weather records.
#
# The index is still a rough draft:
#   ((PRCP + SNOW + (104 - TMAX) + (76 - TMIN)) / sqrt(4)) / 82
# i.e. rain + snow + how far the high is below 104 + how far the low is below
# 76, divided by the square root of the number of terms (4), then divided by 82
# so that typical values land roughly in 0-1.
# NOTE(review): 104, 76 and 82 look like reference temperatures / a scaling
# constant -- confirm their origin and the units of PRCP/SNOW/TMAX/TMIN.
olywthr %>%
  mutate(
    # Map month numbers 1-12 to calendar-ordered month names in one step;
    # any other value becomes NA, as it did with the old case_when().
    Month = factor(mo, levels = 1:12, labels = month.name),
    # Purely element-wise, so no group_by() is needed (and nothing is left
    # grouped afterwards).
    misery = ((PRCP + SNOW + (104 - TMAX) + (76 - TMIN)) / sqrt(4)) / 82
  ) %>%
  ggplot(aes(x = Month, y = misery)) +
  geom_boxplot() +
  # Not sure if I want to keep this, thought it was a cool visual
  geom_dotplot(
    binaxis = "y",
    binwidth = 0.001,
    stackdir = "center",
    dotsize = 0.3,
    fill = "red"
  ) +
  # Show only 0-1 on the y axis by zooming. ylim() would instead discard rows
  # outside the range before the box-plot statistics are computed, silently
  # biasing the boxes whenever the index falls outside 0-1.
  coord_cartesian(ylim = c(0, 1)) +
  # Always label your axes
  ggtitle("'Misery Index' in Olympia [1877 - 2017]") +
  xlab("Month") +
  ylab("Misery Index")
#This is very fun to play around with.