library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Restore the objects saved in olympiaWeather.rdata into the current workspace.
# NOTE(review): presumably this file is what defines `olywthr`, which the
# pipeline below reads; the absolute path also assumes the /cloud/project
# layout. Confirm both before running elsewhere.
load("/cloud/project/olympiaWeather.rdata")

# Issue Description: Correlation between weather and the 'Misery Index'

#Questions Define at least two specific questions you would like to attempt to answer.

#Data Source Identify the data source(s) you will use for your analysis. Provide a URL if possible.

#Documentation Provide a link to the documentation for the data or the documentation itself. Is there a data dictionary?

# Form of the Data: What type of data file(s) will you need to import into R?

#Get and modify the data
# Derive a provisional per-observation 'Misery Index' from the weather data
# and plot its distribution for each calendar month.
# Reads columns mo, PRCP, SNOW, TMAX and TMIN from `olywthr`.
olywthr %>%

  # Convert the month number into a factor of month names whose levels are in
  # calendar order, so the x axis is ordered January..December without a
  # separate lookup. Values of `mo` outside 1-12 become NA.
  mutate(Month = factor(mo, levels = 1:12, labels = month.name)) %>%

  # I'll revisit this later, not quite sure how to calculate 'misery'.
  # For right now it's roughly:
  #   (rain + snow + how far TMAX is below 104 + how far TMIN is below 76),
  #   divided by sqrt(4) (the square root of the number of terms),
  #   then divided by 82, intended to scale the result to about 1.
  # The formula works row by row, so no grouping is needed here.
  mutate(
    misery = ((PRCP + SNOW + (104 - TMAX) + (76 - TMIN)) / sqrt(4)) / 82
  ) %>%

  # Boxplot of 'misery' for each month
  ggplot(aes(x = Month, y = misery)) +
  geom_boxplot() +

  # Not sure if I want to keep this, thought it was a cool visual
  geom_dotplot(
    binaxis = "y",
    binwidth = 0.001,
    stackdir = "center",
    dotsize = 0.3,
    fill = "red"
  ) +

  # Show only the 0-1 range of the y axis. coord_cartesian() zooms the view
  # without discarding data; ylim() would drop every row outside the range
  # before the boxplot statistics are computed and so distort the boxes.
  coord_cartesian(ylim = c(0, 1)) +

  # Always label your axes
  labs(
    title = "'Misery Index' in Olympia [1877 - 2017]",
    x = "Month",
    y = "Misery Index"
  )

  #This is very fun to play around with.