# Point the R session at the project folder so that the relative file name
# passed to read.csv() further down resolves against this directory.
# NOTE(review): this path only exists on the original author's machine; anyone
# else running the script has to edit it (or open the folder as a project) first.
setwd("~/Documents/DiDa_426")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Read the temperature table; the file name is relative to the current
# working directory.
temps <- read.csv(file = "temps_nyc.csv")

# Quick base-graphics scatter plot of mean temperature against day, as a
# point of comparison for the ggplot versions that follow.
plot(x = temps$day, y = temps$actual_mean_temp)

# Each successive ggplot call adds another modification, building upon the last one.
# 1 - Baseline ggplot: a plain scatter plot of mean temperature against day,
#     with default point styling and default axis labels.
ggplot(data = temps, mapping = aes(x = day, y = actual_mean_temp)) +
  geom_point()

# 2 - Same scatter plot, now with blue points and readable axis titles.
#     The color is set outside aes(), so it is a fixed styling choice and
#     produces no legend.
ggplot(data = temps, mapping = aes(x = day, y = actual_mean_temp)) +
  geom_point(colour = "blue") +
  xlab("Day") +
  ylab("Mean Temperature")

# 3 - The previous plot plus a main title.
ggplot(data = temps, mapping = aes(x = day, y = actual_mean_temp)) +
  geom_point(colour = "blue") +
  xlab("Day") +
  ylab("Mean Temperature") +
  ggtitle("Mean Daily Temperature in New York City, 2014")

# Extend the previous plot to show three temperature metrics per day. Each
# metric gets its own geom_point() layer with a fixed color: mean in gray,
# minimum in blue, maximum in red. All layers share the same x variable, so
# it is declared once in ggplot() and each layer only supplies its own y.
# Because the colors are set outside aes(), no legend is drawn.
ggplot(data = temps, mapping = aes(x = day)) +
  geom_point(mapping = aes(y = actual_mean_temp), colour = "gray") +
  geom_point(mapping = aes(y = actual_min_temp), colour = "blue") +
  geom_point(mapping = aes(y = actual_max_temp), colour = "red") +
  labs(
    x = "Day",
    y = "Temperature",
    title = "Daily Temperature in New York City, 2014"
  )

# Same three point layers as the previous plot, but now with a legend.
# Placing a constant string inside aes() maps each layer to one level of a
# discrete color scale ("Mean", "Min", "Max"). scale_color_manual() then
# assigns the actual color for each level, and ggplot draws the legend from
# that scale; labs(color = ...) sets the legend title.
ggplot(temps) +
  geom_point(aes(x = day, y = actual_mean_temp, color = "Mean")) +
  geom_point(aes(x = day, y = actual_min_temp, color = "Min")) +
  geom_point(aes(x = day, y = actual_max_temp, color = "Max")) +
  labs(
    y = "Temperature",
    x = "Day",
    title = "Daily Temperature in New York City, 2014",
    color = "Temperature Values"
  ) +
  # Name every color by the level it belongs to. An unnamed vector is matched
  # by position against the levels in alphabetical order, which silently
  # mis-colors the points if a level is renamed or added. The legend text
  # comes straight from the level names, so no separate `labels` is needed.
  scale_color_manual(
    values = c("Max" = "red", "Mean" = "gray", "Min" = "blue")
  )

# All of the previous graphs built on one another to show the basic ways to create and modify graphs.
# Now we will go more in depth into data modification and easier ways to create graphs.
# Keep only the columns needed for plotting: the day plus the three
# temperature measurements (mean, min, max). The result is still in "wide"
# form, i.e. one column per temperature metric.
wide_data <- dplyr::select(
  temps,
  day,
  actual_mean_temp,
  actual_min_temp,
  actual_max_temp
)
# Reshape from wide to long: every column except `day` is stacked into a pair
# of columns.
#   - names_to  = "temp_type": holds the name of the column each value came from
#   - values_to = "temp":      holds the temperature value itself
long_data <- tidyr::pivot_longer(
  wide_data,
  cols = -day,
  names_to = "temp_type",
  values_to = "temp"
)