#library =======
library(datasets) # loading the library
library(DescTools) # presumably loaded for DescTools::MeanAD() (mean absolute deviation); not called in the visible script
library(Stat2Data)
library(plyr) # provides ddply(); attached before dplyr, which then masks several plyr functions (see the messages below)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, mutate, rename, summarise, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
#data import =====
# Read the yearly O'Hare climate table. The path is relative, so the CSV has
# to sit in the current working directory.
Ohare_Climate_Data <- read.csv(
  "ENVS203_Homework_6_Chicago_Ohare_Temp_Precip_SP26.csv",
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  stringsAsFactors = TRUE
)

#data check =======
# Inspect column names and types right after import.
str(Ohare_Climate_Data)
## 'data.frame':    66 obs. of  8 variables:
##  $ YEAR    : int  1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
##  $ PRCP_MAX: num  2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
##  $ SNOW_MAX: num  5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
##  $ SNWD_MAX: num  11 8 10 8 7 10 7 27 5 10 ...
##  $ TMAX_MAX: int  99 93 94 95 96 94 97 92 96 96 ...
##  $ TMIN_MAX: int  -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
##  $ TMAX_AVG: num  57.2 58.3 57.9 58.3 59.8 ...
##  $ TMIN_AVG: num  40.5 38.7 38.5 36.2 37.9 ...
# Preview the first rows of the raw table (still using the original column names).
head(Ohare_Climate_Data)
##   YEAR PRCP_MAX SNOW_MAX SNWD_MAX TMAX_MAX TMIN_MAX TMAX_AVG TMIN_AVG
## 1 1960     2.00      5.8       11       99      -17 57.17760 40.46175
## 2 1961     2.88      7.4        8       93       -8 58.33973 38.68767
## 3 1962     2.84      8.6       10       94      -15 57.89863 38.49863
## 4 1963     1.98      8.2        8       95      -19 58.31781 36.16164
## 5 1964     2.33      7.4        7       96      -13 59.81421 37.91803
## 6 1965     1.77      6.8       10       94      -14 57.84110 38.26849
#renaming ======
# Show the original column names; the replacement below is positional, so the
# new names must be listed in exactly this order.
names(Ohare_Climate_Data)
## [1] "YEAR"     "PRCP_MAX" "SNOW_MAX" "SNWD_MAX" "TMAX_MAX" "TMIN_MAX" "TMAX_AVG"
## [8] "TMIN_AVG"
# Swap the terse source headers for descriptive names that carry their units.
Ohare_Climate_Data <- setNames(
  Ohare_Climate_Data,
  c(
    "year",
    "highest_annual_precip_inches",
    "highest_annual_snowfall_inches",
    "highest_annual_snow_depth_inches",
    "highest_annual_temp_F",
    "lowest_annual_temp_F",
    "average_annual_max_daily_temp_F",
    "average_annual_min_daily_temp_F"
  )
)
#transforming data==========
# Append a Celsius copy of each Fahrenheit temperature column using
# C = (F - 32) / 1.8. transform() evaluates its arguments inside the data
# frame, so the columns are referenced by bare name.
temp_celsius <- transform(
  Ohare_Climate_Data,
  lowest_annual_temp_C = (lowest_annual_temp_F - 32) / 1.8,
  highest_annual_temp_C = (highest_annual_temp_F - 32) / 1.8,
  average_annual_min_daily_temp_C =
    (average_annual_min_daily_temp_F - 32) / 1.8,
  average_annual_max_daily_temp_C =
    (average_annual_max_daily_temp_F - 32) / 1.8
)
# Check that the four *_C columns were added after the original ones.
head(temp_celsius)
##   year highest_annual_precip_inches highest_annual_snowfall_inches
## 1 1960                         2.00                            5.8
## 2 1961                         2.88                            7.4
## 3 1962                         2.84                            8.6
## 4 1963                         1.98                            8.2
## 5 1964                         2.33                            7.4
## 6 1965                         1.77                            6.8
##   highest_annual_snow_depth_inches highest_annual_temp_F lowest_annual_temp_F
## 1                               11                    99                  -17
## 2                                8                    93                   -8
## 3                               10                    94                  -15
## 4                                8                    95                  -19
## 5                                7                    96                  -13
## 6                               10                    94                  -14
##   average_annual_max_daily_temp_F average_annual_min_daily_temp_F
## 1                        57.17760                        40.46175
## 2                        58.33973                        38.68767
## 3                        57.89863                        38.49863
## 4                        58.31781                        36.16164
## 5                        59.81421                        37.91803
## 6                        57.84110                        38.26849
##   lowest_annual_temp_C highest_annual_temp_C average_annual_min_daily_temp_C
## 1            -27.22222              37.22222                        4.700971
## 2            -22.22222              33.88889                        3.715373
## 3            -26.11111              34.44444                        3.610350
## 4            -28.33333              35.00000                        2.312024
## 5            -25.00000              35.55556                        3.287796
## 6            -25.55556              34.44444                        3.482496
##   average_annual_max_daily_temp_C
## 1                        13.98755
## 2                        14.63318
## 3                        14.38813
## 4                        14.62100
## 5                        15.45234
## 6                        14.35616
#coding columns=====



# Label every row with its decade, e.g. 1987 -> "1980s".
# The label is computed from the year instead of from a hand-written list of
# ranges, so years before 1960 are no longer lumped into "1960s" and years
# after 2029 no longer fall through to NA. A missing year stays NA.
# The column keeps the name `lengthCode` because the summaries and plots
# further down group by it.
temp_celsius <- within(temp_celsius, {
  lengthCode <- ifelse(
    is.na(year),
    NA_character_,
    # Integer-divide by 10 to drop the last digit, then scale back up.
    paste0(year %/% 10L * 10L, "s")
  )
})
#summarizing data with ddply
# Per-decade count, mean, SD and standard error of the average annual minimum
# daily temperature (Celsius).
# dplyr is attached after plyr, so a bare `summarize` resolves to dplyr's
# masking version; naming plyr's own helper keeps the call independent of
# package load order.
# NOTE(review): the SD of the *max* daily temperature sits among the
# min-temperature statistics; it is kept because it is part of the existing
# output table.
sumClimate_Data01 <- ddply(
  temp_celsius, "lengthCode", plyr::summarise,
  countaverage_annual_min_daily_temp_C =
    sum(!is.na(average_annual_min_daily_temp_C)),
  meanaverage_annual_min_daily_temp_C =
    mean(average_annual_min_daily_temp_C, na.rm = TRUE),
  sdaverage_annual_min_daily_temp_C =
    sd(average_annual_min_daily_temp_C, na.rm = TRUE),
  sdaverage_annual_max_daily_temp_C =
    sd(average_annual_max_daily_temp_C, na.rm = TRUE),
  # Standard error = SD / sqrt(n), built from the columns defined just above.
  seaverage_annual_min_daily_temp_C =
    sdaverage_annual_min_daily_temp_C /
      sqrt(countaverage_annual_min_daily_temp_C)
)
sumClimate_Data01
##   lengthCode countaverage_annual_min_daily_temp_C
## 1      1960s                                   10
## 2      1970s                                   10
## 3      1980s                                   10
## 4      1990s                                   10
## 5      2000s                                   10
## 6      2010s                                   10
## 7      2020s                                    6
##   meanaverage_annual_min_daily_temp_C sdaverage_annual_min_daily_temp_C
## 1                            3.763744                         0.8406270
## 2                            4.688684                         1.0159995
## 3                            4.009297                         0.8025442
## 4                            4.960440                         1.0186722
## 5                            5.172084                         0.6839123
## 6                            5.747800                         1.0278729
## 7                            7.142727                         0.7292445
##   sdaverage_annual_max_daily_temp_C seaverage_annual_min_daily_temp_C
## 1                         0.5296523                         0.2658296
## 2                         0.8081171                         0.3212873
## 3                         0.7504866                         0.2537868
## 4                         1.0895146                         0.3221324
## 5                         0.6432253                         0.2162720
## 6                         1.1446605                         0.3250420
## 7                         0.7171727                         0.2977128
# Confirm the decade column was added and check its type.
str(temp_celsius)
## 'data.frame':    66 obs. of  13 variables:
##  $ year                            : int  1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
##  $ highest_annual_precip_inches    : num  2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
##  $ highest_annual_snowfall_inches  : num  5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
##  $ highest_annual_snow_depth_inches: num  11 8 10 8 7 10 7 27 5 10 ...
##  $ highest_annual_temp_F           : int  99 93 94 95 96 94 97 92 96 96 ...
##  $ lowest_annual_temp_F            : int  -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
##  $ average_annual_max_daily_temp_F : num  57.2 58.3 57.9 58.3 59.8 ...
##  $ average_annual_min_daily_temp_F : num  40.5 38.7 38.5 36.2 37.9 ...
##  $ lowest_annual_temp_C            : num  -27.2 -22.2 -26.1 -28.3 -25 ...
##  $ highest_annual_temp_C           : num  37.2 33.9 34.4 35 35.6 ...
##  $ average_annual_min_daily_temp_C : num  4.7 3.72 3.61 2.31 3.29 ...
##  $ average_annual_max_daily_temp_C : num  14 14.6 14.4 14.6 15.5 ...
##  $ lengthCode                      : chr  "1960s" "1960s" "1960s" "1960s" ...
#ggplot graphs=====
library(ggplot2)
# Data layer shared by both boxplots: one box per decade of the average annual
# maximum daily temperature (Celsius).
boxClimateData <- ggplot(
  data = temp_celsius,
  aes(x = lengthCode, y = average_annual_max_daily_temp_C)
)
#### Run your graph
library(RColorBrewer)
# Quick unlabelled draft.
boxClimateData + # data layer
  theme_classic() + # theme
  geom_boxplot(fill = "lightblue") # graph type

# Labelled version. The boxes use a constant fill and no variable is mapped to
# the fill aesthetic, so the former scale_fill_brewer() call had nothing to
# act on and has been dropped.
boxClimateData +
  theme_classic() +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Average Annual Maximum Daily Temperatures in Celsius by Decade",
    subtitle = "of O'hare between the 1960s-2020s",
    x = "Decade",
    y = "Average Annual Maximum Daily Temperature in Celsius"
  )

#bar graph====
# Re-check column names and types before building the bar-graph summary.
str(temp_celsius)
## 'data.frame':    66 obs. of  13 variables:
##  $ year                            : int  1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
##  $ highest_annual_precip_inches    : num  2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
##  $ highest_annual_snowfall_inches  : num  5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
##  $ highest_annual_snow_depth_inches: num  11 8 10 8 7 10 7 27 5 10 ...
##  $ highest_annual_temp_F           : int  99 93 94 95 96 94 97 92 96 96 ...
##  $ lowest_annual_temp_F            : int  -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
##  $ average_annual_max_daily_temp_F : num  57.2 58.3 57.9 58.3 59.8 ...
##  $ average_annual_min_daily_temp_F : num  40.5 38.7 38.5 36.2 37.9 ...
##  $ lowest_annual_temp_C            : num  -27.2 -22.2 -26.1 -28.3 -25 ...
##  $ highest_annual_temp_C           : num  37.2 33.9 34.4 35 35.6 ...
##  $ average_annual_min_daily_temp_C : num  4.7 3.72 3.61 2.31 3.29 ...
##  $ average_annual_max_daily_temp_C : num  14 14.6 14.4 14.6 15.5 ...
##  $ lengthCode                      : chr  "1960s" "1960s" "1960s" "1960s" ...
# Per-decade summary of the average annual maximum daily temperature (Celsius)
# that feeds the bar graph: n, mean, SD, standard error and a 95% half-width.
# plyr::summarise is named explicitly because dplyr, attached after plyr,
# masks the bare `summarise`.
climateGraph <- ddply(
  temp_celsius, "lengthCode", plyr::summarise,
  dataRep = sum(!is.na(average_annual_max_daily_temp_C)),
  dataMean = mean(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSD = sd(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSE = dataSD / sqrt(dataRep),
  # 1.96 is the normal-approximation multiplier for a 95% interval.
  dataCI95 = dataSE * 1.96
)

# head() prints at most six rows, so not every decade is displayed here.
head(climateGraph)
##   lengthCode dataRep dataMean    dataSD    dataSE  dataCI95
## 1      1960s      10 14.61008 0.5296523 0.1674908 0.3282819
## 2      1970s      10 14.83865 0.8081171 0.2555491 0.5008761
## 3      1980s      10 14.82960 0.7504866 0.2373247 0.4651564
## 4      1990s      10 15.09043 1.0895146 0.3445348 0.6752881
## 5      2000s      10 15.09840 0.6432253 0.2034057 0.3986751
## 6      2010s      10 15.33399 1.1446605 0.3619734 0.7094679
# Bar graph of the per-decade mean with error bars of +/- one standard deviation.
graphBarClimateData <- ggplot(
  data = climateGraph,
  aes(x = lengthCode, y = dataMean)
)
dodge <- position_dodge(width = 0.9) #side by side graph
# Error-bar extent: mean plus/minus one SD.
limits <- aes(
  ymin = dataMean - dataSD,
  ymax = dataMean + dataSD
)


graphBarClimateData +
  theme_classic() +
  # geom_col() draws bars whose heights are the y values as given.
  geom_col(position = dodge, color = "black", fill = "lightblue") +
  geom_errorbar(limits, position = dodge, width = 0.2) +
  labs(
    title = "Bar Graph for Standard Deviation of Average Annual Maximum Daily Temperature",
    subtitle = "from Ohare Climate data between 1960s and 2020s",
    x = "Decade",
    y = "Average Annual Maximum Daily Temperature in Celsius"
  )

#point graph=====
# Re-check column names and types before building the point-graph summary.
str(temp_celsius)
## 'data.frame':    66 obs. of  13 variables:
##  $ year                            : int  1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
##  $ highest_annual_precip_inches    : num  2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
##  $ highest_annual_snowfall_inches  : num  5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
##  $ highest_annual_snow_depth_inches: num  11 8 10 8 7 10 7 27 5 10 ...
##  $ highest_annual_temp_F           : int  99 93 94 95 96 94 97 92 96 96 ...
##  $ lowest_annual_temp_F            : int  -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
##  $ average_annual_max_daily_temp_F : num  57.2 58.3 57.9 58.3 59.8 ...
##  $ average_annual_min_daily_temp_F : num  40.5 38.7 38.5 36.2 37.9 ...
##  $ lowest_annual_temp_C            : num  -27.2 -22.2 -26.1 -28.3 -25 ...
##  $ highest_annual_temp_C           : num  37.2 33.9 34.4 35 35.6 ...
##  $ average_annual_min_daily_temp_C : num  4.7 3.72 3.61 2.31 3.29 ...
##  $ average_annual_max_daily_temp_C : num  14 14.6 14.4 14.6 15.5 ...
##  $ lengthCode                      : chr  "1960s" "1960s" "1960s" "1960s" ...
# Per-decade summary of the average annual maximum daily temperature (Celsius)
# for the point graph. It repeats the calculation from the bar-graph section,
# so this section can be run on its own.
# plyr::summarise is named explicitly because dplyr, attached after plyr,
# masks the bare `summarise`.
climateGraph <- ddply(
  temp_celsius, "lengthCode", plyr::summarise,
  dataRep = sum(!is.na(average_annual_max_daily_temp_C)),
  dataMean = mean(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSD = sd(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSE = dataSD / sqrt(dataRep),
  # 1.96 is the normal-approximation multiplier for a 95% interval.
  dataCI95 = dataSE * 1.96
)

# head() prints at most six rows, so not every decade is displayed here.
head(climateGraph)
##   lengthCode dataRep dataMean    dataSD    dataSE  dataCI95
## 1      1960s      10 14.61008 0.5296523 0.1674908 0.3282819
## 2      1970s      10 14.83865 0.8081171 0.2555491 0.5008761
## 3      1980s      10 14.82960 0.7504866 0.2373247 0.4651564
## 4      1990s      10 15.09043 1.0895146 0.3445348 0.6752881
## 5      2000s      10 15.09840 0.6432253 0.2034057 0.3986751
## 6      2010s      10 15.33399 1.1446605 0.3619734 0.7094679
# Point graph of the per-decade mean with error bars of +/- one standard error.
graphPointClimateData <- ggplot(
  data = climateGraph,
  aes(x = lengthCode, y = dataMean)
)
# Error-bar extent: mean plus/minus one SE.
limits <- aes(
  ymax = dataMean + dataSE,
  ymin = dataMean - dataSE
)
dodge <- position_dodge(width = 0.9) #side by side graph


graphPointClimateData +
  theme_classic() +
  # The default point shape is drawn with `color` only, so the former
  # `fill = "lightgreen"` had no visible effect; `stat = "identity"` is
  # already the geom_point() default. Both were removed.
  geom_point(position = dodge, color = "pink") +
  geom_errorbar(limits, position = dodge, width = 0.2) +
  labs(
    title = "Point Graph for Standard Error of Average Annual Maximum Daily Temperature",
    subtitle = "from Ohare Climate data between 1960s and 2020s",
    x = "Decade",
    y = "Average Annual Maximum Daily Temperature in Celsius"
  )