#library =======
library(datasets) #loading the library
library(DescTools) #required for mean AD ()
library(Stat2Data)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, mutate, rename, summarise, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
#data import =====
# Read the climate CSV from the current working directory into a data frame.
# stringsAsFactors only affects text columns, which are imported as factors.
Ohare_Climate_Data <- read.csv(
  "ENVS203_Homework_6_Chicago_Ohare_Temp_Precip_SP26.csv",
  # TRUE is spelled out: T is an ordinary variable that can be reassigned.
  stringsAsFactors = TRUE
)
#data check =======
# Confirm the import: column types first, then the first six rows.
str(Ohare_Climate_Data)
## 'data.frame': 66 obs. of 8 variables:
## $ YEAR : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ PRCP_MAX: num 2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
## $ SNOW_MAX: num 5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
## $ SNWD_MAX: num 11 8 10 8 7 10 7 27 5 10 ...
## $ TMAX_MAX: int 99 93 94 95 96 94 97 92 96 96 ...
## $ TMIN_MAX: int -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
## $ TMAX_AVG: num 57.2 58.3 57.9 58.3 59.8 ...
## $ TMIN_AVG: num 40.5 38.7 38.5 36.2 37.9 ...
head(Ohare_Climate_Data)
## YEAR PRCP_MAX SNOW_MAX SNWD_MAX TMAX_MAX TMIN_MAX TMAX_AVG TMIN_AVG
## 1 1960 2.00 5.8 11 99 -17 57.17760 40.46175
## 2 1961 2.88 7.4 8 93 -8 58.33973 38.68767
## 3 1962 2.84 8.6 10 94 -15 57.89863 38.49863
## 4 1963 1.98 8.2 8 95 -19 58.31781 36.16164
## 5 1964 2.33 7.4 7 96 -13 59.81421 37.91803
## 6 1965 1.77 6.8 10 94 -14 57.84110 38.26849
#renaming ======
names(Ohare_Climate_Data)
## [1] "YEAR" "PRCP_MAX" "SNOW_MAX" "SNWD_MAX" "TMAX_MAX" "TMIN_MAX" "TMAX_AVG"
## [8] "TMIN_AVG"
# Map each raw column code (left) to its descriptive name (right). Renaming
# by name instead of by position keeps every label attached to the right
# data even if the CSV column order changes or extra columns are present.
column_labels <- c(
  YEAR     = "year",
  PRCP_MAX = "highest_annual_precip_inches",
  SNOW_MAX = "highest_annual_snowfall_inches",
  SNWD_MAX = "highest_annual_snow_depth_inches",
  TMAX_MAX = "highest_annual_temp_F",
  TMIN_MAX = "lowest_annual_temp_F",
  TMAX_AVG = "average_annual_max_daily_temp_F",
  TMIN_AVG = "average_annual_min_daily_temp_F"
)
raw_names <- names(column_labels)
has_raw <- raw_names %in% names(Ohare_Climate_Data)
# A column that already carries its descriptive name is accepted as-is, so
# this section can be re-run without failing.
has_new <- column_labels %in% names(Ohare_Climate_Data)
if (!all(has_raw | has_new)) {
  stop("Columns missing from Ohare_Climate_Data: ",
       paste(raw_names[!(has_raw | has_new)], collapse = ", "),
       call. = FALSE)
}
# Rename only the columns that still have their raw code.
rename_at <- match(raw_names[has_raw], names(Ohare_Climate_Data))
names(Ohare_Climate_Data)[rename_at] <- unname(column_labels[has_raw])
#transforming data==========
# Fahrenheit to Celsius: remove the 32-degree offset, then divide by 1.8.
to_celsius <- function(temp_f) {
  (temp_f - 32) / 1.8
}

# Copy the data and append one Celsius column per Fahrenheit temperature
# column. The new columns are added in this order: lowest, highest,
# average minimum, average maximum.
temp_celsius <- Ohare_Climate_Data
temp_celsius[["lowest_annual_temp_C"]] <-
  to_celsius(temp_celsius[["lowest_annual_temp_F"]])
temp_celsius[["highest_annual_temp_C"]] <-
  to_celsius(temp_celsius[["highest_annual_temp_F"]])
temp_celsius[["average_annual_min_daily_temp_C"]] <-
  to_celsius(temp_celsius[["average_annual_min_daily_temp_F"]])
temp_celsius[["average_annual_max_daily_temp_C"]] <-
  to_celsius(temp_celsius[["average_annual_max_daily_temp_F"]])

# Show the first six rows, including the new Celsius columns.
head(temp_celsius)
## year highest_annual_precip_inches highest_annual_snowfall_inches
## 1 1960 2.00 5.8
## 2 1961 2.88 7.4
## 3 1962 2.84 8.6
## 4 1963 1.98 8.2
## 5 1964 2.33 7.4
## 6 1965 1.77 6.8
## highest_annual_snow_depth_inches highest_annual_temp_F lowest_annual_temp_F
## 1 11 99 -17
## 2 8 93 -8
## 3 10 94 -15
## 4 8 95 -19
## 5 7 96 -13
## 6 10 94 -14
## average_annual_max_daily_temp_F average_annual_min_daily_temp_F
## 1 57.17760 40.46175
## 2 58.33973 38.68767
## 3 57.89863 38.49863
## 4 58.31781 36.16164
## 5 59.81421 37.91803
## 6 57.84110 38.26849
## lowest_annual_temp_C highest_annual_temp_C average_annual_min_daily_temp_C
## 1 -27.22222 37.22222 4.700971
## 2 -22.22222 33.88889 3.715373
## 3 -26.11111 34.44444 3.610350
## 4 -28.33333 35.00000 2.312024
## 5 -25.00000 35.55556 3.287796
## 6 -25.55556 34.44444 3.482496
## average_annual_max_daily_temp_C
## 1 13.98755
## 2 14.63318
## 3 14.38813
## 4 14.62100
## 5 15.45234
## 6 14.35616
#coding columns=====
# Label every row with its decade ("1960s", "1970s", ...) in the column
# lengthCode. The label is computed from the year itself (integer-divide by
# 10, multiply back by 10, append "s"), so years outside a fixed list of
# decades are still labelled with their own decade. A missing year stays NA
# instead of becoming the text "NAs".
temp_celsius$lengthCode <- ifelse(
  is.na(temp_celsius$year),
  NA_character_,
  paste0(temp_celsius$year %/% 10 * 10, "s")
)
#summarizing data with ddply
# One row per decade (lengthCode) with, for the average annual minimum daily
# temperature in Celsius: the number of non-missing years, the mean, the
# standard deviation, and the standard error (SD / sqrt(count)). The standard
# deviation of the average annual maximum daily temperature is reported too.
# plyr::summarise is named explicitly so the result does not depend on
# whether plyr or dplyr was attached last; it evaluates the columns in order,
# which lets the standard error reuse the SD and count defined above it.
sumClimate_Data01 <- ddply(
  temp_celsius, "lengthCode", plyr::summarise,
  countaverage_annual_min_daily_temp_C =
    sum(!is.na(average_annual_min_daily_temp_C)),
  meanaverage_annual_min_daily_temp_C =
    mean(average_annual_min_daily_temp_C, na.rm = TRUE),
  sdaverage_annual_min_daily_temp_C =
    sd(average_annual_min_daily_temp_C, na.rm = TRUE),
  sdaverage_annual_max_daily_temp_C =
    sd(average_annual_max_daily_temp_C, na.rm = TRUE),
  seaverage_annual_min_daily_temp_C =
    sdaverage_annual_min_daily_temp_C /
      sqrt(countaverage_annual_min_daily_temp_C)
)
sumClimate_Data01
## lengthCode countaverage_annual_min_daily_temp_C
## 1 1960s 10
## 2 1970s 10
## 3 1980s 10
## 4 1990s 10
## 5 2000s 10
## 6 2010s 10
## 7 2020s 6
## meanaverage_annual_min_daily_temp_C sdaverage_annual_min_daily_temp_C
## 1 3.763744 0.8406270
## 2 4.688684 1.0159995
## 3 4.009297 0.8025442
## 4 4.960440 1.0186722
## 5 5.172084 0.6839123
## 6 5.747800 1.0278729
## 7 7.142727 0.7292445
## sdaverage_annual_max_daily_temp_C seaverage_annual_min_daily_temp_C
## 1 0.5296523 0.2658296
## 2 0.8081171 0.3212873
## 3 0.7504866 0.2537868
## 4 1.0895146 0.3221324
## 5 0.6432253 0.2162720
## 6 1.1446605 0.3250420
## 7 0.7171727 0.2977128
# Check the structure of the data that feeds the plots below.
str(temp_celsius)
## 'data.frame': 66 obs. of 13 variables:
## $ year : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ highest_annual_precip_inches : num 2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
## $ highest_annual_snowfall_inches : num 5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
## $ highest_annual_snow_depth_inches: num 11 8 10 8 7 10 7 27 5 10 ...
## $ highest_annual_temp_F : int 99 93 94 95 96 94 97 92 96 96 ...
## $ lowest_annual_temp_F : int -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
## $ average_annual_max_daily_temp_F : num 57.2 58.3 57.9 58.3 59.8 ...
## $ average_annual_min_daily_temp_F : num 40.5 38.7 38.5 36.2 37.9 ...
## $ lowest_annual_temp_C : num -27.2 -22.2 -26.1 -28.3 -25 ...
## $ highest_annual_temp_C : num 37.2 33.9 34.4 35 35.6 ...
## $ average_annual_min_daily_temp_C : num 4.7 3.72 3.61 2.31 3.29 ...
## $ average_annual_max_daily_temp_C : num 14 14.6 14.4 14.6 15.5 ...
## $ lengthCode : chr "1960s" "1960s" "1960s" "1960s" ...
#ggplot graphs=====
library(ggplot2)
# Data layer for the box plots: decade on the x axis, average annual maximum
# daily temperature (Celsius) on the y axis.
boxClimateData <- ggplot(
  data = temp_celsius,
  aes(x = lengthCode, y = average_annual_max_daily_temp_C)
)
#### Run your graph
# NOTE(review): nothing in this section uses RColorBrewer directly any more;
# the library call is kept in case later code relies on it being attached.
library(RColorBrewer)
# First pass: unlabelled box plot, one light-blue box per decade.
boxClimateData +
  theme_classic() +
  geom_boxplot(fill = "lightblue")

# Final version: the same box plot with title, subtitle and axis labels.
# No fill scale is added: the fill is a fixed colour rather than a mapped
# aesthetic, so a brewer palette would have nothing to act on.
boxClimateData +
  theme_classic() +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Average Annual Maximum Daily Temperatures in Celsius by Decade",
    subtitle = "of O'hare between the 1960s-2020s",
    x = "Decade",
    y = "Average Annual Maximum Daily Temperature in Celsius"
  )

#bar graph====
str(temp_celsius)
## 'data.frame': 66 obs. of 13 variables:
## $ year : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ highest_annual_precip_inches : num 2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
## $ highest_annual_snowfall_inches : num 5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
## $ highest_annual_snow_depth_inches: num 11 8 10 8 7 10 7 27 5 10 ...
## $ highest_annual_temp_F : int 99 93 94 95 96 94 97 92 96 96 ...
## $ lowest_annual_temp_F : int -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
## $ average_annual_max_daily_temp_F : num 57.2 58.3 57.9 58.3 59.8 ...
## $ average_annual_min_daily_temp_F : num 40.5 38.7 38.5 36.2 37.9 ...
## $ lowest_annual_temp_C : num -27.2 -22.2 -26.1 -28.3 -25 ...
## $ highest_annual_temp_C : num 37.2 33.9 34.4 35 35.6 ...
## $ average_annual_min_daily_temp_C : num 4.7 3.72 3.61 2.31 3.29 ...
## $ average_annual_max_daily_temp_C : num 14 14.6 14.4 14.6 15.5 ...
## $ lengthCode : chr "1960s" "1960s" "1960s" "1960s" ...
# Per-decade summary of the average annual maximum daily temperature
# (Celsius): non-missing count, mean, standard deviation, standard error
# (SD / sqrt(count)) and a 95% half-width (SE * 1.96).
# plyr::summarise is named explicitly so the call does not depend on whether
# plyr or dplyr was attached last.
# NOTE(review): 1.96 is the large-sample normal multiplier; with about ten
# years per decade, qt(0.975, dataRep - 1) would give a wider interval.
# Confirm which one is wanted before dataCI95 is used.
climateGraph <- ddply(
  temp_celsius, "lengthCode", plyr::summarise,
  dataRep = sum(!is.na(average_annual_max_daily_temp_C)),
  dataMean = mean(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSD = sd(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSE = dataSD / sqrt(dataRep),
  dataCI95 = dataSE * 1.96
)
# head() prints the first six rows only.
head(climateGraph)
## lengthCode dataRep dataMean dataSD dataSE dataCI95
## 1 1960s 10 14.61008 0.5296523 0.1674908 0.3282819
## 2 1970s 10 14.83865 0.8081171 0.2555491 0.5008761
## 3 1980s 10 14.82960 0.7504866 0.2373247 0.4651564
## 4 1990s 10 15.09043 1.0895146 0.3445348 0.6752881
## 5 2000s 10 15.09840 0.6432253 0.2034057 0.3986751
## 6 2010s 10 15.33399 1.1446605 0.3619734 0.7094679
# Data layer: one bar per decade, bar height = decade mean.
graphBarClimateData <- ggplot(
  data = climateGraph,
  aes(x = lengthCode, y = dataMean)
)
# Error bars span one standard deviation either side of the mean.
limits <- aes(ymax = dataMean + dataSD,
              ymin = dataMean - dataSD)
dodge <- position_dodge(width = 0.9) #side by side graph
# geom_col() draws bars whose heights are the y values themselves.
graphBarClimateData +
  theme_classic() +
  geom_col(position = dodge, color = "black", fill = "lightblue") +
  geom_errorbar(limits, position = dodge, width = 0.2) +
  labs(
    title = "Bar Graph for Standard Deviation of Average Annual Maximum Daily Temperature",
    subtitle = "from Ohare Climate data between 1960s and 2020s",
    x = "Decade",
    y = "Average Annual Maximum Daily Temperature in Celsius"
  )

#point graph=====
str(temp_celsius)
## 'data.frame': 66 obs. of 13 variables:
## $ year : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ highest_annual_precip_inches : num 2 2.88 2.84 1.98 2.33 1.77 2.69 3.09 3.43 4.25 ...
## $ highest_annual_snowfall_inches : num 5.8 7.4 8.6 8.2 7.4 6.8 8.1 13.4 3.8 9.9 ...
## $ highest_annual_snow_depth_inches: num 11 8 10 8 7 10 7 27 5 10 ...
## $ highest_annual_temp_F : int 99 93 94 95 96 94 97 92 96 96 ...
## $ lowest_annual_temp_F : int -17 -8 -15 -19 -13 -14 -20 -17 -13 -8 ...
## $ average_annual_max_daily_temp_F : num 57.2 58.3 57.9 58.3 59.8 ...
## $ average_annual_min_daily_temp_F : num 40.5 38.7 38.5 36.2 37.9 ...
## $ lowest_annual_temp_C : num -27.2 -22.2 -26.1 -28.3 -25 ...
## $ highest_annual_temp_C : num 37.2 33.9 34.4 35 35.6 ...
## $ average_annual_min_daily_temp_C : num 4.7 3.72 3.61 2.31 3.29 ...
## $ average_annual_max_daily_temp_C : num 14 14.6 14.4 14.6 15.5 ...
## $ lengthCode : chr "1960s" "1960s" "1960s" "1960s" ...
# Per-decade summary of the average annual maximum daily temperature
# (Celsius): non-missing count, mean, standard deviation, standard error
# (SD / sqrt(count)) and a 95% half-width (SE * 1.96). The summary is rebuilt
# here, so this section only needs temp_celsius to exist.
# plyr::summarise is named explicitly so the call does not depend on whether
# plyr or dplyr was attached last.
climateGraph <- ddply(
  temp_celsius, "lengthCode", plyr::summarise,
  dataRep = sum(!is.na(average_annual_max_daily_temp_C)),
  dataMean = mean(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSD = sd(average_annual_max_daily_temp_C, na.rm = TRUE),
  dataSE = dataSD / sqrt(dataRep),
  dataCI95 = dataSE * 1.96
)
# head() prints the first six rows only.
head(climateGraph)
## lengthCode dataRep dataMean dataSD dataSE dataCI95
## 1 1960s 10 14.61008 0.5296523 0.1674908 0.3282819
## 2 1970s 10 14.83865 0.8081171 0.2555491 0.5008761
## 3 1980s 10 14.82960 0.7504866 0.2373247 0.4651564
## 4 1990s 10 15.09043 1.0895146 0.3445348 0.6752881
## 5 2000s 10 15.09840 0.6432253 0.2034057 0.3986751
## 6 2010s 10 15.33399 1.1446605 0.3619734 0.7094679
# Data layer: one point per decade, placed at the decade mean.
graphPointClimateData <- ggplot(
  data = climateGraph,
  aes(x = lengthCode, y = dataMean)
)
# Error bars span one standard error either side of the mean.
limits <- aes(ymax = dataMean + dataSE,
              ymin = dataMean - dataSE)
dodge <- position_dodge(width = 0.9) #side by side graph
# Only the colour is set on the points: the default point shape is drawn
# with its colour alone, so a fill value would not show.
graphPointClimateData +
  theme_classic() +
  geom_point(position = dodge, color = "pink") +
  geom_errorbar(limits, position = dodge, width = 0.2) +
  labs(
    title = "Point Graph for Standard Error of Average Annual Maximum Daily Temperature",
    subtitle = "from Ohare Climate data between 1960s and 2020s",
    x = "Decade",
    y = "Average Annual Maximum Daily Temperature in Celsius"
  )
