This project uses Warren Wilson College data collected on the farm weather station. You can collect it yourself at https://rainwise.net/weather/wwcfarm, but you can only collect a month's worth of data at a time. For your convenience, I downloaded all data from 2017-present, cleaned it up a bit, and placed all of the data in one huge file called weather.csv. It is posted on Moodle.
Go ahead and download it into your working directory, read it into R with the read.csv command, and save it as the dataframe weather.temp. (It’s large, so it may take R Markdown a bit to process this command.)
# Read the export by its full path rather than calling setwd(), so the
# session's working directory is not changed as a side effect.
weather.temp <- read.csv(
  file.path("/Users/ethan/Desktop/Jame's R Labs", "weather.csv")
)
Go ahead and view the top 10 lines of the file to see what it looks like. What are the variables present?
# Preview the first ten rows of the raw import to see which columns it has.
head(weather.temp, n = 10)
## X Time Temp.Avg Wind.Chill Hum.Avg Windspeed Gust Month Year
## 1 1 2017-04-01 00:00:00 52.7 52.7 72 6.2 6.2 4 2017
## 2 2 2017-04-01 00:01:00 52.7 52.7 72 6.2 6.2 4 2017
## 3 3 2017-04-01 00:02:00 52.7 52.7 73 4.7 6.2 4 2017
## 4 4 2017-04-01 00:03:00 52.7 52.7 72 3.7 3.7 4 2017
## 5 5 2017-04-01 00:04:00 52.7 52.7 72 3.7 3.7 4 2017
## 6 6 2017-04-01 00:05:00 52.7 52.7 72 3.7 3.7 4 2017
## 7 7 2017-04-01 00:06:00 52.7 52.7 72 3.7 3.7 4 2017
## 8 8 2017-04-01 00:07:00 52.6 52.6 72 3.2 3.7 4 2017
## 9 9 2017-04-01 00:08:00 52.5 52.5 72 6.0 10.5 4 2017
## 10 10 2017-04-01 00:09:00 52.5 52.5 72 9.5 9.5 4 2017
# Drop the first column (X, which just repeats the row number in the preview
# above). A negative index with drop = FALSE always returns a data frame;
# 2:length(weather.temp) would count down to c(2, 1) on a one-column frame
# and collapse to a bare vector on a two-column one.
weather <- weather.temp[, -1, drop = FALSE]
head(weather.temp)
## X Time Temp.Avg Wind.Chill Hum.Avg Windspeed Gust Month Year
## 1 1 2017-04-01 00:00:00 52.7 52.7 72 6.2 6.2 4 2017
## 2 2 2017-04-01 00:01:00 52.7 52.7 72 6.2 6.2 4 2017
## 3 3 2017-04-01 00:02:00 52.7 52.7 73 4.7 6.2 4 2017
## 4 4 2017-04-01 00:03:00 52.7 52.7 72 3.7 3.7 4 2017
## 5 5 2017-04-01 00:04:00 52.7 52.7 72 3.7 3.7 4 2017
## 6 6 2017-04-01 00:05:00 52.7 52.7 72 3.7 3.7 4 2017
# Same preview for the trimmed data frame (six rows, head()'s default).
head(weather, n = 6L)
## Time Temp.Avg Wind.Chill Hum.Avg Windspeed Gust Month Year
## 1 2017-04-01 00:00:00 52.7 52.7 72 6.2 6.2 4 2017
## 2 2017-04-01 00:01:00 52.7 52.7 72 6.2 6.2 4 2017
## 3 2017-04-01 00:02:00 52.7 52.7 73 4.7 6.2 4 2017
## 4 2017-04-01 00:03:00 52.7 52.7 72 3.7 3.7 4 2017
## 5 2017-04-01 00:04:00 52.7 52.7 72 3.7 3.7 4 2017
## 6 2017-04-01 00:05:00 52.7 52.7 72 3.7 3.7 4 2017
Timestamps are one of the trickiest things to work with in datasets. Conveniently, a package related to ggplot2 called lubridate makes working with dates easier. In the console, use the install.packages command to install the lubridate package. Then load the library as follows:
library("lubridate")
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Sometimes it is useful to know the day, month, or year of a timestamp. Lubridate has those commands built in, so you can type, for instance,
# Time is the first column, so weather[1, "Time"] is the [1,1] entry.
month(weather[1, "Time"])
## [1] 4
year(weather[1, "Time"])
## [1] 2017
weather[1, "Time"]
## [1] "2017-04-01 00:00:00"
Check that these give reasonable answers by looking at the [1,1] entry of weather.
# Day and month of the timestamp stored in row 1000.
day(weather[1000, "Time"])
## [1] 1
month(weather[1000, "Time"])
## [1] 4
# Give every column a more descriptive name (positional: one name per column,
# in the existing column order).
colnames(weather) <- c(
  "Time",
  "Temp.in.Farenheit",
  "Wind.Chill",
  "Humidity.Average",
  "Wind.Speed",
  "Gust",
  "Month",
  "Year"
)
head(weather)
## Time Temp.in.Farenheit Wind.Chill Humidity.Average Wind.Speed
## 1 2017-04-01 00:00:00 52.7 52.7 72 6.2
## 2 2017-04-01 00:01:00 52.7 52.7 72 6.2
## 3 2017-04-01 00:02:00 52.7 52.7 73 4.7
## 4 2017-04-01 00:03:00 52.7 52.7 72 3.7
## 5 2017-04-01 00:04:00 52.7 52.7 72 3.7
## 6 2017-04-01 00:05:00 52.7 52.7 72 3.7
## Gust Month Year
## 1 6.2 4 2017
## 2 6.2 4 2017
## 3 6.2 4 2017
## 4 3.7 4 2017
## 5 3.7 4 2017
## 6 3.7 4 2017
# Add a Temp.in.Celcius column, filled for now with the row number as a
# placeholder. seq_len(nrow()) is used instead of 1:length(...) so that an
# empty data frame yields an empty vector rather than c(1, 0). The column is
# assigned once; the original assigned the same values twice.
temp.in.Celcius <- seq_len(nrow(weather))
weather["Temp.in.Celcius"] <- temp.in.Celcius
head(weather)
## Time Temp.in.Farenheit Wind.Chill Humidity.Average Wind.Speed
## 1 2017-04-01 00:00:00 52.7 52.7 72 6.2
## 2 2017-04-01 00:01:00 52.7 52.7 72 6.2
## 3 2017-04-01 00:02:00 52.7 52.7 73 4.7
## 4 2017-04-01 00:03:00 52.7 52.7 72 3.7
## 5 2017-04-01 00:04:00 52.7 52.7 72 3.7
## 6 2017-04-01 00:05:00 52.7 52.7 72 3.7
## Gust Month Year Temp.in.Celcius
## 1 6.2 4 2017 1
## 2 6.2 4 2017 2
## 3 6.2 4 2017 3
## 4 3.7 4 2017 4
## 5 3.7 4 2017 5
## 6 3.7 4 2017 6
# Convert Fahrenheit to Celsius.
#
# x: temperature(s) in degrees Fahrenheit; anything that supports arithmetic
#    (a single number, a numeric vector, or a numeric data frame column).
# Returns the same shape as x, in degrees Celsius.
convertToCelsius <- function(x) {
  (x - 32) * 5 / 9
}
# Convert the whole Fahrenheit column with one vectorized call.
# The row-by-row loop that used to sit here called convertToCelsius() once per
# row and discarded every result, so it did nothing but take time; it has been
# removed.
temp.in.Celcius <- convertToCelsius(weather["Temp.in.Farenheit"])
# temp.in.Celcius is a one-column data frame; assigning it with `[<-` replaces
# the Temp.in.Celcius column with its values. The extra `$<-` assignment of
# the same object was dropped because this line immediately overwrote it.
weather["Temp.in.Celcius"] <- temp.in.Celcius
head(weather)
## Time Temp.in.Farenheit Wind.Chill Humidity.Average Wind.Speed
## 1 2017-04-01 00:00:00 52.7 52.7 72 6.2
## 2 2017-04-01 00:01:00 52.7 52.7 72 6.2
## 3 2017-04-01 00:02:00 52.7 52.7 73 4.7
## 4 2017-04-01 00:03:00 52.7 52.7 72 3.7
## 5 2017-04-01 00:04:00 52.7 52.7 72 3.7
## 6 2017-04-01 00:05:00 52.7 52.7 72 3.7
## Gust Month Year Temp.in.Celcius
## 1 6.2 4 2017 11.5
## 2 6.2 4 2017 11.5
## 3 6.2 4 2017 11.5
## 4 3.7 4 2017 11.5
## 5 3.7 4 2017 11.5
## 6 3.7 4 2017 11.5
Again, show the head of the file to illustrate these two steps worked.
# Summary statistics for temperature (kept as a one-column data frame so the
# result prints as a labelled table).
summary(weather[, "Temp.in.Farenheit", drop = FALSE])
## Temp.in.Farenheit
## Min. : 2.80
## 1st Qu.:42.30
## Median :55.00
## Mean :54.18
## 3rd Qu.:66.20
## Max. :91.70
# Summary statistics for wind speed (one-column data frame, printed as a
# labelled table).
summary(weather[, "Wind.Speed", drop = FALSE])
## Wind.Speed
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 2.800
## Mean : 3.481
## 3rd Qu.: 5.600
## Max. :29.100
# Summary statistics for gusts (one-column data frame, printed as a labelled
# table).
summary(weather[, "Gust", drop = FALSE])
## Gust
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 4.200
## Mean : 5.364
## 3rd Qu.: 8.500
## Max. :44.500
(note: you don’t need to fill the matrix entry by entry individually. Remember subsetting in a matrix–how do you select the first row?)
Make sure you display your resulting matrix with appropriate row and column names!
# Allocate the 3 x 6 summary matrix (rows = variables, columns = statistics).
# The integers 1..18 are only placeholders until the rows are filled in.
weather.summary <- matrix(seq_len(18), nrow = 3, ncol = 6)
#strsplit(summary(weather["Temp.in.Farenheit"]), ":")
# Extract the six summary statistics of a column as plain numbers.
#
# aDataframeCol: a one-column data frame (e.g. weather["Gust"]) or a bare
#                numeric vector.
# Returns an unnamed numeric vector of length 6, in the order
#   minimum, 1st quartile, median, mean, 3rd quartile, maximum.
#
# The statistics are taken straight from summary() on the underlying vector.
# The earlier approach split the *printed* summary text on ":" and stored the
# pieces in a character vector, which returned strings rounded for display
# and recomputed the summary on every loop iteration.
justTheNumbers <- function(aDataframeCol) {
  values <- if (is.data.frame(aDataframeCol)) {
    aDataframeCol[[1]]
  } else {
    aDataframeCol
  }
  stopifnot(is.numeric(values))
  # Keep only the first six entries: summary() appends a seventh ("NA's")
  # when the data contain missing values.
  as.numeric(summary(values))[1:6]
}
# Fill the matrix a whole row at a time: one row per variable, holding that
# variable's six summary statistics.
weather.summary[1, ] <- justTheNumbers(weather["Temp.in.Farenheit"])
weather.summary[2, ] <- justTheNumbers(weather["Wind.Speed"])
weather.summary[3, ] <- justTheNumbers(weather["Gust"])

# Label rows (variables) and columns (statistics) in a single step.
dimnames(weather.summary) <- list(
  c("Temp.in.Farenheit", "Wind.Speed", "Gust"),
  c("Min", "1st Qu", "Median", "Mean", "3rd Qu", "Max")
)
weather.summary
## Min 1st Qu Median Mean 3rd Qu Max
## Temp.in.Farenheit "2.8" "42.3" "55" "54.18" "66.2" "91.7"
## Wind.Speed "0" "0" "2.8" "3.481" "5.6" "29.1"
## Gust "0" "0" "4.2" "5.364" "8.5" "44.5"
# Row positions of every reading taken in December (Month == 12).
decembers <- which(weather[["Month"]] == 12)
# Summary statistics of the temperature for just those rows.
summary(weather[["Temp.in.Farenheit"]][decembers])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.3 33.0 41.0 41.1 48.6 71.3
### This code swaps month numbers for month names, then makes a new column.
# month.abb is base R's built-in vector of three-letter month abbreviations
# ("Jan" ... "Dec"). Indexing it by the month number labels every row in one
# step, replacing twelve separate assignments over a row-number placeholder
# (any row whose month was not matched would have kept a row number as its
# "name"; it now becomes NA).
# Note: the full-month-name code further down overwrites this column.
NewMonthNames <- month.abb[weather[["Month"]]]
weather["MonthName"] <- NewMonthNames
# Sanity check: there should be one distinct label per month present.
someVar <- unique(weather[["MonthName"]])
length(someVar)
## [1] 12
# month.name is base R's built-in vector of full English month names
# ("January" ... "December"). Indexing it by the month number labels every row
# in one step. This also removes the stray trailing spaces that several of the
# hand-typed labels carried ("March ", "April ", ...), which would otherwise
# appear in anything that displays MonthName.
NewMonthNames <- month.name[weather[["Month"]]]
weather["MonthName"] <- NewMonthNames
# Sanity check: there should be one distinct label per month present.
someVar <- unique(weather[["MonthName"]])
length(someVar)
## [1] 12
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# Violin plot of temperature for each month, one facet row per year.
# reorder() sorts the month labels by their month number so the x axis runs in
# calendar order rather than alphabetically.
ggplot(
  weather,
  aes(x = reorder(MonthName, Month), y = Temp.in.Farenheit)
) +
  geom_violin() +
  facet_grid("Year") +
  labs(
    title = "Temperature Distribution by Month, Jan 2017 - Aug 2020",
    x = "Month",
    y = "Temperature in Farenheit"
  ) +
  # Tilt the month labels so they do not overlap.
  theme(axis.text.x = element_text(angle = -45, hjust = -0.2))