1.1 Packages
library('tidyverse')
# Task: install lubridate
# install.packages("lubridate")
library('lubridate')
1.2 Data
Task: download the datasets and place them into your working directory.
2.1 Loading data into R
# Read the barometer readings from the working directory, then inspect the
# column types that read.csv() chose.
barometer <- read.csv(file = "barometer-last-year.csv")
str(object = barometer)
## 'data.frame': 355 obs. of 2 variables:
## $ DateTime: Factor w/ 355 levels "2016-10-09 00:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Baro : num 1022 1020 1016 1013 1006 ...
# Parse the DateTime column into date-time values with lubridate's ymd_hms(),
# then re-check the structure to confirm the new column type.
barometer$DateTime <- ymd_hms(barometer$DateTime)
str(barometer)
## 'data.frame': 355 obs. of 2 variables:
## $ DateTime: POSIXct, format: "2016-10-09" "2016-10-10" ...
## $ Baro : num 1022 1020 1016 1013 1006 ...
# alternative method: read the file and parse DateTime in a single pipeline
barometer <- read.csv("barometer-last-year.csv") %>%
  mutate(DateTime = ymd_hms(DateTime))

# Task: load in the other three datasets
# Each file gets the same treatment (read, then parse its DateTime column),
# so a small helper keeps the three loads identical.
read_weather_csv <- function(path) {
  read.csv(path) %>%
    mutate(DateTime = ymd_hms(DateTime))
}

indoor <- read_weather_csv("indoor-temperature-last-year.csv")
outside <- read_weather_csv("outside-temperature-last-year.csv")
rainfall <- read_weather_csv("rainfall-last-year.csv")
# Open the help page for left_join() before using it.
?left_join()
# Keep every barometer row and attach the indoor readings that share its
# DateTime.
weather <- barometer %>%
  left_join(indoor, by = "DateTime")
head(weather, n = 6)
## DateTime Baro Humidity Temperature Temperature_range..low.
## 1 2016-10-09 1021.9 54 21.93 21.0
## 2 2016-10-10 1019.9 52 21.77 20.4
## 3 2016-10-11 1015.8 51 21.36 19.9
## 4 2016-10-12 1013.2 51 21.44 20.0
## 5 2016-10-13 1005.9 52 21.22 20.1
## 6 2016-10-14 998.6 52 21.02 19.6
## Temperature_range..high.
## 1 22.8
## 2 23.6
## 3 23.0
## 4 23.6
## 5 22.3
## 6 22.6
# Attach the outside readings. Column names shared with the indoor data are
# disambiguated with the ".indoor" / ".outdoor" suffixes.
weather <- weather %>%
  left_join(outside, by = "DateTime", suffix = c(".indoor", ".outdoor"))
head(weather, n = 6)
## DateTime Baro Humidity Temperature.indoor
## 1 2016-10-09 1021.9 54 21.93
## 2 2016-10-10 1019.9 52 21.77
## 3 2016-10-11 1015.8 51 21.36
## 4 2016-10-12 1013.2 51 21.44
## 5 2016-10-13 1005.9 52 21.22
## 6 2016-10-14 998.6 52 21.02
## Temperature_range..low..indoor Temperature_range..high..indoor
## 1 21.0 22.8
## 2 20.4 23.6
## 3 19.9 23.0
## 4 20.0 23.6
## 5 20.1 22.3
## 6 19.6 22.6
## Temperature.outdoor Temperature_range..low..outdoor
## 1 10.66 7.2
## 2 8.94 5.6
## 3 8.69 5.3
## 4 11.55 9.0
## 5 9.40 6.0
## 6 9.85 6.8
## Temperature_range..high..outdoor
## 1 13.8
## 2 12.8
## 3 14.3
## 4 14.9
## 5 13.3
## 6 13.3
# Attach the rainfall readings (column mm). None of its column names clash
# with the columns already in weather, so no suffix is needed for this join.
weather <- left_join(weather, rainfall, by = "DateTime")
head(weather)
## DateTime Baro Humidity Temperature.indoor
## 1 2016-10-09 1021.9 54 21.93
## 2 2016-10-10 1019.9 52 21.77
## 3 2016-10-11 1015.8 51 21.36
## 4 2016-10-12 1013.2 51 21.44
## 5 2016-10-13 1005.9 52 21.22
## 6 2016-10-14 998.6 52 21.02
## Temperature_range..low..indoor Temperature_range..high..indoor
## 1 21.0 22.8
## 2 20.4 23.6
## 3 19.9 23.0
## 4 20.0 23.6
## 5 20.1 22.3
## 6 19.6 22.6
## Temperature.outdoor Temperature_range..low..outdoor
## 1 10.66 7.2
## 2 8.94 5.6
## 3 8.69 5.3
## 4 11.55 9.0
## 5 9.40 6.0
## 6 9.85 6.8
## Temperature_range..high..outdoor mm
## 1 13.8 0.0
## 2 12.8 0.0
## 3 14.3 0.0
## 4 14.9 0.0
## 5 13.3 0.0
## 6 13.3 1.1
# The same three joins written as one pipeline, starting from barometer.
# Only the indoor/outside join has clashing column names, so only that step
# sets a suffix.
weather <- barometer %>%
  left_join(indoor, by = "DateTime") %>%
  left_join(outside, by = "DateTime", suffix = c(".indoor", ".outside")) %>%
  left_join(rainfall, by = "DateTime")
head(weather, n = 6)
## DateTime Baro Humidity Temperature.indoor
## 1 2016-10-09 1021.9 54 21.93
## 2 2016-10-10 1019.9 52 21.77
## 3 2016-10-11 1015.8 51 21.36
## 4 2016-10-12 1013.2 51 21.44
## 5 2016-10-13 1005.9 52 21.22
## 6 2016-10-14 998.6 52 21.02
## Temperature_range..low..indoor Temperature_range..high..indoor
## 1 21.0 22.8
## 2 20.4 23.6
## 3 19.9 23.0
## 4 20.0 23.6
## 5 20.1 22.3
## 6 19.6 22.6
## Temperature.outside Temperature_range..low..outside
## 1 10.66 7.2
## 2 8.94 5.6
## 3 8.69 5.3
## 4 11.55 9.0
## 5 9.40 6.0
## 6 9.85 6.8
## Temperature_range..high..outside mm
## 1 13.8 0.0
## 2 12.8 0.0
## 3 14.3 0.0
## 4 14.9 0.0
## 5 13.3 0.0
## 6 13.3 1.1
2.3 Summaries
# Per-column summary (min, quartiles, mean, max) plus the count of missing
# values in each column.
summary(weather)
## DateTime Baro Humidity
## Min. :2016-10-09 00:00:00 Min. : 979.6 Min. :37.00
## 1st Qu.:2017-01-06 12:00:00 1st Qu.:1004.9 1st Qu.:44.00
## Median :2017-04-06 00:00:00 Median :1010.5 Median :48.00
## Mean :2017-04-06 19:56:37 Mean :1010.0 Mean :48.52
## 3rd Qu.:2017-07-03 12:00:00 3rd Qu.:1016.0 3rd Qu.:52.00
## Max. :2017-10-09 00:00:00 Max. :1035.6 Max. :59.00
## NA's :1
## Temperature.indoor Temperature_range..low..indoor
## Min. :18.04 Min. :14.90
## 1st Qu.:20.34 1st Qu.:18.73
## Median :21.71 Median :20.60
## Mean :21.83 Mean :20.56
## 3rd Qu.:22.71 3rd Qu.:21.90
## Max. :29.21 Max. :28.20
## NA's :1 NA's :1
## Temperature_range..high..indoor Temperature.outside
## Min. :19.70 Min. :-1.81
## 1st Qu.:22.50 1st Qu.: 7.39
## Median :23.20 Median :10.96
## Mean :23.53 Mean :11.14
## 3rd Qu.:24.10 3rd Qu.:15.05
## Max. :31.10 Max. :26.38
## NA's :1
## Temperature_range..low..outside Temperature_range..high..outside
## Min. :-4.100 Min. : 1.50
## 1st Qu.: 4.350 1st Qu.:10.25
## Median : 8.000 Median :15.10
## Mean : 7.866 Mean :15.52
## 3rd Qu.:12.050 3rd Qu.:19.85
## Max. :18.700 Max. :38.50
##
## mm
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 1.549
## 3rd Qu.: 1.100
## Max. :23.200
## NA's :2
# mm contains missing values, so the plain mean is NA.
mean(weather$mm)
## [1] NA
# na.rm = TRUE drops the missing values before averaging.
mean(weather$mm, na.rm = TRUE)
## [1] 1.548725
# Standard deviation of every column. apply() converts the whole data frame
# to a matrix first; with the DateTime column included the values have to be
# coerced back to numbers, so DateTime comes out as NA with a warning.
apply(weather, 2, sd, na.rm = TRUE)
## Warning in var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm
## = na.rm): NAs introduced by coercion
## DateTime Baro
## NA 9.869662
## Humidity Temperature.indoor
## 5.188886 2.058307
## Temperature_range..low..indoor Temperature_range..high..indoor
## 2.405125 1.701466
## Temperature.outside Temperature_range..low..outside
## 5.355042 4.878930
## Temperature_range..high..outside mm
## 7.034445 3.324599
# Standard deviation of every column; DateTime cannot be coerced to a number,
# hence its NA and the coercion warning.
apply(weather, 2, sd, na.rm = TRUE)
## Warning in var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm
## = na.rm): NAs introduced by coercion
## DateTime Baro
## NA 9.869662
## Humidity Temperature.indoor
## 5.188886 2.058307
## Temperature_range..low..indoor Temperature_range..high..indoor
## 2.405125 1.701466
## Temperature.outside Temperature_range..low..outside
## 5.355042 4.878930
## Temperature_range..high..outside mm
## 7.034445 3.324599
# Task: Compute the min, max, sd for each weather measurement in a single table and export this to a new CSV file using write.csv()
# Drop the first column (DateTime) so only the numeric measurements remain.
measurements <- weather[, -1]

# One named value per measurement column; vapply() works column by column and
# guarantees a numeric result, unlike apply(), which converts to a matrix.
SD <- vapply(measurements, sd, numeric(1), na.rm = TRUE)
MEAN <- vapply(measurements, mean, numeric(1), na.rm = TRUE)
MAX <- vapply(measurements, max, numeric(1), na.rm = TRUE)
MIN <- vapply(measurements, min, numeric(1), na.rm = TRUE)

# Stack the statistics into one table: a row per statistic, a column per
# measurement.
summaries <- rbind(SD, MEAN, MAX, MIN)
write.csv(x = summaries, file = "weather-summaries.csv")
3.1 Correlation
# Correlation (cor()'s default method) between the outside low and high
# temperature columns.
with(
  weather,
  cor(Temperature_range..low..outside, Temperature_range..high..outside)
)
## [1] 0.8394635
# Task: find the correlation between all pairs of the weather measurements. You will need to look at the documentation (?cor) to see how cor() handles missing data.
3.2 Plots
# Base-graphics scatter plot of the outside low against the outside high.
plot(
  weather$Temperature_range..low..outside,
  weather$Temperature_range..high..outside
)

# The same relationship with ggplot2, one panel per calendar month
# (label = TRUE uses month names rather than numbers for the panels).
ggplot(
  weather,
  aes(
    x = Temperature_range..low..outside,
    y = Temperature_range..high..outside
  )
) +
  geom_point() +
  facet_wrap(~ month(DateTime, label = TRUE))
# Task: make a plot to investigate a two- or three-way relationship of your choice.
# Indoor low against indoor high, one panel per calendar month
# (label = TRUE uses month names rather than numbers for the panels).
ggplot(
  weather,
  aes(
    x = Temperature_range..low..indoor,
    y = Temperature_range..high..indoor
  )
) +
  geom_point() +
  facet_wrap(~ month(DateTime, label = TRUE))
## Warning: Removed 1 rows containing missing values (geom_point).
4 Linear Models Demo
# Regress the outside low on the outside high plus a sine/cosine pair in the
# day of the year, i.e. a single harmonic with a 365-day period.
model <- lm(
  formula = Temperature_range..low..outside ~
    Temperature_range..high..outside +
    sin(2 * pi * yday(DateTime) / 365) +
    cos(2 * pi * yday(DateTime) / 365),
  data = weather
)
summary(model)
##
## Call:
## lm(formula = Temperature_range..low..outside ~ Temperature_range..high..outside +
## sin(2 * pi * yday(DateTime)/365) + cos(2 * pi * yday(DateTime)/365),
## data = weather)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7863 -1.8686 0.0651 1.7131 6.2304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.77682 0.59352 2.994 0.00295 **
## Temperature_range..high..outside 0.39592 0.03687 10.740 < 2e-16 ***
## sin(2 * pi * yday(DateTime)/365) -1.26543 0.18627 -6.794 4.69e-11 ***
## cos(2 * pi * yday(DateTime)/365) -2.04629 0.36633 -5.586 4.67e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.442 on 351 degrees of freedom
## Multiple R-squared: 0.7515, Adjusted R-squared: 0.7494
## F-statistic: 353.9 on 3 and 351 DF, p-value: < 2.2e-16
# Draw the lm diagnostic plots on a single 2 x 2 page, then restore the
# previous graphics settings so the grid does not leak into later base plots.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)

# Scatter-plot matrices: every column first, then only the first five.
pairs(weather)
pairs(weather[, 1:5])

# Outside low against outside high across the whole year (no facets).
ggplot(
  weather,
  aes(
    x = Temperature_range..low..outside,
    y = Temperature_range..high..outside
  )
) +
  geom_point()