1.1 Packages

library('tidyverse')
# Task: install lubridate
# install.packages("lubridate")
library('lubridate')

1.2 Data Task: download the datasets and place them into your working directory.

2.1 Loading in data in R

# Read the barometer readings. read.csv() brings DateTime in as a factor
# (see the first str() output), so it is parsed into POSIXct with ymd_hms().
barometer <- read.csv("barometer-last-year.csv")
str(barometer)
## 'data.frame':    355 obs. of  2 variables:
##  $ DateTime: Factor w/ 355 levels "2016-10-09 00:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Baro    : num  1022 1020 1016 1013 1006 ...
barometer$DateTime <- ymd_hms(barometer$DateTime)
str(barometer)
## 'data.frame':    355 obs. of  2 variables:
##  $ DateTime: POSIXct, format: "2016-10-09" "2016-10-10" ...
##  $ Baro    : num  1022 1020 1016 1013 1006 ...
# alternative method: read the file and parse DateTime in a single pipeline.
# Every dataset needs the same two steps, so they live in one helper.

# Read one weather CSV and parse its DateTime column.
#
# path: path to a CSV file containing a DateTime column that ymd_hms() can
#       parse (year-month-day hour:minute:second).
# Returns the data frame with DateTime replaced by the parsed date-times.
read_weather_csv <- function(path) {
  read.csv(path) %>%
    mutate(DateTime = ymd_hms(DateTime))
}

barometer <- read_weather_csv("barometer-last-year.csv")
# Task: load in the other three datasets
indoor <- read_weather_csv("indoor-temperature-last-year.csv")
outside <- read_weather_csv("outside-temperature-last-year.csv")
rainfall <- read_weather_csv("rainfall-last-year.csv")
# Open the help page for left_join() before using it.
?left_join()
# Add the indoor readings to the barometer readings, matching rows on
# DateTime.
weather <- barometer %>%
  left_join(indoor, by = "DateTime")
head(weather)
##     DateTime   Baro Humidity Temperature Temperature_range..low.
## 1 2016-10-09 1021.9       54       21.93                    21.0
## 2 2016-10-10 1019.9       52       21.77                    20.4
## 3 2016-10-11 1015.8       51       21.36                    19.9
## 4 2016-10-12 1013.2       51       21.44                    20.0
## 5 2016-10-13 1005.9       52       21.22                    20.1
## 6 2016-10-14  998.6       52       21.02                    19.6
##   Temperature_range..high.
## 1                     22.8
## 2                     23.6
## 3                     23.0
## 4                     23.6
## 5                     22.3
## 6                     22.6
# indoor and outside share the temperature column names, so a suffix tells
# them apart. ".outside" is used because the later summaries, plots and the
# linear model refer to the *.outside column names.
weather <- left_join(weather, outside, by = "DateTime",
                     suffix = c(".indoor", ".outside"))
head(weather)
##     DateTime   Baro Humidity Temperature.indoor
## 1 2016-10-09 1021.9       54              21.93
## 2 2016-10-10 1019.9       52              21.77
## 3 2016-10-11 1015.8       51              21.36
## 4 2016-10-12 1013.2       51              21.44
## 5 2016-10-13 1005.9       52              21.22
## 6 2016-10-14  998.6       52              21.02
##   Temperature_range..low..indoor Temperature_range..high..indoor
## 1                           21.0                            22.8
## 2                           20.4                            23.6
## 3                           19.9                            23.0
## 4                           20.0                            23.6
## 5                           20.1                            22.3
## 6                           19.6                            22.6
##   Temperature.outside Temperature_range..low..outside
## 1               10.66                             7.2
## 2                8.94                             5.6
## 3                8.69                             5.3
## 4               11.55                             9.0
## 5                9.40                             6.0
## 6                9.85                             6.8
##   Temperature_range..high..outside
## 1                             13.8
## 2                             12.8
## 3                             14.3
## 4                             14.9
## 5                             13.3
## 6                             13.3
# rainfall only contributes the mm column, so there is no name clash and no
# suffix is needed for this join.
weather <- left_join(weather, rainfall, by = "DateTime")
head(weather)
##     DateTime   Baro Humidity Temperature.indoor
## 1 2016-10-09 1021.9       54              21.93
## 2 2016-10-10 1019.9       52              21.77
## 3 2016-10-11 1015.8       51              21.36
## 4 2016-10-12 1013.2       51              21.44
## 5 2016-10-13 1005.9       52              21.22
## 6 2016-10-14  998.6       52              21.02
##   Temperature_range..low..indoor Temperature_range..high..indoor
## 1                           21.0                            22.8
## 2                           20.4                            23.6
## 3                           19.9                            23.0
## 4                           20.0                            23.6
## 5                           20.1                            22.3
## 6                           19.6                            22.6
##   Temperature.outside Temperature_range..low..outside
## 1               10.66                             7.2
## 2                8.94                             5.6
## 3                8.69                             5.3
## 4               11.55                             9.0
## 5                9.40                             6.0
## 6                9.85                             6.8
##   Temperature_range..high..outside  mm
## 1                             13.8 0.0
## 2                             12.8 0.0
## 3                             14.3 0.0
## 4                             14.9 0.0
## 5                             13.3 0.0
## 6                             13.3 1.1
# Build the combined table in one expression by folding left_join() over the
# four tables, always matching on DateTime. The suffix only matters for the
# outside join, the one place where non-key column names clash.
weather <- Reduce(
  function(joined, next_table) {
    left_join(joined, next_table, by = "DateTime",
              suffix = c(".indoor", ".outside"))
  },
  list(barometer, indoor, outside, rainfall)
)
head(weather)
##     DateTime   Baro Humidity Temperature.indoor
## 1 2016-10-09 1021.9       54              21.93
## 2 2016-10-10 1019.9       52              21.77
## 3 2016-10-11 1015.8       51              21.36
## 4 2016-10-12 1013.2       51              21.44
## 5 2016-10-13 1005.9       52              21.22
## 6 2016-10-14  998.6       52              21.02
##   Temperature_range..low..indoor Temperature_range..high..indoor
## 1                           21.0                            22.8
## 2                           20.4                            23.6
## 3                           19.9                            23.0
## 4                           20.0                            23.6
## 5                           20.1                            22.3
## 6                           19.6                            22.6
##   Temperature.outside Temperature_range..low..outside
## 1               10.66                             7.2
## 2                8.94                             5.6
## 3                8.69                             5.3
## 4               11.55                             9.0
## 5                9.40                             6.0
## 6                9.85                             6.8
##   Temperature_range..high..outside  mm
## 1                             13.8 0.0
## 2                             12.8 0.0
## 3                             14.3 0.0
## 4                             14.9 0.0
## 5                             13.3 0.0
## 6                             13.3 1.1

2.3 Summaries

# Per-column summary (quartiles, mean and NA count) of the joined table.
weather %>% summary()
##     DateTime                        Baro           Humidity    
##  Min.   :2016-10-09 00:00:00   Min.   : 979.6   Min.   :37.00  
##  1st Qu.:2017-01-06 12:00:00   1st Qu.:1004.9   1st Qu.:44.00  
##  Median :2017-04-06 00:00:00   Median :1010.5   Median :48.00  
##  Mean   :2017-04-06 19:56:37   Mean   :1010.0   Mean   :48.52  
##  3rd Qu.:2017-07-03 12:00:00   3rd Qu.:1016.0   3rd Qu.:52.00  
##  Max.   :2017-10-09 00:00:00   Max.   :1035.6   Max.   :59.00  
##                                                 NA's   :1      
##  Temperature.indoor Temperature_range..low..indoor
##  Min.   :18.04      Min.   :14.90                 
##  1st Qu.:20.34      1st Qu.:18.73                 
##  Median :21.71      Median :20.60                 
##  Mean   :21.83      Mean   :20.56                 
##  3rd Qu.:22.71      3rd Qu.:21.90                 
##  Max.   :29.21      Max.   :28.20                 
##  NA's   :1          NA's   :1                     
##  Temperature_range..high..indoor Temperature.outside
##  Min.   :19.70                   Min.   :-1.81      
##  1st Qu.:22.50                   1st Qu.: 7.39      
##  Median :23.20                   Median :10.96      
##  Mean   :23.53                   Mean   :11.14      
##  3rd Qu.:24.10                   3rd Qu.:15.05      
##  Max.   :31.10                   Max.   :26.38      
##  NA's   :1                                          
##  Temperature_range..low..outside Temperature_range..high..outside
##  Min.   :-4.100                  Min.   : 1.50                   
##  1st Qu.: 4.350                  1st Qu.:10.25                   
##  Median : 8.000                  Median :15.10                   
##  Mean   : 7.866                  Mean   :15.52                   
##  3rd Qu.:12.050                  3rd Qu.:19.85                   
##  Max.   :18.700                  Max.   :38.50                   
##                                                                  
##        mm        
##  Min.   : 0.000  
##  1st Qu.: 0.000  
##  Median : 0.000  
##  Mean   : 1.549  
##  3rd Qu.: 1.100  
##  Max.   :23.200  
##  NA's   :2
# mm contains missing values, so the plain mean is NA ...
mean(weather[["mm"]])
## [1] NA
# ... unless the missing values are dropped first.
mean(weather[["mm"]], na.rm = TRUE)
## [1] 1.548725
# Pitfall: apply() converts the whole data frame to a matrix first. DateTime
# is not numeric, so the matrix becomes text; sd() has to coerce it back and
# the DateTime entry ends up as NA with a coercion warning.
apply(weather, 2, sd, na.rm = TRUE)
## Warning in var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm
## = na.rm): NAs introduced by coercion
##                         DateTime                             Baro 
##                               NA                         9.869662 
##                         Humidity               Temperature.indoor 
##                         5.188886                         2.058307 
##   Temperature_range..low..indoor  Temperature_range..high..indoor 
##                         2.405125                         1.701466 
##              Temperature.outside  Temperature_range..low..outside 
##                         5.355042                         4.878930 
## Temperature_range..high..outside                               mm 
##                         7.034445                         3.324599
# Dropping DateTime (column 1) keeps the matrix numeric, so every measurement
# gets its standard deviation without any coercion.
apply(weather[, -1], 2, sd, na.rm = TRUE)
##                             Baro                         Humidity
##                         9.869662                         5.188886
##               Temperature.indoor   Temperature_range..low..indoor
##                         2.058307                         2.405125
##  Temperature_range..high..indoor              Temperature.outside
##                         1.701466                         5.355042
##  Temperature_range..low..outside Temperature_range..high..outside
##                         4.878930                         7.034445
##                               mm
##                         3.324599
# Task: Compute the min, max, sd for each weather measurement in a single table and export this to a new CSV file using write.csv()
# One statistic per measurement column. DateTime (column 1) is left out so
# only numeric columns are summarised, and na.rm = TRUE skips missing
# readings. vapply() works column by column and guarantees one number each.
SD <- vapply(weather[, -1], sd, numeric(1), na.rm = TRUE)
MEAN <- vapply(weather[, -1], mean, numeric(1), na.rm = TRUE)
MAX <- vapply(weather[, -1], max, numeric(1), na.rm = TRUE)
MIN <- vapply(weather[, -1], min, numeric(1), na.rm = TRUE)
# Stack the statistics into one table; rbind() names each row after the
# variable it came from (SD, MEAN, MAX, MIN).
summaries <- rbind(SD, MEAN, MAX, MIN)
write.csv(x = summaries, file = "weather-summaries.csv")

3.1 Correlation

# Correlation between the outside low and outside high temperature columns.
with(weather, cor(Temperature_range..low..outside,
                  Temperature_range..high..outside))
## [1] 0.8394635
# Task: find the correlation between all pairs of the weather measurements. You will need to read the documentation (?cor, in particular the `use` argument) to see how cor handles missing data.

3.2 Plots

# Base-graphics scatter plot: outside low temperature (x) against outside
# high temperature (y).
plot(weather$Temperature_range..low..outside, weather$Temperature_range..high..outside)

# The same relationship with ggplot2, one panel per calendar month.
ggplot(weather, aes(x = Temperature_range..low..outside,
                    y = Temperature_range..high..outside)) +
  geom_point() +
  facet_wrap(~ month(DateTime, label = TRUE))

# Task: make a plot to investigate a two or three way relationship of your choice.
ggplot(weather, aes(x = Temperature_range..low..indoor,
                    y = Temperature_range..high..indoor)) +
  geom_point() +
  facet_wrap(~ month(DateTime, label = TRUE))
## Warning: Removed 1 rows containing missing values (geom_point).

4 Linear Models Demo

# Regress the outside low temperature on the outside high temperature plus a
# sine/cosine pair in day-of-year with period 365, which lets the fit include
# a smooth yearly cycle.
# NOTE(review): yday() reaches 366 in leap years and the data starts in 2016,
# so dividing by 365 is only approximately one full cycle per year.
model <- lm(Temperature_range..low..outside ~ Temperature_range..high..outside +
              sin(2*pi*yday(DateTime)/365) + cos(2*pi*yday(DateTime)/365), data=weather)
# Print the residual summary, coefficient table and R-squared for the fit.
summary(model)
## 
## Call:
## lm(formula = Temperature_range..low..outside ~ Temperature_range..high..outside + 
##     sin(2 * pi * yday(DateTime)/365) + cos(2 * pi * yday(DateTime)/365), 
##     data = weather)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7863 -1.8686  0.0651  1.7131  6.2304 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       1.77682    0.59352   2.994  0.00295 ** 
## Temperature_range..high..outside  0.39592    0.03687  10.740  < 2e-16 ***
## sin(2 * pi * yday(DateTime)/365) -1.26543    0.18627  -6.794 4.69e-11 ***
## cos(2 * pi * yday(DateTime)/365) -2.04629    0.36633  -5.586 4.67e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.442 on 351 degrees of freedom
## Multiple R-squared:  0.7515, Adjusted R-squared:  0.7494 
## F-statistic: 353.9 on 3 and 351 DF,  p-value: < 2.2e-16
# Draw the lm diagnostic plots in a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards so later base-graphics plots are
# not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)

# Scatter-plot matrix of every column in the joined table.
pairs(weather)

# The same, restricted to the first five columns.
pairs(weather[, seq_len(5)])

# Outside low temperature (x) against outside high temperature (y), all
# months in a single panel.
ggplot(
  data = weather,
  mapping = aes(
    x = Temperature_range..low..outside,
    y = Temperature_range..high..outside
  )
) +
  geom_point()