Initial exploration
# Work on a copy of the built-in `airquality` data set.
df2 <- airquality

# Preview the first rows.
head(df2)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
Tidy data
# Build the working table:
#   * sort by temperature (ascending), ties broken by day (descending),
#     while both columns are still available;
#   * convert Temp from Fahrenheit to whole degrees Celsius;
#   * drop the original Temp and Day columns;
#   * rename Solar.R -> Solar and let the Celsius column take the Temp name.
tidydf <- df2 %>%
  arrange(Temp, desc(Day)) %>%
  mutate(TempC = round((Temp - 32) * 5 / 9)) %>%
  select(-Temp, -Day) %>%
  rename(Solar = Solar.R, Temp = TempC)

# Preview the first rows of the result.
head(tidydf)
## Ozone Solar Wind Month Temp
## 1 NA NA 14.3 5 13
## 2 NA NA 8.0 5 14
## 3 NA 66 16.6 5 14
## 4 6 78 18.4 5 14
## 5 NA 266 14.9 5 14
## 6 18 65 13.2 5 14
Average temperature by month
# Mean temperature (Celsius) for each month, ignoring missing values.
# Only the grouping key and the summary column appear in the result.
tidydf %>%
  group_by(Month) %>%
  summarise(Average = mean(Temp, na.rm = TRUE))
## # A tibble: 5 x 2
## Month Average
## <int> <dbl>
## 1 5 18.6
## 2 6 26.1
## 3 7 28.8
## 4 8 28.8
## 5 9 25
Welch two-sample t-test: temperature in May–July vs. August–September
# Indicator column: 0 for months before August (Month < 8), 1 otherwise.
dummydf <- tidydf %>%
  mutate(Group = as.numeric(Month >= 8))

# Compare mean temperature between the two groups; t.test() uses the
# Welch (unequal-variance) form by default.
t.test(Temp ~ Group, data = dummydf)
##
## Welch Two Sample t-test
##
## data: Temp by Group
## t = -3.0203, df = 143.3, p-value = 0.002991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.0907428 -0.8543748
## sample estimates:
## mean in group 0 mean in group 1
## 24.47826 26.95082