Initial exploration

# Bind the airquality data frame to df2 and preview its first rows.
df2 <- airquality
head(df2)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

Tidy data

# Sort by Fahrenheit temperature (ties broken by descending day), convert
# Temp to whole degrees Celsius, then keep the remaining columns with
# Solar.R renamed to Solar. Day is dropped and Temp ends up last.
tidydf <- df2 %>%
  arrange(Temp, desc(Day)) %>%
  mutate(Temp = round((Temp - 32) * 5 / 9)) %>%
  select(Ozone, Solar = Solar.R, Wind, Month, Temp)

# Preview the first rows of the tidied data.
head(tidydf)
##   Ozone Solar Wind Month Temp
## 1    NA    NA 14.3     5   13
## 2    NA    NA  8.0     5   14
## 3    NA    66 16.6     5   14
## 4     6    78 18.4     5   14
## 5    NA   266 14.9     5   14
## 6    18    65 13.2     5   14

Average temperature by month

# Mean Celsius temperature per month; missing temperatures are ignored.
tidydf %>%
  group_by(Month) %>%
  summarise(Average = mean(Temp, na.rm = TRUE))
## # A tibble: 5 x 2
##   Month Average
##   <int>   <dbl>
## 1     5    18.6
## 2     6    26.1
## 3     7    28.8
## 4     8    28.8
## 5     9    25

t-test: temperature in May–July vs. August–September

# Group is 0 for months before August and 1 from August onward.
dummydf <- tidydf %>%
  mutate(Group = as.numeric(Month >= 8))

# Compare mean temperature between the two groups.
t.test(Temp ~ Group, data = dummydf)
## 
##  Welch Two Sample t-test
## 
## data:  Temp by Group
## t = -3.0203, df = 143.3, p-value = 0.002991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.0907428 -0.8543748
## sample estimates:
## mean in group 0 mean in group 1 
##        24.47826        26.95082