These exercises accompany the Basic Statistics tutorial.
Tip: Remember, you can use a ? in front of the function name to access the help documentation for any function (e.g. ?geom_boxplot from ggplot2)
- Using the airdata dataset in the region5air library, first subset the dataset down to four sites of interest: c(840170310032, 840170313103, 840550590019, 840180892008). Name this subsetted data four.sites. We can subset using a vector of site numbers (stored in sites) with the following code: four.sites <- airdata[airdata$site %in% sites, ]. Then, using either the subset() function or brackets [], subset four.sites down to the rows where the parameter column equals 44201 (the parameter value for ozone). Name this new dataset ozone.foursites.
- Using ggplot, create boxplots (geom_boxplot) of the ozone.foursites data, by site.
- Using ggplot, create a density plot of all four sites with transparency set to 0.4.
- Use the psych library and the describeBy() function to describe the ozone.foursites values by site. Use ?describeBy for help with the function.
- For the correlation test, the data need to be reshaped so that each site has its own column. Use the dcast() function from the reshape2 package, which will do this. You will learn more about reshaping data in the intermediate R training. Use ozone.long for the correlation test.
library(reshape2)
# Reshape snippet supplied with the exercise text: one row per `datetime`,
# one column per `site`. It needs `ozone.foursites`, which is only built by
# the solution code further down this file.
# `reshape2::` keeps the call resolvable even when reshape2 has not been
# attached yet, and `value.var` names the measurement column explicitly
# instead of leaving dcast() to guess it.
ozone.long <- reshape2::dcast(
  ozone.foursites,
  formula = datetime ~ site,
  value.var = "value"
)
head(ozone.long)
library(region5air)
# Load the `airdata` data set from the region5air package.
data(airdata)

# The four monitoring sites of interest.
sites <- c(840170310032, 840170313103, 840550590019, 840180892008)

# Keep only the rows that belong to those sites.
four.sites <- airdata[airdata$site %in% sites, ]

# Keep only the ozone rows (parameter 44201). `%in%` is used instead of `==`
# because `==` yields NA for a missing `parameter`, and an NA row index would
# inject an all-NA row into the result; `%in%` never returns NA.
ozone.foursites <- four.sites[four.sites$parameter %in% "44201", ]
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
# Box plots of the ozone values, one box per site.
# The base plot is faceted by `parameter` with a free y scale per panel.
p <- ggplot(data = ozone.foursites, mapping = aes(x = site, y = value)) +
  facet_grid(parameter ~ ., scales = "free_y")

# Fill each box by site and rotate the x-axis labels by 90 degrees.
p +
  geom_boxplot(mapping = aes(fill = factor(site))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
# Histogram of the ozone values; `p` keeps the un-faceted histogram.
p <- ggplot(data = ozone.foursites, mapping = aes(x = value)) +
  geom_histogram()

# Display it with one panel (row) per site.
p + facet_grid(site ~ .)
# Overlaid density curves of the ozone values, one per site, drawn with
# alpha = 0.4 so overlapping curves stay visible.
# `site` is wrapped in factor(), as the box plot does for its fill, so the
# colour/fill mapping is discrete (one curve per site) whatever the storage
# type of the column. labs() keeps the legend titled "site".
ggplot(
  ozone.foursites,
  aes(x = value, color = factor(site), fill = factor(site))
) +
  geom_density(alpha = 0.4) +
  labs(color = "site", fill = "site")
library(psych)
# Descriptive statistics of the ozone values, reported separately per site.
describeBy(
  x = ozone.foursites$value,
  group = ozone.foursites$site
)
## group: 840170310032
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 10076 0.03 0.02 0.03 0.03 0.02 0 0.09 0.09 0.08 -0.35 0
## --------------------------------------------------------
## group: 840170313103
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 14418 0.02 0.01 0.02 0.02 0.01 0 0.08 0.08 0.59 -0.01 0
## --------------------------------------------------------
## group: 840180892008
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 8441 0.03 0.02 0.03 0.03 0.01 0 0.08 0.08 0.07 -0.34 0
## --------------------------------------------------------
## group: 840550590019
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 10651 0.03 0.02 0.03 0.03 0.02 0 0.09 0.09 0.24 -0.12 0
# Pairwise t tests of the ozone values between every pair of sites.
# "holm" is the function's default p-value adjustment, spelled out here.
pairwise.t.test(
  x = ozone.foursites$value,
  g = ozone.foursites$site,
  p.adjust.method = "holm"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: ozone.foursites$value and ozone.foursites$site
##
## 840170310032 840170313103 840180892008
## 840170313103 < 2e-16 - -
## 840180892008 6.4e-13 < 2e-16 -
## 840550590019 6.3e-12 < 2e-16 < 2e-16
##
## P value adjustment method: holm
library(reshape2)
# Reshape to one row per `datetime` with one column per `site`, so the sites
# can be correlated against each other.
# `value.var` names the measurement column explicitly instead of leaving
# dcast() to guess it.
ozone.long <- dcast(
  ozone.foursites,
  formula = datetime ~ site,
  value.var = "value"
)
head(ozone.long)
## datetime 840170310032 840170313103 840180892008 840550590019
## 1 20130326T1600-0600 NA NA NA 0.044
## 2 20130326T1700-0600 NA NA NA 0.044
## 3 20130326T1800-0600 NA NA NA 0.040
## 4 20130326T1900-0600 NA NA NA 0.033
## 5 20130326T2000-0600 NA NA NA 0.037
## 6 20130326T2100-0600 NA NA NA 0.037
# Pearson correlation matrix between site columns, computed only on rows
# where every selected column has a value ("complete.obs").
# NOTE(review): columns 2:4 cover three of the four site columns produced by
# dcast(); column 5 is left out -- confirm this is intended.
chi_air.cor <- cor(
  x = ozone.long[, 2:4],
  use = "complete.obs",
  method = "pearson"
)
chi_air.cor
## 840170310032 840170313103 840180892008
## 840170310032 1.0000000 0.7400649 0.7881611
## 840170313103 0.7400649 1.0000000 0.8368989
## 840180892008 0.7881611 0.8368989 1.0000000