library(tidyverse)
library(ggbeeswarm)
library(broom)
library(mosaic)
Cloud seeding: In an experiment to determine whether seeding clouds with silver iodide increases rainfall, 52 clouds were randomly assigned to be seeded or not. The amount of rain they generated was then measured (in acre-feet). (Data and story taken from https://dasl.datadescription.com/datafile/cloud-seeding)
d <- read_tsv("http://www.stat.umn.edu/~rend0020/data/cloud-seeding.txt")
## Parsed with column specification:
## cols(
## `Unseeded Clouds` = col_double(),
## `Seeded Clouds` = col_double()
## )
Caution: this data is not in “tidy” format…
d <- d %>% gather("Trt", "Rain")
gf_boxplot(Rain ~ Trt, data=d, outlier.shape=NA) %>%
gf_point(position=position_beeswarm())
gf_dhistogram(~Rain|Trt, data=d, bins=7) %>% gf_dens() %>% gf_fitdistr()
gf_qq(~Rain|Trt, data=d) %>% gf_qqline()
gf_qq(~Rain, color=~Trt, data=d) %>% gf_qqline()
will use natural log, could also use log2 or log10
d <- d %>% mutate(logRain=log(Rain))
gf_boxplot(Rain ~ Trt, data=d, outlier.shape=NA) %>%
gf_point(position=position_beeswarm()) +
scale_y_log10()
gf_boxplot(logRain ~ Trt, data=d, outlier.shape=NA) %>%
gf_point(position=position_beeswarm())
gf_dhistogram(~logRain|Trt, data=d, bins=7) %>% gf_dens() %>% gf_fitdistr()
gf_qq(~logRain|Trt, data=d) %>% gf_qqline()
gf_qq(~logRain, color=~Trt, data=d) %>% gf_qqline()
ss <- d %>% group_by(Trt) %>% summarize(m=mean(logRain), s=sd(logRain), n=n()); ss
## # A tibble: 2 x 4
## Trt m s n
## <chr> <dbl> <dbl> <int>
## 1 Seeded Clouds 5.13 1.60 26
## 2 Unseeded Clouds 3.99 1.64 26
dif <- ss$m[1] - ss$m[2]; dif
## [1] 1.143781
SE <- sqrt(ss$s[1]^2/ss$n[1] + ss$s[2]^2/ss$n[2]); SE
## [1] 0.4495342
z <- dif/SE; z
## [1] 2.544369
df.guess <- min(ss$n) - 1; df.guess
## [1] 25
pval <- pt(z, df.guess, lower.tail=FALSE)*2; pval
## [1] 0.0175028
ME <- qt(1-0.05/2, df.guess)*SE; ME
## [1] 0.9258331
c(dif - ME, dif + ME)
## [1] 0.217948 2.069614
t.test(logRain ~ Trt, data=d)
##
## Welch Two Sample t-test
##
## data: logRain by Trt
## t = 2.5444, df = 49.966, p-value = 0.01408
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2408498 2.0467125
## sample estimates:
## mean in group Seeded Clouds mean in group Unseeded Clouds
## 5.134187 3.990406
out <- t.test(logRain ~ Trt, data=d) %>% tidy()
out %>% select(estimate, conf.low, conf.high, p.value) %>%
mutate(ratio.est = exp(estimate), ratio.low=exp(conf.low), ratio.high=exp(conf.high))
## # A tibble: 1 x 7
## estimate conf.low conf.high p.value ratio.est ratio.low ratio.high
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.14 0.241 2.05 0.0141 3.14 1.27 7.74