Lab_5_Assignment

Author

Dinah Marion Abeja

1. Load Libraries.

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.2.1     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(ggsci)
library(patchwork)
library(Hmisc)
Loading required package: lattice
Loading required package: survival
Loading required package: Formula

Attaching package: 'Hmisc'

The following objects are masked from 'package:dplyr':

    src, summarize

The following objects are masked from 'package:base':

    format.pval, units
library(performance)
library(lubridate)

Attaching package: 'lubridate'

The following objects are masked from 'package:base':

    date, intersect, setdiff, union

2. Load data from Old Faithful. Assess the correlation between waiting time and eruption time using a Spearman’s test and a scatter plot. Report Spearman’s Rho and interpret the results

faithful<-faithful
head(faithful)
  eruptions waiting
1     3.600      79
2     1.800      54
3     3.333      74
4     2.283      62
5     4.533      85
6     2.883      55
#eruptions = length of eruption time in minutes
#waiting = time between eruptions in minutes
ggplot(data= faithful, mapping = aes(x = waiting, y= eruptions))+
  geom_point()+
  geom_smooth(method = lm, se = FALSE, fullrange= TRUE) + # Se - Don't add shaded confidence region
  labs(title = "Relationship between length of eruption and waiting time") +
  theme(plot.title = element_text(hjust = 0.5))
`geom_smooth()` using formula = 'y ~ x'

  theme_classic()
List of 94
 $ line                      :List of 6
  ..$ colour       : chr "black"
  ..$ linewidth    : num 0.5
  ..$ linetype     : num 1
  ..$ lineend      : chr "butt"
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ rect                      :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : chr "black"
  ..$ linewidth    : num 0.5
  ..$ linetype     : num 1
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ text                      :List of 11
  ..$ family       : chr ""
  ..$ face         : chr "plain"
  ..$ colour       : chr "black"
  ..$ size         : num 11
  ..$ hjust        : num 0.5
  ..$ vjust        : num 0.5
  ..$ angle        : num 0
  ..$ lineheight   : num 0.9
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ title                     : NULL
 $ aspect.ratio              : NULL
 $ axis.title                : NULL
 $ axis.title.x              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 2.75points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.x.top          :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 2.75points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.x.bottom       : NULL
 $ axis.title.y              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : num 90
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.75points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.y.left         : NULL
 $ axis.title.y.right        :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : num -90
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.75points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text                 :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : chr "grey30"
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 2.2points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x.top           :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 2.2points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x.bottom        : NULL
 $ axis.text.y               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 1
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.y.left          : NULL
 $ axis.text.y.right         :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.2points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.ticks                :List of 6
  ..$ colour       : chr "grey20"
  ..$ linewidth    : NULL
  ..$ linetype     : NULL
  ..$ lineend      : NULL
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ axis.ticks.x              : NULL
 $ axis.ticks.x.top          : NULL
 $ axis.ticks.x.bottom       : NULL
 $ axis.ticks.y              : NULL
 $ axis.ticks.y.left         : NULL
 $ axis.ticks.y.right        : NULL
 $ axis.ticks.length         : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ axis.ticks.length.x       : NULL
 $ axis.ticks.length.x.top   : NULL
 $ axis.ticks.length.x.bottom: NULL
 $ axis.ticks.length.y       : NULL
 $ axis.ticks.length.y.left  : NULL
 $ axis.ticks.length.y.right : NULL
 $ axis.line                 :List of 6
  ..$ colour       : chr "black"
  ..$ linewidth    : 'rel' num 1
  ..$ linetype     : NULL
  ..$ lineend      : NULL
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ axis.line.x               : NULL
 $ axis.line.x.top           : NULL
 $ axis.line.x.bottom        : NULL
 $ axis.line.y               : NULL
 $ axis.line.y.left          : NULL
 $ axis.line.y.right         : NULL
 $ legend.background         :List of 5
  ..$ fill         : NULL
  ..$ colour       : logi NA
  ..$ linewidth    : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ legend.margin             : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
  ..- attr(*, "unit")= int 8
 $ legend.spacing            : 'simpleUnit' num 11points
  ..- attr(*, "unit")= int 8
 $ legend.spacing.x          : NULL
 $ legend.spacing.y          : NULL
 $ legend.key                : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ legend.key.size           : 'simpleUnit' num 1.2lines
  ..- attr(*, "unit")= int 3
 $ legend.key.height         : NULL
 $ legend.key.width          : NULL
 $ legend.text               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.text.align         : NULL
 $ legend.title              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.title.align        : NULL
 $ legend.position           : chr "right"
 $ legend.direction          : NULL
 $ legend.justification      : chr "center"
 $ legend.box                : NULL
 $ legend.box.just           : NULL
 $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
  ..- attr(*, "unit")= int 1
 $ legend.box.background     : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ legend.box.spacing        : 'simpleUnit' num 11points
  ..- attr(*, "unit")= int 8
 $ panel.background          :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : logi NA
  ..$ linewidth    : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ panel.border              : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ panel.spacing             : 'simpleUnit' num 5.5points
  ..- attr(*, "unit")= int 8
 $ panel.spacing.x           : NULL
 $ panel.spacing.y           : NULL
 $ panel.grid                :List of 6
  ..$ colour       : chr "grey92"
  ..$ linewidth    : NULL
  ..$ linetype     : NULL
  ..$ lineend      : NULL
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ panel.grid.major          : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ panel.grid.minor          : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ panel.grid.major.x        : NULL
 $ panel.grid.major.y        : NULL
 $ panel.grid.minor.x        : NULL
 $ panel.grid.minor.y        : NULL
 $ panel.ontop               : logi FALSE
 $ plot.background           :List of 5
  ..$ fill         : NULL
  ..$ colour       : chr "white"
  ..$ linewidth    : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ plot.title                :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 1.2
  ..$ hjust        : num 0
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.title.position       : chr "panel"
 $ plot.subtitle             :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.caption              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : num 1
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 5.5points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.caption.position     : chr "panel"
 $ plot.tag                  :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 1.2
  ..$ hjust        : num 0.5
  ..$ vjust        : num 0.5
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.tag.position         : chr "topleft"
 $ plot.margin               : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
  ..- attr(*, "unit")= int 8
 $ strip.background          :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : chr "black"
  ..$ linewidth    : 'rel' num 2
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ strip.background.x        : NULL
 $ strip.background.y        : NULL
 $ strip.clip                : chr "inherit"
 $ strip.placement           : chr "inside"
 $ strip.text                :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : chr "grey10"
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 4.4points 4.4points 4.4points 4.4points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ strip.text.x              : NULL
 $ strip.text.y              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : num -90
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ strip.switch.pad.grid     : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ strip.switch.pad.wrap     : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ strip.text.y.left         :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : num 90
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi TRUE
 - attr(*, "validate")= logi TRUE
cor.test(faithful$eruptions, faithful$waiting, method = "spearman")
Warning in cor.test.default(faithful$eruptions, faithful$waiting, method =
"spearman"): Cannot compute exact p-value with ties

    Spearman's rank correlation rho

data:  faithful$eruptions and faithful$waiting
S = 744659, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.7779721 

The (r=0.77797) positive and a (p value = 2.2e-16 < 0.05) suggest that there is a statistically significant relatively strong positive correlation between eruption time and waiting time.

3. DO A CHI-SQUARE. First we need some categorical data! So, I will make it for you :)

#distribution of class of patients admitted to self poisoning (A) and gastro units (B)
#socioclass
socioclass<-c("I","II","III","IV","V")
selfpoison<-c(17,25,39,42,32)
gastro<-c(22,46,73,91,57)

patients<-data.frame(socioclass,selfpoison,gastro)
head(patients)
  socioclass selfpoison gastro
1          I         17     22
2         II         25     46
3        III         39     73
4         IV         42     91
5          V         32     57
str(patients)
'data.frame':   5 obs. of  3 variables:
 $ socioclass: chr  "I" "II" "III" "IV" ...
 $ selfpoison: num  17 25 39 42 32
 $ gastro    : num  22 46 73 91 57
#remove the categories so the data are clean for chi-square
pat2<-patients[,-1]

#perform a chisq test -- write a hypothesis and then perform the test!
chisq.test(pat2)

    Pearson's Chi-squared test

data:  pat2
X-squared = 1.9885, df = 4, p-value = 0.7379

\(H_0\):The two variables are independent. (There is no correlation between the number of patients from various socioclasses admitted to gastro and self poison units)

\(H_1\):The two variables relate to each other.(There is a correlation between the number of patients from various socioclasses admitted to gastro and self poison units)

X-squared = 1.9885, and p-value = 0.7379(statistically not significant). Therefore, we fail to reject the null hypothesis and conclude that the two variables are in fact not related to each other. That is; they are independent and that there’s is no difference in going to poison control or gastro units

4. Perform a simple linear regression on the faithful data and check all assumptions. Report the summary your regression, interpret it, and provide a graph to go along with it–with a regression line. Next, test your assumptions and report the results. Is the relationship between your two variables linear? Don’t forget a hypothesis!

p<-ggplot(data= faithful, mapping = aes(x = waiting, y= eruptions))+
  geom_point()+
  geom_smooth(method = lm, se = TRUE, fullrange= TRUE) + # Se - Don't add shaded confidence region
  labs(title = "Relationship between length of eruption and waiting time") +
  theme(plot.title = element_text(hjust = 0.5))+
  theme_classic()
p
`geom_smooth()` using formula = 'y ~ x'

testing assumptions

faith_lm <- lm(eruptions ~ waiting,data= faithful)
summary(faith_lm)

Call:
lm(formula = eruptions ~ waiting, data = faithful)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.29917 -0.37689  0.03508  0.34909  1.19329 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.874016   0.160143  -11.70   <2e-16 ***
waiting      0.075628   0.002219   34.09   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4965 on 270 degrees of freedom
Multiple R-squared:  0.8115,    Adjusted R-squared:  0.8108 
F-statistic:  1162 on 1 and 270 DF,  p-value: < 2.2e-16
check_model(faith_lm)

summary(faith_lm)

Call:
lm(formula = eruptions ~ waiting, data = faithful)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.29917 -0.37689  0.03508  0.34909  1.19329 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.874016   0.160143  -11.70   <2e-16 ***
waiting      0.075628   0.002219   34.09   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4965 on 270 degrees of freedom
Multiple R-squared:  0.8115,    Adjusted R-squared:  0.8108 
F-statistic:  1162 on 1 and 270 DF,  p-value: < 2.2e-16
  • linearity is met ( strong positive relationship between eruption time and waiting time - plot p)
  • Normality is met as the points fall flat and horizontal on the reference line
  • Equal variance is not quite met (homogeneity of variance plot)
  • Outliers - from the Leverage plot, there does not see to be any high leverage or very influential outliers and so this condition is also met.
  • Independence: we do not have much information of how the participants in the study were selected, however we can assume independence.

5 Load palmerpenguins. Using the penguins data, assess whether there is a difference in body mass between Adelie and Gentoo penguins. If there is a difference, assess the direction of the difference (one tends to be bigger than the other, etc). You should: a.) Tell me what statistical test you are using, b.) filter the data appropriately, c.) run your test, d.) make a corresponding visual to show the appropriate differences (means + error bars!). Write a hypothesis, run your test, interpret the test-statistics and use the stats + graph to tell me the result.

a)

  • T.test
library(palmerpenguins)
pengs <- penguins
head(pengs)
# A tibble: 6 × 8
  species island    bill_length_mm bill_depth_mm flipper_l…¹ body_…² sex    year
  <fct>   <fct>              <dbl>         <dbl>       <int>   <int> <fct> <int>
1 Adelie  Torgersen           39.1          18.7         181    3750 male   2007
2 Adelie  Torgersen           39.5          17.4         186    3800 fema…  2007
3 Adelie  Torgersen           40.3          18           195    3250 fema…  2007
4 Adelie  Torgersen           NA            NA            NA      NA <NA>   2007
5 Adelie  Torgersen           36.7          19.3         193    3450 fema…  2007
6 Adelie  Torgersen           39.3          20.6         190    3650 male   2007
# … with abbreviated variable names ¹​flipper_length_mm, ²​body_mass_g

b)

pengs_gentoo <- pengs %>%
  filter(species == "Gentoo") %>%
drop_na()
head(pengs_gentoo)
# A tibble: 6 × 8
  species island bill_length_mm bill_depth_mm flipper_leng…¹ body_…² sex    year
  <fct>   <fct>           <dbl>         <dbl>          <int>   <int> <fct> <int>
1 Gentoo  Biscoe           46.1          13.2            211    4500 fema…  2007
2 Gentoo  Biscoe           50            16.3            230    5700 male   2007
3 Gentoo  Biscoe           48.7          14.1            210    4450 fema…  2007
4 Gentoo  Biscoe           50            15.2            218    5700 male   2007
5 Gentoo  Biscoe           47.6          14.5            215    5400 male   2007
6 Gentoo  Biscoe           46.5          13.5            210    4550 fema…  2007
# … with abbreviated variable names ¹​flipper_length_mm, ²​body_mass_g
pengs_adelie <- pengs %>%
  filter(species =="Adelie") %>%
drop_na()
head(pengs_adelie)
# A tibble: 6 × 8
  species island    bill_length_mm bill_depth_mm flipper_l…¹ body_…² sex    year
  <fct>   <fct>              <dbl>         <dbl>       <int>   <int> <fct> <int>
1 Adelie  Torgersen           39.1          18.7         181    3750 male   2007
2 Adelie  Torgersen           39.5          17.4         186    3800 fema…  2007
3 Adelie  Torgersen           40.3          18           195    3250 fema…  2007
4 Adelie  Torgersen           36.7          19.3         193    3450 fema…  2007
5 Adelie  Torgersen           39.3          20.6         190    3650 male   2007
6 Adelie  Torgersen           38.9          17.8         181    3625 fema…  2007
# … with abbreviated variable names ¹​flipper_length_mm, ²​body_mass_g

c)

t.test(pengs_adelie$body_mass_g, pengs_gentoo$body_mass_g)

    Welch Two Sample t-test

data:  pengs_adelie$body_mass_g and pengs_gentoo$body_mass_g
t = -23.254, df = 242.14, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1503.702 -1268.843
sample estimates:
mean of x mean of y 
 3706.164  5092.437 

pvalue = 2.2e-16 and thus there is a significant difference between the body mass of Adelie and Gentoo

d)

pengs_1 <- pengs %>%
filter(species!="Chinstrap") %>%
  drop_na() %>%
droplevels()

head(pengs_1)
# A tibble: 6 × 8
  species island    bill_length_mm bill_depth_mm flipper_l…¹ body_…² sex    year
  <fct>   <fct>              <dbl>         <dbl>       <int>   <int> <fct> <int>
1 Adelie  Torgersen           39.1          18.7         181    3750 male   2007
2 Adelie  Torgersen           39.5          17.4         186    3800 fema…  2007
3 Adelie  Torgersen           40.3          18           195    3250 fema…  2007
4 Adelie  Torgersen           36.7          19.3         193    3450 fema…  2007
5 Adelie  Torgersen           39.3          20.6         190    3650 male   2007
6 Adelie  Torgersen           38.9          17.8         181    3625 fema…  2007
# … with abbreviated variable names ¹​flipper_length_mm, ²​body_mass_g
levels(pengs_1$species)
[1] "Adelie" "Gentoo"
mean_pengs<-pengs_1 %>%
  group_by(species) %>%
  summarise(mean_bm=mean(body_mass_g), sd_bm=sd(body_mass_g), n=n(), se_bm=sd_bm/sqrt(n))
head(mean_pengs)
# A tibble: 2 × 5
  species mean_bm sd_bm     n se_bm
  <fct>     <dbl> <dbl> <int> <dbl>
1 Adelie    3706.  459.   146  38.0
2 Gentoo    5092.  501.   119  46.0
p1<-ggplot(mean_pengs, aes(x=species, y=mean_bm, color=species))+
  geom_point()+
  geom_errorbar(aes(x=species, ymin=mean_bm-se_bm, ymax=mean_bm+se_bm), width=0.2)+
  scale_color_aaas()+
  theme_classic()+
  labs(title='Body mass of penguins')+
  theme(plot.title = element_text(hjust = 0.5))
p1

\(H_0\):There is no difference in body mass between Adelie and Gentoo penguins

\(H_1\):There is a difference in body mass between Adelie and Gentoo penguins

The p-value = 2.2e-16 and so there is a significant difference in the body mass between Adelie and Gentoo penguins. The scatter plot shows that the average body mass of the Gentoo penguins is higher than Adelie Penguins.


1. Make your essentials and depth components into tabsets using panel::tabset. Note that this is a visual change you can make to Quarto documents - this is testing your ability to modify Quarto docs and to google/find answers to common problems in R

2. Make your t-test graph from Essentials better – add the raw data in behind your means + error bars

p1<-ggplot(mean_pengs, aes(y=mean_bm, x=species, color = species))+
  geom_point()+
  labs(title='Body mass of penguins')+
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5))
  
p1+geom_errorbar(aes(x=species, ymin=mean_bm-se_bm, ymax=mean_bm+se_bm), width=0.2)+
  scale_color_aaas()+
geom_jitter(data=pengs_1,aes(y = body_mass_g, x = species))

3 What are the effects of body_mass_g and sex on bill_length_mm in Chinstrap penguins? Setup a more complex linear regression to test this! You have 2 variables of interest. You can choose for this to be interactive or additive. Write a hypothesis, filter data as needed, run the test, make a graph, interpret the results.

\(H_0\): The body mass and sex do not affect the bill length of the Chinstrap penguins.

\(H_1\): The body mass and sex does have an affect on the bill length of the Chinstrap penguins.

pengs_2 <- pengs %>%
filter(species =="Chinstrap") %>%
  drop_na() %>%
droplevels()
head(pengs_2)
# A tibble: 6 × 8
  species   island bill_length_mm bill_depth_mm flipper_le…¹ body_…² sex    year
  <fct>     <fct>           <dbl>         <dbl>        <int>   <int> <fct> <int>
1 Chinstrap Dream            46.5          17.9          192    3500 fema…  2007
2 Chinstrap Dream            50            19.5          196    3900 male   2007
3 Chinstrap Dream            51.3          19.2          193    3650 male   2007
4 Chinstrap Dream            45.4          18.7          188    3525 fema…  2007
5 Chinstrap Dream            52.7          19.8          197    3725 male   2007
6 Chinstrap Dream            45.2          17.8          198    3950 fema…  2007
# … with abbreviated variable names ¹​flipper_length_mm, ²​body_mass_g
chin_lm <- lm(bill_length_mm ~ body_mass_g*sex,data= pengs_2)
anova(chin_lm)
Analysis of Variance Table

Response: bill_length_mm
                Df Sum Sq Mean Sq F value    Pr(>F)    
body_mass_g      1 197.10 197.101 34.0126 1.959e-07 ***
sex              1 172.66 172.661 29.7951 8.337e-07 ***
body_mass_g:sex  1   6.45   6.453  1.1136    0.2953    
Residuals       64 370.88   5.795                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(data= pengs_2, mapping = aes(x = body_mass_g, y= bill_length_mm, color = sex))+
  geom_point()+
  geom_smooth(method = lm, se = TRUE, fullrange= TRUE) + # Se - Don't add shaded confidence region
  labs(title = "Effect of body mass and sex on the bill length of Chinstrap Penguins") +
  theme(plot.title = element_text(hjust = 0.5))+
  theme_classic()
`geom_smooth()` using formula = 'y ~ x'

The p values =1.959e-07 and 8.337e-07 show that both the body mass and sex respectively, have a significant effect on the bill length of the Chinstrap penguins. interactive effect of body mass and sex on bill length. However, the interactive effect of body mass and sex does not have significant effect on bill length of chinstrap(p value = 0.2953). This can further be seen on the plot, where we see a relatively strong correlation between the body mass and sex but no significant correlation in bill length and body mass within the sex categories (slopes are pretty much flat)

4 Using the dataset below, please assess whether there is a difference in number of bigfoot sightings per year between Ohio and Washington. You will have to read the data in, filter by state, make a column for year (modifying date using lubridate), group_by state & year, then count rows (each row= a bigfoot sighting). This data frame will allow you to do your t-test! Next, calculate a mean across years– you can use this to make a graph. In short, I want you to run a t-test to assess the H0 that there is no difference in annual bigfoot sightings between Ohio and Washington. Then I want to see a graph showing means + error. Finally, interpret this graph + stats. This requires use of a skill from every lab so far!

bigfoot <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-09-13/bigfoot.csv')
Rows: 5021 Columns: 28
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): observed, location_details, county, state, season, title, classif...
dbl  (17): latitude, longitude, number, temperature_high, temperature_mid, t...
date  (1): date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
big1<- bigfoot %>%
filter(state == "Ohio" | state =="Washington")
big1$date <- year(big1$date)
big2 <- big1 %>%
group_by(state,date) %>%
  drop_na("date")%>%
dplyr::summarize(count=n())
`summarise()` has grouped output by 'state'. You can override using the
`.groups` argument.
big2$sighting <- big2$count
t.test(data=big2, sighting~state)

    Welch Two Sample t-test

data:  sighting by state
t = -3.1298, df = 98.619, p-value = 0.002301
alternative hypothesis: true difference in means between group Ohio and group Washington is not equal to 0
95 percent confidence interval:
 -5.292127 -1.185345
sample estimates:
      mean in group Ohio mean in group Washington 
                5.053571                 8.292308 
head(big2)
# A tibble: 6 × 4
# Groups:   state [1]
  state  date count sighting
  <chr> <dbl> <int>    <int>
1 Ohio   1956     1        1
2 Ohio   1958     2        2
3 Ohio   1962     1        1
4 Ohio   1963     1        1
5 Ohio   1967     1        1
6 Ohio   1968     1        1
big3 <- big2 %>%
  group_by(state) %>%
  dplyr::summarize(mean_sightings = mean(sighting),sd_sightings = sd(sighting), n = n(), se = sd_sightings/sqrt(n))
 tibble(big3)
# A tibble: 2 × 5
  state      mean_sightings sd_sightings     n    se
  <chr>               <dbl>        <dbl> <int> <dbl>
1 Ohio                 5.05         3.76    56 0.502
2 Washington           8.29         7.29    65 0.905
p5<- ggplot() +
  geom_point(data = big3, aes(x = state, y = mean_sightings, color = state)) +
  labs(x="State", y="Mean Annual Sightings", title = "Comparison of Mean annual Bigfoot Sightings in Washington and Ohio", color = "State") +
  theme(plot.title = element_text(vjust = 0.5)) +
  theme_classic()


p5+geom_errorbar(data = big3, aes(x = state, ymin = mean_sightings - se, ymax = mean_sightings + se, color = state)) +
  geom_jitter(data = big2, aes(x = state, y = sighting))

\(H_0\) that there is no difference in annual bigfoot sightings between Ohio and Washington

\(H_A\) that there is a difference in annual bigfoot sightings between Ohio and Washington

  • The p-value = 0.002301 < 0.05 from the t.test and so we can reject the null hypothesis that there is no annual difference in bigfoot sightings between Ohio and Washington. OR This shows that there is a statistically significant difference in the annual sightings of bigfoot in Washington and Ohio. The model output for mean bigfoot sightings in Ohio and Washington 5.053571 and 8.292308 respectively.

The plot also shows that there is definitely difference in mean annual sightings of bigfoot between Ohio and Washington. In particular, Washington has higher mean annual bigfoot sightings than Ohio.