library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Problem 1

# Goodness-of-fit test on the two observed category counts.
observed <- c(244, 192)

# Null-hypothesis proportions: an equal share for every observed category.
theor <- rep(1 / length(observed), length(observed))

# Expected counts under the null hypothesis, printed for reference.
expected <- theor * sum(observed)
expected
## [1] 218 218
# Pass the hypothesised proportions explicitly so the test is tied to `theor`
# instead of relying on chisq.test()'s implicit equal-probability default.
chisq.test(observed, p = theor)
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 6.2018, df = 1, p-value = 0.01276

The low p-value of ~0.013 is below the 5% significance level, so we reject the null hypothesis that the ACTN3 alleles R and X are equally likely: the observed counts differ significantly from the expected 50/50 split.

Problem 2

# Read the nutrition study data through an explicit path instead of calling
# setwd(), so the script does not change the session's working directory.
# NOTE(review): the directory is machine-specific; adjust it when running elsewhere.
data <- read_csv(file.path("D:/DATA 101/Datasets", "NutritionStudy.csv"))
## Rows: 315 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Smoke, Sex, VitaminUse
## dbl (14): ID, Age, Quetelet, Vitamin, Calories, Fat, Fiber, Alcohol, Cholest...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Cross-tabulate vitamin use (rows) against sex (columns).
observed <- table(data[["VitaminUse"]], data[["Sex"]])
print(observed)
##             
##              Female Male
##   No             87   24
##   Occasional     77    5
##   Regular       109   13
# Pearson chi-squared test of independence on the contingency table.
chisq.test(x = observed)
## 
##  Pearson's Chi-squared test
## 
## data:  observed
## X-squared = 11.071, df = 2, p-value = 0.003944

The p-value of 0.0039 is below the 5% significance level, indicating a statistically significant association between a person’s vitamin use and their sex.

Problem 3

# Read the fish gill data through an explicit path instead of calling
# setwd(), so the script does not change the session's working directory.
# NOTE(review): the directory is machine-specific; adjust it when running elsewhere.
data2 <- read_csv(file.path("D:/DATA 101/Datasets", "FishGills3.csv"))
## Rows: 90 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Calcium
## dbl (1): GillRate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One-way ANOVA: gill rate as the response, calcium group as the factor.
anova_test <- aov(formula = GillRate ~ Calcium, data = data2)
print(anova_test)
## Call:
##    aov(formula = GillRate ~ Calcium, data = data2)
## 
## Terms:
##                   Calcium Residuals
## Sum of Squares   2037.222 19064.333
## Deg. of Freedom         2        87
## 
## Residual standard error: 14.80305
## Estimated effects may be unbalanced
# ANOVA table with the F statistic and p-value for the calcium term.
summary(object = anova_test)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Calcium      2   2037  1018.6   4.648 0.0121 *
## Residuals   87  19064   219.1                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Using the typical significance level of 5%, the ANOVA test gave a statistically significant p-value of 0.012, indicating that the mean gill beat rate differs for at least one of the calcium levels in the water.

# Pairwise comparisons of the calcium groups, with the family-wise
# confidence level stated explicitly (0.95 is the function's default).
TukeyHSD(anova_test, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GillRate ~ Calcium, data = data2)
## 
## $Calcium
##                  diff        lwr        upr     p adj
## Low-High    10.333333   1.219540 19.4471264 0.0222533
## Medium-High  0.500000  -8.613793  9.6137931 0.9906108
## Medium-Low  -9.833333 -18.947126 -0.7195402 0.0313247

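The TukeyHSD test shows that the Low calcium group has a significantly higher mean gill beat rate than both the High group (difference of 10.33, adjusted p = 0.022) and the Medium group (difference of 9.83, adjusted p = 0.031), while the Medium and High groups do not differ significantly (adjusted p = 0.99). A cautious interpretation is that low calcium levels are associated with a higher gill beat rate, rather than the rate decreasing steadily as calcium increases.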