library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tibble)

Problem 2

H0: Vitamin use is independent of sex; males and females have the same distribution of vitamin use. H1: Vitamin use is associated with sex; the distribution of vitamin use differs between males and females.

# Read the nutrition study data from the working directory and preview the
# first six rows.
vitamins <- read.csv(file = "NutritionStudy.csv", header = TRUE)

head(x = vitamins, n = 6L)
##   ID Age Smoke Quetelet Vitamin Calories  Fat Fiber Alcohol Cholesterol
## 1  1  64    No  21.4838       1   1298.8 57.0   6.3     0.0       170.3
## 2  2  76    No  23.8763       1   1032.5 50.1  15.8     0.0        75.8
## 3  3  38    No  20.0108       2   2372.3 83.6  19.1    14.1       257.9
## 4  4  40    No  25.1406       3   2449.5 97.5  26.5     0.5       332.6
## 5  5  72    No  20.9850       1   1952.1 82.6  16.2     0.0       170.8
## 6  6  40    No  27.5214       3   1366.9 56.0   9.6     1.3       154.6
##   BetaDiet RetinolDiet BetaPlasma RetinolPlasma    Sex VitaminUse PriorSmoke
## 1     1945         890        200           915 Female    Regular          2
## 2     2653         451        124           727 Female    Regular          1
## 3     6321         660        328           721 Female Occasional          2
## 4     1061         864        153           615 Female         No          2
## 5     2863        1209         92           799 Female    Regular          1
## 6     1729        1439        148           654 Female         No          2
# Preview the last six rows of the nutrition data.
tail(x = vitamins, n = 6L)
##      ID Age Smoke Quetelet Vitamin Calories   Fat Fiber Alcohol Cholesterol
## 310 310  48    No  24.6147       2   2021.1  72.2  16.6     9.0       299.1
## 311 311  46    No  25.8967       3   2263.6  98.2  19.4     2.6       306.5
## 312 312  45    No  23.8270       1   1841.1  84.2  14.1     2.2       257.7
## 313 313  49    No  24.2613       1   1125.6  44.8  11.9     4.0       150.5
## 314 314  31    No  23.4525       1   2729.6 144.4  13.2     2.2       381.8
## 315 315  45    No  26.5081       1   1627.0  77.4   9.9     0.2       195.6
##     BetaDiet RetinolDiet BetaPlasma RetinolPlasma    Sex VitaminUse PriorSmoke
## 310     1392        1027        144           752 Female Occasional          2
## 311     2572        1261        164           216 Female         No          2
## 312     1665         465         80           328 Female    Regular          1
## 313     6943         520        300           502 Female    Regular          1
## 314      741         644        121           684 Female    Regular          2
## 315     1242         554        233           826 Female    Regular          1
# Cross-tabulate sex (rows) against vitamin-use category (columns), then run
# Pearson's chi-squared test on the resulting contingency table.
vitamins_intake <- table(vitamins[["Sex"]], vitamins[["VitaminUse"]])

chisq.test(x = vitamins_intake)
## 
##  Pearson's Chi-squared test
## 
## data:  vitamins_intake
## X-squared = 11.071, df = 2, p-value = 0.003944

The p-value is small (0.003944), so we can reject the null hypothesis: there is evidence of a difference between males and females in their vitamin intake.

Problem 3

Fish Gills ANOVA test:

H0: The mean gill rate is the same at every calcium level in the water. H1: The mean gill rate differs for at least one calcium level in the water.

# Read the fish gill data from the working directory and preview the first
# six rows.
fish_gills <- read.csv(file = "FishGills3.csv", header = TRUE)

head(x = fish_gills, n = 6L)
##   Calcium GillRate
## 1     Low       55
## 2     Low       63
## 3     Low       78
## 4     Low       85
## 5     Low       65
## 6     Low       98
# Preview the last six rows of the fish gill data.
tail(x = fish_gills, n = 6L)
##    Calcium GillRate
## 85    High       52
## 86    High       37
## 87    High       57
## 88    High       62
## 89    High       40
## 90    High       42
# Fit a one-way ANOVA of gill rate on calcium level, then print the fitted
# object (sums of squares, degrees of freedom, residual standard error).
anova_result <- aov(formula = GillRate ~ Calcium, data = fish_gills)

print(anova_result)
## Call:
##    aov(formula = GillRate ~ Calcium, data = fish_gills)
## 
## Terms:
##                   Calcium Residuals
## Sum of Squares   2037.222 19064.333
## Deg. of Freedom         2        87
## 
## Residual standard error: 14.80305
## Estimated effects may be unbalanced
# Print the ANOVA table with the F statistic and its p-value.
summary(object = anova_result)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Calcium      2   2037  1018.6   4.648 0.0121 *
## Residuals   87  19064   219.1                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The p-value is small (0.0121), below the conventional 0.05 significance level, so there is enough evidence to reject the null hypothesis: the mean gill rate differs for at least one calcium level in the water.