Problem 1

# Problem 1: chi-squared test for given probabilities on two observed counts,
# against the null that both categories are equally likely.

n <- 436
x1 <- 244
x2 <- 192
α <- 0.05  # significance level the p-value is compared against below
# Observed counts
observed <- c(x1, x2)

# Null values: equal probability (1/2) for each of the two categories
theoritical_prop <- rep(1 / 2, 2)

# Expected values: counts implied by the null proportions for a sample of size n
expected_values <- theoritical_prop * n
expected_values
## [1] 218 218
# Pass the null proportions explicitly so the test is visibly tied to the
# hypothesis defined above (equal proportions are also chisq.test()'s default,
# so the result is unchanged).
Q1_results <- chisq.test(observed, p = theoritical_prop)

Q1_results
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 6.2018, df = 1, p-value = 0.01276
# The comparison yields a single TRUE/FALSE, so a plain if/else is used rather
# than the vectorised ifelse().
decision <- if (Q1_results$p.value < α) {
  "p-value < 0.05 → reject H0"
} else {
  "p-value ≥ 0.05 → fail to reject H0"
}

decision
## [1] "p-value < 0.05 → reject H0"

Problem 2

# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2) 
# Problem 2: Pearson chi-squared test on the VitaminUse x Sex contingency table.

# Point the session at the course folder; the relative CSV paths resolve here.
setwd("~/Downloads/25_Semesters/Fall/DATA101")

# Read the nutrition study data
NutritionStudy_Data <- read_csv(file = "NutritionStudy.csv")
## Rows: 315 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Smoke, Sex, VitaminUse
## dbl (14): ID, Age, Quetelet, Vitamin, Calories, Fat, Fiber, Alcohol, Cholest...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy under a short name
df <- NutritionStudy_Data

print(colnames(df))
##  [1] "ID"            "Age"           "Smoke"         "Quetelet"     
##  [5] "Vitamin"       "Calories"      "Fat"           "Fiber"        
##  [9] "Alcohol"       "Cholesterol"   "BetaDiet"      "RetinolDiet"  
## [13] "BetaPlasma"    "RetinolPlasma" "Sex"           "VitaminUse"   
## [17] "PriorSmoke"
# Cross-tabulate vitamin use (rows) against sex (columns)
observed_dataset <- table(df[["VitaminUse"]], df[["Sex"]])
print(observed_dataset)
##             
##              Female Male
##   No             87   24
##   Occasional     77    5
##   Regular       109   13
# Chi-squared test on the two-way table of counts
print(chisq.test(observed_dataset))
## 
##  Pearson's Chi-squared test
## 
## data:  observed_dataset
## X-squared = 11.071, df = 2, p-value = 0.003944

Problem 3

Goal: Test whether the mean (\(\mu\)) gill beat rate differs across three calcium levels: Low (0.71 mg/L), Medium (5.24 mg/L), and High (18.24 mg/L).

# Problem 3: one-way ANOVA of gill rate by calcium group.

# Read the fish gill data
FishGills3_Data <- read_csv(file = "FishGills3.csv")
## Rows: 90 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Calcium
## dbl (1): GillRate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy under a short name
df <- FishGills3_Data

print(colnames(df))
## [1] "Calcium"  "GillRate"
# Fit GillRate against the Calcium grouping and print the ANOVA table
anova_result <- aov(formula = GillRate ~ Calcium, data = df)
print(summary(anova_result))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Calcium      2   2037  1018.6   4.648 0.0121 *
## Residuals   87  19064   219.1                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1