# Step 1: Install the Required Packages
# Packages only need to be installed once. Do not run the install code more than once after successful installation.

#install.packages("readxl")
#install.packages("ggplot2")

# Step 2: Open the Installed Packages
# Packages must be loaded every time you open a new R session.

library(readxl)
library(ggplot2)

# Step 3: Import and Name Dataset
# This code imports an Excel dataset and stores it as an object in R.

# Import the Excel workbook at this path and keep the result as StudentData.
StudentData <- read_excel(
  path = "/Users/ha113ab/Desktop/datasets/DatasetB2.xlsx"
)

# Step 4: Create a Contingency Table
# A contingency table shows the count for each combination of levels of two categorical variables, which makes their relationship easy to inspect.

# Cross-tabulate student type (rows) against pet ownership (columns) and print the counts.
table(StudentData[["StudentType"]], StudentData[["PetOwnership"]])
##                
##                 No Yes
##   Domestic      27  25
##   International 23  25
# Step 5: Create a Bar Chart
# Bar charts are used to visually display the data. A bar chart is only needed if the variable contains three or more levels.

# Grouped bar chart: one cluster per student type, one bar per pet-ownership
# level. The legend stays visible because fill colour is the only cue that
# tells the pet-ownership bars apart within each cluster.
ggplot(StudentData, aes(x = StudentType, fill = PetOwnership)) +
  geom_bar(position = "dodge") +
  labs(
    x = "Student Type",
    y = "Frequency",
    fill = "Pet Ownership",
    title = "Pet Ownership by Student Type"
  ) +
  theme(
    text = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14),
    plot.title = element_text(size = 14),
    legend.position = "right"
  )

# Step 6: Conduct the Chi-Square Test of Independence
# Pass the contingency table to chisq.test() to determine whether there is an association between student type and pet ownership.

# Build the student type x pet ownership table, then run the chi-square test
# on it. The object name is kept as-is because chisq.test() echoes it in the
# "data:" line of its printed result.
contingency_table <- table(
  StudentData[["StudentType"]],
  StudentData[["PetOwnership"]]
)
chisq.test(x = contingency_table)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 0.040064, df = 1, p-value = 0.8414
# Step 7: Calculate Cramer's V (Effect Size)
# Calculate the effect size only if the p-value was statistically significant.

# Note: Check the p-value from Step 6. If p < 0.05, calculate the effect size.
# If p >= 0.05, the effect size is NOT calculated.
# Here p = 0.8414, so Cramer's V is not calculated.

# Step 8: Interpret and Report the Results

# A chi-square test of independence indicated that the observed
# frequencies were not significantly different from the expected frequencies,
# χ²(1) = xx.xx, p = .xxx. Since the result was not statistically significant,
# effect size is not reported.

# OR if significant:
# A chi-square test of independence indicated that the observed
# frequencies were significantly different from the expected frequencies,
# χ²(1) = xx.xx, p = .xxx. The association between the two variables was
# weak/moderate/strong (Cramer's V = .xx).