# Step 1: Install the Required Packages
# Packages only need to be installed once. Do not run the install code more than once after successful installation.
#install.packages("readxl")
#install.packages("ggplot2")
# Step 2: Open the Installed Packages
# Packages must be loaded every time you open a new R session.
library(readxl)
library(ggplot2)
# Step 3: Import and Name Dataset
# This code imports an Excel dataset and stores it as an object in R.
# Read the workbook into a tibble named StudentData.
# The path is absolute and specific to one machine; point it at your own copy
# of DatasetB2.xlsx before running.
StudentData <- read_excel(
  path = "/Users/ha113ab/Desktop/datasets/DatasetB2.xlsx"
)
# Step 4: Create a Contingency Table
# A contingency table shows the frequency of each combination of two categorical variables, which makes their relationship easy to inspect.
# Cross-tabulate student type (rows) against pet ownership (columns) and
# print the resulting counts.
table(
  StudentData[["StudentType"]],
  StudentData[["PetOwnership"]]
)
##
## No Yes
## Domestic 27 25
## International 23 25
# Step 5: Create a Bar Chart
# Bar charts are used to visually display the data. A bar chart is most helpful when a variable contains three or more levels; for a 2 x 2 table like this one it is optional, but it is shown here for completeness.
# Grouped bar chart: one cluster of bars per student type, with one bar per
# pet-ownership level inside each cluster (position = "dodge").
ggplot(StudentData, aes(x = StudentType, fill = PetOwnership)) +
  geom_bar(position = "dodge") +
  labs(
    x = "Student Type",
    y = "Frequency",
    fill = "Pet Ownership",
    title = "Pet Ownership by Student Type"
  ) +
  # Use one font size everywhere. axis.text and plot.title are set explicitly
  # because their defaults are scaled relative to the base text size.
  # The legend is deliberately kept: the fill colour is the only thing that
  # identifies which bar is "Yes" and which is "No".
  theme(
    text = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14),
    plot.title = element_text(size = 14)
  )

# Step 6: Conduct the Chi-Square Test of Independence
# Provide the contingency table to determine if there is an association between student type and pet ownership.
# Build the StudentType x PetOwnership frequency table and store it so the
# test below can refer to it by name.
contingency_table <- table(
  StudentData[["StudentType"]],
  StudentData[["PetOwnership"]]
)

# Pearson chi-square test of independence. correct = TRUE is the default and
# applies Yates' continuity correction when the table is 2 x 2.
chisq.test(contingency_table, correct = TRUE)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 0.040064, df = 1, p-value = 0.8414
# Step 7: Calculate Cramer's V (Effect Size)
# Calculate the effect size only if the chi-square test was statistically significant.
# Note: Check the p-value from Step 6. If p < 0.05, calculate the effect size. If p >= 0.05 (as it is here, p = 0.8414), the effect size is NOT calculated.
# Step 8: Interpret and Report the Results
# A chi-square test of independence indicated that the observed
# frequencies were not different from the expected frequencies,
# χ²(1) = xx.xx, p = .xxx. Since the result was not statistically significant, effect size is not reported.
# OR if significant:
# A chi-square test of independence indicated that the observed
# frequencies were different from the expected frequencies,
# χ²(1) = xx.xx, p = .xxx. The association between the two variables was weak/moderate/strong (Cramer's V = .xx)