Illya Mowerman, Ph.D.
Chi-square tests are fundamental statistical methods for analyzing categorical data:
# Create sample marketing data
# One row per contacted customer (500 rows). The Response values are listed
# campaign by campaign, in the same order as the Campaign column, so the two
# vectors line up row by row.
marketing_data <- data.frame(
  Campaign = rep(c("Email", "Social", "Print"), times = c(200, 150, 150)),
  Response = c(
    rep(c("Converted", "Not_Converted"), times = c(80, 120)),  # Email
    rep(c("Converted", "Not_Converted"), times = c(45, 105)),  # Social
    rep(c("Converted", "Not_Converted"), times = c(30, 120))   # Print
  )
)

# Create and print contingency table
# table() orders the levels alphabetically, so rows print as Email, Print,
# Social.
campaign_table <- table(marketing_data$Campaign, marketing_data$Response)
print(campaign_table)

# Perform chi-square test of independence (Campaign vs. Response)
# This call produces the "Pearson's Chi-squared test" output shown below;
# the expected counts are available as campaign_test$expected.
campaign_test <- chisq.test(campaign_table)
print(campaign_test)
##
## Converted Not_Converted
## Email 80 120
## Print 30 120
## Social 45 105
##
## Pearson's Chi-squared test
##
## data: campaign_table
## X-squared = 16.129, df = 2, p-value = 0.0003145
# Visualize results
# position = "fill" scales every bar to height 1, so the bars show the share
# of each Response within a campaign rather than raw counts.
ggplot(data = marketing_data, mapping = aes(x = Campaign, fill = Response)) +
  geom_bar(position = "fill") +
  labs(
    x = "Campaign Type",
    y = "Proportion",
    title = "Conversion Rates by Marketing Campaign"
  ) +
  theme_minimal()

# Perform chi-square goodness of fit test
# Goodness-of-fit test: compare the observed counts in dice_rolls against the
# probabilities supplied in expected_prob.
# NOTE(review): dice_rolls and expected_prob are not defined in the visible
# part of this file -- confirm they are created before this point.
dice_test <- chisq.test(x = dice_rolls, p = expected_prob)
print(dice_test)
##
## Chi-squared test for given probabilities
##
## data: dice_rolls
## X-squared = 7.6923, df = 5, p-value = 0.174
# Visualize results
# Observed count per face drawn as bars; the dashed red line marks the count
# each face would have if the total were split evenly across six faces.
dice_df <- data.frame(
  Number = seq_len(6),
  Observed = dice_rolls,
  Expected = sum(dice_rolls) / 6
)

ggplot(data = dice_df, mapping = aes(x = factor(Number))) +
  geom_col(mapping = aes(y = Observed), fill = "skyblue") +
  geom_hline(
    yintercept = sum(dice_rolls) / 6,
    linetype = "dashed",
    color = "red"
  ) +
  labs(
    x = "Dice Number",
    y = "Frequency",
    title = "Dice Roll Frequencies"
  ) +
  theme_minimal()
##
## Converted Not_Converted
## Email 62.0 138.0
## Print 46.5 103.5
## Social 46.5 103.5
# Create student dataset
# 200 students: the first 100 rows are Male, the next 100 Female. The
# Study_Method values are listed in that same order so the columns line up.
student_data <- data.frame(
  Gender = rep(c("Male", "Female"), each = 100),
  Study_Method = c(
    rep(c("Online", "Traditional"), times = c(40, 60)),  # Male
    rep(c("Online", "Traditional"), times = c(55, 45))   # Female
  )
)

# Tasks:
# 1. Create contingency table
student_table <- table(student_data$Gender, student_data$Study_Method)
print(student_table)

# 2. Perform chi-square test
# For a 2x2 table chisq.test() applies Yates' continuity correction by
# default (correct = TRUE), which is the variant reported in the output below.
student_test <- chisq.test(student_table)
print(student_test)
##
## Online Traditional
## Female 55 45
## Male 40 60
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: student_table
## X-squared = 3.9298, df = 1, p-value = 0.04744
# 3. Visualize results
# position = "fill" scales both bars to height 1 so the split between study
# methods can be compared directly between genders.
ggplot(student_data, aes(x = Gender, fill = Study_Method)) +
  geom_bar(position = "fill") +
  labs(
    title = "Study Method Preference by Gender",
    y = "Proportion",
    x = "Gender"
  ) +
  theme_minimal()

# Further resources:
#   ?chisq.test
#   ggplot2 for visualization
#   vcd for visualizing categorical data
#   effectsize for calculating effect sizes