October 25, 2024
Important Note: In all of the questions below, use the p-value < 0.05 convention to determine statistical significance. If you want to run a hypothesis test for a percentage or proportion, you can still use the t.test function in R with binary data (0/1 values), in the same way that you saw in the online videos.
# Read the experiment data from the working directory.
data <- read.csv(file = "ElectionData.csv")
# Cross-tabulate message variant (rows) against the share outcome (columns).
variant_table <- table(data$Variant, data$Share)
# Replace the raw level codes with readable labels before printing.
# NOTE(review): the labels assume exactly three variants and that the two
# Share levels sort as "not shared" then "shared" (e.g. 0/1) -- confirm.
colnames(variant_table) <- c("No Share", "Shared")
rownames(variant_table) <- paste("Variant", 1:3)
print(variant_table)
##
## No Share Shared
## Variant 1 5021 341
## Variant 2 4984 338
## Variant 3 4963 353
# Share rate (in percent) for each variant, computed from the data itself
# rather than from counts copied by hand out of the printed table, so the
# figures cannot drift out of sync with the dataset.
# NOTE(review): taking the mean as a proportion assumes Share is coded
# 0/1 -- confirm against the dataset.
# na.rm = TRUE drops missing responses, mirroring table()'s default of
# excluding NA values from its counts.
share_pct_by_variant <- tapply(data$Share, data$Variant, mean, na.rm = TRUE) * 100
for (i in seq_along(share_pct_by_variant)) {
  # Each message ends with a newline so the three reports print on
  # separate lines when the script is run outside of knitr.
  cat(
    paste0(
      "Percentage of supporters who pledged their support for variant ",
      names(share_pct_by_variant)[i],
      ": "
    ),
    share_pct_by_variant[[i]],
    "%\n"
  )
}
## Percentage of supporters who pledged their support for variant 1: 6.359567 %
## Percentage of supporters who pledged their support for variant 2: 6.350996 %
## Percentage of supporters who pledged their support for variant 3: 6.640331 %
…..
# Share outcome vectors, one per message variant.
variant_1_share <- data$Share[data$Variant == 1]
variant_2_share <- data$Share[data$Variant == 2]
variant_3_share <- data$Share[data$Variant == 3]
# Two-sample t-tests for every pair of variants (t.test() defaults, i.e.
# Welch's unequal-variance test).
t_test_1_2 <- t.test(x = variant_1_share, y = variant_2_share)
t_test_1_3 <- t.test(x = variant_1_share, y = variant_3_share)
t_test_2_3 <- t.test(x = variant_2_share, y = variant_3_share)
# Keep just the p-values for reporting.
p_value_1_2 <- t_test_1_2[["p.value"]]
p_value_1_3 <- t_test_1_3[["p.value"]]
p_value_2_3 <- t_test_2_3[["p.value"]]
# Report each pairwise p-value on its own line.
cat("P-value for Variant 1 vs Variant 2:", p_value_1_2, "\n")
## P-value for Variant 1 vs Variant 2: 0.9855141
cat("P-value for Variant 1 vs Variant 3:", p_value_1_3, "\n")
## P-value for Variant 1 vs Variant 3: 0.5563072
cat("P-value for Variant 2 vs Variant 3:", p_value_2_3, "\n")
## P-value for Variant 2 vs Variant 3: 0.5449305
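Based on this information, we should select Variant 3: it has the highest percentage of supporters who shared on social media (6.64%). Note, however, that none of the differences between the three variants are statistically significant (the pairwise p-values of 0.99, 0.56 and 0.54 are all well above 0.05), so this choice is a tie-break on the observed rates rather than a statistically supported preference.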
# Split the data by party affiliation. which() drops rows whose Party is
# NA, the same way subset() does.
democrats <- data[which(data$Party == "Democrat"), , drop = FALSE]
independents <- data[which(data$Party == "Independent"), , drop = FALSE]
others <- data[which(data$Party == "Other"), , drop = FALSE]
# Share rate (in percent) for each variant within each party: the mean of
# the Share column over that party's rows for the variant, times 100.
# Democrats
percentage_dem_variant1 <- 100 * mean(democrats$Share[which(democrats$Variant == 1)])
percentage_dem_variant2 <- 100 * mean(democrats$Share[which(democrats$Variant == 2)])
percentage_dem_variant3 <- 100 * mean(democrats$Share[which(democrats$Variant == 3)])
cat("Percentage of Democrat supporters who pledged support (Variant 1):", percentage_dem_variant1, "%\n")
## Percentage of Democrat supporters who pledged support (Variant 1): 6.551363 %
cat("Percentage of Democrat supporters who pledged support (Variant 2):", percentage_dem_variant2, "%\n")
## Percentage of Democrat supporters who pledged support (Variant 2): 6.226516 %
cat("Percentage of Democrat supporters who pledged support (Variant 3):", percentage_dem_variant3, "%\n")
## Percentage of Democrat supporters who pledged support (Variant 3): 6.608411 %
# Independents
percentage_indep_variant1 <- 100 * mean(independents$Share[which(independents$Variant == 1)])
percentage_indep_variant2 <- 100 * mean(independents$Share[which(independents$Variant == 2)])
percentage_indep_variant3 <- 100 * mean(independents$Share[which(independents$Variant == 3)])
cat("Percentage of Independent supporters who pledged support (Variant 1):", percentage_indep_variant1, "%\n")
## Percentage of Independent supporters who pledged support (Variant 1): 5.988593 %
cat("Percentage of Independent supporters who pledged support (Variant 2):", percentage_indep_variant2, "%\n")
## Percentage of Independent supporters who pledged support (Variant 2): 9.737828 %
cat("Percentage of Independent supporters who pledged support (Variant 3):", percentage_indep_variant3, "%\n")
## Percentage of Independent supporters who pledged support (Variant 3): 3.925067 %
# Others
percentage_other_variant1 <- 100 * mean(others$Share[which(others$Variant == 1)])
percentage_other_variant2 <- 100 * mean(others$Share[which(others$Variant == 2)])
percentage_other_variant3 <- 100 * mean(others$Share[which(others$Variant == 3)])
cat("Percentage of Other supporters who pledged support (Variant 1):", percentage_other_variant1, "%\n")
## Percentage of Other supporters who pledged support (Variant 1): 5.668016 %
cat("Percentage of Other supporters who pledged support (Variant 2):", percentage_other_variant2, "%\n")
## Percentage of Other supporters who pledged support (Variant 2): 0.3787879 %
cat("Percentage of Other supporters who pledged support (Variant 3):", percentage_other_variant3, "%\n")
## Percentage of Other supporters who pledged support (Variant 3): 12.57036 %
# Pairwise two-sample t-tests between variants, run separately per party.
# For each party: extract the Share vector of every variant first, then
# test each of the three variant pairs with t.test() defaults.
# Democrats
dem_variant1_share <- democrats$Share[which(democrats$Variant == 1)]
dem_variant2_share <- democrats$Share[which(democrats$Variant == 2)]
dem_variant3_share <- democrats$Share[which(democrats$Variant == 3)]
t_test_dem_1_2 <- t.test(x = dem_variant1_share, y = dem_variant2_share)
t_test_dem_1_3 <- t.test(x = dem_variant1_share, y = dem_variant3_share)
t_test_dem_2_3 <- t.test(x = dem_variant2_share, y = dem_variant3_share)
# Independents
indep_variant1_share <- independents$Share[which(independents$Variant == 1)]
indep_variant2_share <- independents$Share[which(independents$Variant == 2)]
indep_variant3_share <- independents$Share[which(independents$Variant == 3)]
t_test_indep_1_2 <- t.test(x = indep_variant1_share, y = indep_variant2_share)
t_test_indep_1_3 <- t.test(x = indep_variant1_share, y = indep_variant3_share)
t_test_indep_2_3 <- t.test(x = indep_variant2_share, y = indep_variant3_share)
# Others
other_variant1_share <- others$Share[which(others$Variant == 1)]
other_variant2_share <- others$Share[which(others$Variant == 2)]
other_variant3_share <- others$Share[which(others$Variant == 3)]
t_test_other_1_2 <- t.test(x = other_variant1_share, y = other_variant2_share)
t_test_other_1_3 <- t.test(x = other_variant1_share, y = other_variant3_share)
t_test_other_2_3 <- t.test(x = other_variant2_share, y = other_variant3_share)
# Report the p-value of every comparison, grouped by party.
cat("Democrats - P-value (Variant 1 vs 2):", t_test_dem_1_2[["p.value"]], "\n")
## Democrats - P-value (Variant 1 vs 2): 0.5641183
cat("Democrats - P-value (Variant 1 vs 3):", t_test_dem_1_3[["p.value"]], "\n")
## Democrats - P-value (Variant 1 vs 3): 0.9207834
cat("Democrats - P-value (Variant 2 vs 3):", t_test_dem_2_3[["p.value"]], "\n")
## Democrats - P-value (Variant 2 vs 3): 0.5031652
cat("Independents - P-value (Variant 1 vs 2):", t_test_indep_1_2[["p.value"]], "\n")
## Independents - P-value (Variant 1 vs 2): 0.001322141
cat("Independents - P-value (Variant 1 vs 3):", t_test_indep_1_3[["p.value"]], "\n")
## Independents - P-value (Variant 1 vs 3): 0.02726343
cat("Independents - P-value (Variant 2 vs 3):", t_test_indep_2_3[["p.value"]], "\n")
## Independents - P-value (Variant 2 vs 3): 7.705752e-08
cat("Others - P-value (Variant 1 vs 2):", t_test_other_1_2[["p.value"]], "\n")
## Others - P-value (Variant 1 vs 2): 1.145532e-06
cat("Others - P-value (Variant 1 vs 3):", t_test_other_1_3[["p.value"]], "\n")
## Others - P-value (Variant 1 vs 3): 0.0001077236
cat("Others - P-value (Variant 2 vs 3):", t_test_other_2_3[["p.value"]], "\n")
## Others - P-value (Variant 2 vs 3): 5.670008e-16
For Democrats, we will select Variant 3 because it has the highest percentage of supporters who pledged support (6.61%), even though the differences between the variants are not statistically significant (all three pairwise p-values are above 0.05).
For Independents, we will select Variant 2 (9.74% of Independent supporters pledged support). It performs significantly better than both Variant 1 (5.99%, p = 0.0013) and Variant 3 (3.93%, p < 0.001), so Variant 2 is clearly the best choice.
For Others, we will select Variant 3 (12.57% of Other supporters pledged support). It performs significantly better than both Variant 1 (5.67%, p ≈ 0.0001) and Variant 2 (0.38%, p < 0.001), so Variant 3 is clearly the best choice.