We load the data files from the Participants_Actions folder. Each file is a CSV with the following columns:

- Timestamp: the time the action was taken
- Session: the session (participant number)
- AI Initiates: whether the AI initiated the interaction
- User Initiates: whether the user initiated the interaction
- Success: whether the interaction was successful
- Failure: whether the interaction failed
- Leaves: whether the user left the session before a satisfactory interaction
- Asks for Help: whether the user asked the facilitators for help
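For reference, the first data row (values echoed by str() further below) would look like this in a raw CSV, assuming the logical columns are stored literally as TRUE/FALSE:

Timestamp,Session,AI Initiates,User Initiates,Success,Failure,Leaves,Asks for Help
2025-05-02 13:42:11,2,TRUE,FALSE,FALSE,TRUE,FALSE,FALSE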
library(ggplot2)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
file_list <- list.files(path = "../Participants_Actions/", pattern = "\\.csv$", full.names = TRUE)
data_list <- lapply(file_list, function(file) {
read.csv(file, header = TRUE, check.names = FALSE, stringsAsFactors = FALSE)
})
all_data <- do.call(rbind, data_list)
# Clean up the data
colnames(all_data) <- gsub(" ", "_", colnames(all_data))
## Ensure logicals are treated properly
all_data$AI_Initiates <- as.logical(all_data$AI_Initiates)
all_data$User_Initiates <- as.logical(all_data$User_Initiates)
all_data$Success <- as.logical(all_data$Success)
all_data$Failure <- as.logical(all_data$Failure)
all_data$Leaves <- as.logical(all_data$Leaves)
all_data$Asks_for_Help <- as.logical(all_data$Asks_for_Help)
## Convert Timestamp to POSIXct
all_data$Timestamp <- as.POSIXct(all_data$Timestamp)
## Show the structure of the data
str(all_data)
## 'data.frame': 185 obs. of 8 variables:
## $ Timestamp : POSIXct, format: "2025-05-02 13:42:11" "2025-05-02 13:45:55" ...
## $ Session : int 2 6 6 7 10 11 2 3 4 5 ...
## $ AI_Initiates : logi TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ User_Initiates: logi FALSE FALSE FALSE TRUE FALSE TRUE ...
## $ Success : logi FALSE TRUE FALSE FALSE TRUE FALSE ...
## $ Failure : logi TRUE FALSE FALSE TRUE FALSE TRUE ...
## $ Leaves : logi FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ Asks_for_Help : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
colnames(all_data)
## [1] "Timestamp" "Session" "AI_Initiates" "User_Initiates"
## [5] "Success" "Failure" "Leaves" "Asks_for_Help"
The experiment was conducted over multiple sessions, with slight changes to the experimental setup and the criteria for participation as these were refined over time (see the report for details).
sessions <- length(data_list)
session_sizes <- sapply(data_list, nrow)
session_hours <- sapply(data_list, function(df) {
as.numeric(difftime(df$Timestamp[nrow(df)], df$Timestamp[1], units = "hours"))
})
session_hours <- round(session_hours, 2)
# Create a data frame with one row per session
session_data <- data.frame(
Session = 1:sessions,
Session_size = session_sizes,
Session_hours = session_hours
)
session_data
## Session Session_size Session_hours
## 1 1 6 0.56
## 2 2 54 3.83
## 3 3 5 1.43
## 4 4 21 4.36
## 5 5 8 1.54
## 6 6 91 5.12
# Helper: throughput in participants per hour
participants_per_hour <- function(participants, hours) {
participants / hours
}
# Combine sessions that shared a setup: sessions 1 and 2 form setup 1, session 3
# is setup 2, sessions 4 and 5 form setup 3, and session 6 is setup 4
session_data_combined <- data.frame(
Setup = c(1, 2, 3, 4),
size = c(
sum(session_sizes[1:2]), session_sizes[3],
sum(session_sizes[4:5]), session_sizes[6]
),
hours = c(
sum(session_hours[1:2]), session_hours[3],
sum(session_hours[4:5]), session_hours[6]
)
)
session_data_combined$people_pr_hour <- participants_per_hour(session_data_combined$size, session_data_combined$hours)
session_data_combined
## Setup size hours people_pr_hour
## 1 1 60 4.39 13.667426
## 2 2 5 1.43 3.496503
## 3 3 29 5.90 4.915254
## 4 4 91 5.12 17.773438
# Assign a setup ID to each row of all_data (this relies on the rows still
# being in the same order as the session files)
setup_ids <- rep(session_data_combined$Setup, session_data_combined$size)
all_data$Setup <- setup_ids
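A quick sanity check makes that ordering assumption explicit; if the lengths ever disagree, the setup assignment above is wrong:

stopifnot(length(setup_ids) == nrow(all_data))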
outcome_summary <- all_data %>%
group_by(Setup) %>%
summarise(
Successes = sum(Success, na.rm = TRUE),
Failures = sum(Failure, na.rm = TRUE)
)
outcome_summary
## # A tibble: 4 × 3
## Setup Successes Failures
## <dbl> <int> <int>
## 1 1 6 53
## 2 2 4 0
## 3 3 10 18
## 4 4 10 76
final_data <- session_data_combined %>%
left_join(outcome_summary, by = "Setup") %>%
replace_na(list(Successes = 0, Failures = 0))
final_data
## Setup size hours people_pr_hour Successes Failures
## 1 1 60 4.39 13.667426 6 53
## 2 2 5 1.43 3.496503 4 0
## 3 3 29 5.90 4.915254 10 18
## 4 4 91 5.12 17.773438 10 76
final_data <- final_data %>%
mutate(
successes_per_hour = Successes / hours,
failures_per_hour = Failures / hours
)
final_data
## Setup size hours people_pr_hour Successes Failures successes_per_hour
## 1 1 60 4.39 13.667426 6 53 1.366743
## 2 2 5 1.43 3.496503 4 0 2.797203
## 3 3 29 5.90 4.915254 10 18 1.694915
## 4 4 91 5.12 17.773438 10 76 1.953125
## failures_per_hour
## 1 12.072893
## 2 0.000000
## 3 3.050847
## 4 14.843750
library(ggplot2)
library(tidyr)
# Pivot longer to plot both success and failure in one go
plot_data <- final_data %>%
select(Setup, successes_per_hour, failures_per_hour) %>%
pivot_longer(
cols = c(successes_per_hour, failures_per_hour),
names_to = "Outcome",
values_to = "Per_Hour"
)
ggplot(plot_data, aes(x = factor(Setup), y = Per_Hour, fill = Outcome)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
x = "Setup",
y = "Per Hour",
title = "Successes and Failures per Hour by Setup"
) +
scale_fill_manual(values = c("successes_per_hour" = "steelblue", "failures_per_hour" = "tomato")) +
theme_minimal()
outcome_split <- all_data %>%
group_by(Setup) %>%
summarise(
Success_AI = sum(Success & AI_Initiates, na.rm = TRUE),
Success_User = sum(Success & User_Initiates, na.rm = TRUE),
Failure_AI = sum(Failure & AI_Initiates, na.rm = TRUE),
Failure_User = sum(Failure & User_Initiates, na.rm = TRUE),
.groups = "drop"
)
final_data <- final_data %>%
left_join(outcome_split, by = "Setup") %>%
mutate(
success_ai_per_hour = Success_AI / hours,
success_user_per_hour = Success_User / hours,
failure_ai_per_hour = Failure_AI / hours,
failure_user_per_hour = Failure_User / hours
)
plot_data <- final_data %>%
select(Setup, success_ai_per_hour, success_user_per_hour, failure_ai_per_hour, failure_user_per_hour) %>%
pivot_longer(
cols = -Setup,
names_to = "Outcome",
values_to = "Per_Hour"
) %>%
mutate(
Outcome = recode(Outcome,
"success_ai_per_hour" = "Success (AI Initiated)",
"success_user_per_hour" = "Success (User Initiated)",
"failure_ai_per_hour" = "Failure (AI Initiated)",
"failure_user_per_hour" = "Failure (User Initiated)"
)
)
# Plot
ggplot(plot_data, aes(x = factor(Setup), y = Per_Hour, fill = Outcome)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
x = "Setup",
y = "Participants Per Hour",
title = "AI/User Initiated Successes and Failures per Hour by Setup"
) +
scale_fill_manual(values = c(
"Success (AI Initiated)" = "cyan",
"Success (User Initiated)" = "steelblue",
"Failure (AI Initiated)" = "brown",
"Failure (User Initiated)" = "tomato"
)) +
theme_minimal()
ggsave("plots/Successes_and_Failures_per_Hour_by_Setup.png", width = 8, height = 4, dpi = 300, bg = "white")
The raw sequence of events for each session can be seen below.
# Calculate the start index of each session
session_starts <- c(1, cumsum(session_sizes) + 1)
session_starts <- session_starts[-length(session_starts)] # Remove the last over-count
# Optional: create session labels
session_labels <- paste0("Session ", seq_along(session_sizes), " - ", session_hours, " Hours")
# Plot as before
plot_data <- all_data %>%
arrange(Timestamp) %>%
mutate(
EventIndex = row_number(),
Outcome = case_when(
Success == 1 & User_Initiates == 1 ~ "Success User",
Success == 1 & AI_Initiates == 1 ~ "Success AI",
Failure == 1 & User_Initiates == 1 ~ "Failure User",
Failure == 1 & AI_Initiates == 1 ~ "Failure AI",
TRUE ~ "Unknown"
)
)
p <- ggplot(plot_data, aes(x = EventIndex, y = 1, color = Outcome)) +
geom_point(size = 3) +
geom_vline(xintercept = session_starts, linetype = "dashed", color = "black", alpha = 0.5) +
geom_text(data = data.frame(x = session_starts, y = 1.08, label = session_labels),
aes(x = x, y = y, label = label),
inherit.aes = FALSE, angle = 90, vjust = 1.2, size = 3) + # changed vjust for better centering
scale_color_manual(values = c("Success User" = "steelblue", "Success AI" = "cyan",
"Failure User" = "tomato", "Failure AI" = "brown",
"Unknown" = "grey")) +
labs(
title = "Session Outcomes in Sequence",
x = "Event Index",
y = "",
color = "Outcome"
) +
scale_x_continuous(breaks = seq(0, max(plot_data$EventIndex), by = 10)) +
expand_limits(y = c(0.95, 1.15)) + # Expand y limits slightly
theme_minimal() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major.y = element_blank())
ggsave("plots/Session_Outcomes_with_Session_Markers.png", plot = p, width = 8, height = 3, dpi = 300, bg = "white")
p
#### Bar graph
The data is a little misleading, as the facilitators introduced an error into it. Whenever users were presented with the “User initiated” condition, they were often unsure how to interact with the agent. They therefore came to the facilitators, who told them “Just say hi”. This was not ideal, but it was done to gain more qualitative data on the experience of initiating (or not initiating) a conversation. However, these participants could have turned out as either successes or failures; the exact outcome could no longer be recorded, since, once prompted, they all became successes.
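To gauge how widespread this was, a minimal sketch (using only columns already present in all_data) cross-tabulates help requests by condition:

with(all_data, table(AI_Initiates, Asks_for_Help))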
# Create combined outcome and label AI Initiates as descriptive factor
plot_data <- all_data %>%
pivot_longer(cols = c(Success, Failure), names_to = "Outcome", values_to = "Count") %>%
group_by(AI_Initiates, Outcome, Asks_for_Help) %>%
summarise(Count = sum(Count), .groups = "drop") %>%
mutate(
Combined = case_when(
Outcome == "Success" & Asks_for_Help == 0 ~ "Success (No Help)",
Outcome == "Success" & Asks_for_Help == 1 ~ "Success (Asked Help)",
Outcome == "Failure" & Asks_for_Help == 0 ~ "Failure (No Help)",
Outcome == "Failure" & Asks_for_Help == 1 ~ "Failure (Asked Help)"
),
AI_Initiates_Label = factor(AI_Initiates, levels = c(FALSE, TRUE), labels = c("User Initiated", "AI Initiated")) # AI_Initiates is logical, so the levels must be FALSE/TRUE
)
# Plot stacked bars by Combined
p <- ggplot(plot_data, aes(x = AI_Initiates_Label, y = Count, fill = Combined)) +
geom_bar(stat = "identity", position = "stack") +
facet_wrap(~Outcome) +
scale_fill_manual(values = c(
"Success (No Help)" = "steelblue",
"Success (Asked Help)" = '#A3717C',
"Failure (No Help)" = "tomato",
"Failure (Asked Help)" = "gold" # only shown if any such cases exist
)) +
labs(
title = "Success/Failure by Initiator and Help Request (Colored by Help + Outcome)",
x = "Who Initiated",
y = "Participants",
fill = "Outcome + Help"
) +
scale_y_continuous(breaks = seq(0, max(plot_data$Count), by = 5)) +
theme_minimal()
ggsave("plots/SuccessFailure_Stacked_by_Help_and_Outcome_Labelled.png", plot = p, width = 8, height = 4, dpi = 300, bg = "white")
p
To understand whether the condition changed user behavior, we compare the success rate in the two conditions along with its dispersion (standard error and 95% confidence interval).
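The analyze() helper below implements the standard normal-approximation (Wald) interval for a proportion:

$$\hat{p} = \frac{s}{s + f}, \qquad \mathrm{SE} = \sqrt{\frac{\hat{p}(1 - \hat{p})}{n}}, \qquad \mathrm{CI}_{95\%} = \hat{p} \pm 1.96\,\mathrm{SE}, \qquad n = s + f$$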
analyze <- function(successes, failures) {
n <- successes + failures
p <- successes / n
sd <- sqrt(p * (1 - p) / n) # standard error of the proportion (reported as SD below)
ci_low <- p - 1.96 * sd
ci_high <- p + 1.96 * sd
return(list(
success_rate = p,
sd = sd,
ci_95 = c(ci_low, ci_high)
))
}
# Summarize data into success and failures
successes <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE])
)
failures <- c(
sum(all_data$Failure[all_data$AI_Initiates == TRUE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE])
)
ai_result <- analyze(successes[1], failures[1])
user_result <- analyze(successes[2], failures[2])
# Create a data frame for the results
results_df <- data.frame(
Group = c("AI Initiated", "User Initiated"),
Successes = successes,
Failures = failures,
Success_Rate = c(ai_result$success_rate, user_result$success_rate),
SD = c(ai_result$sd, user_result$sd),
CI_Low = c(ai_result$ci_95[1], user_result$ci_95[1]),
CI_High = c(ai_result$ci_95[2], user_result$ci_95[2])
)
# Print the results
print(results_df)
## Group Successes Failures Success_Rate SD CI_Low
## 1 AI Initiated 16 73 0.1797753 0.04070394 0.09999557
## 2 User Initiated 14 74 0.1590909 0.03899024 0.08267005
## CI_High
## 1 0.2595550
## 2 0.2355118
# Fisher's test
# Filter only the two groups
contingency_data <- results_df %>%
filter(Group %in% c("AI Initiated", "User Initiated")) %>%
select(Group, Successes, Failures)
# Create matrix for Fisher's test
contingency_table <- matrix(
c(
contingency_data$Successes[contingency_data$Group == "AI Initiated"],
contingency_data$Failures[contingency_data$Group == "AI Initiated"],
contingency_data$Successes[contingency_data$Group == "User Initiated"],
contingency_data$Failures[contingency_data$Group == "User Initiated"]
),
nrow = 2,
byrow = TRUE
)
# With byrow = TRUE the matrix rows are the groups and the columns the outcomes
rownames(contingency_table) <- c("AI Initiated", "User Initiated")
colnames(contingency_table) <- c("Successes", "Failures")
print(contingency_table)
## Successes Failures
## AI Initiated 16 73
## User Initiated 14 74
# Perform Fisher's exact test
fisher_test_result <- fisher.test(contingency_table, alternative = "two.sided")
print(fisher_test_result)
##
## Fisher's Exact Test for Count Data
##
## data: contingency_table
## p-value = 0.8416
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4894725 2.7653626
## sample estimates:
## odds ratio
## 1.157549
# plot the results with confidence interval
p <- ggplot(results_df, aes(x = Group, y = Success_Rate)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_Low, ymax = CI_High), width = 0.15) +
scale_y_continuous(limits = c(-0.5, 1)) +
labs(
subtitle = paste0("Fisher's Exact Test (AI vs User initiated) p = ", signif(fisher_test_result$p.value, 3)),
title = "Success Rate by AI Initiation",
x = "Condition",
y = "Success Rate"
) +
theme_minimal()
p
ggsave("plots/Success_Rate_by_AI_Initiation.png", plot = p, width = 8, height = 3, dpi = 300, bg = "white")
# Summarize data into successes and failures
successes <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE])
)
failures <- c(
sum(all_data$Failure[all_data$AI_Initiates == TRUE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE])
)
ai_result <- analyze(successes[1], failures[1])
noASK <- analyze(successes[2], failures[2])
# Create a data frame for the results
results_df <- data.frame(
Group = c("AI Initiated", "User Initiated without asking for help"),
Successes = successes,
Failures = failures,
Success_Rate = c(ai_result$success_rate, noASK$success_rate),
SD = c(ai_result$sd, noASK$sd),
CI_Low = c(ai_result$ci_95[1], noASK$ci_95[1]),
CI_High = c(ai_result$ci_95[2], noASK$ci_95[2])
)
# Print the results
print(results_df)
## Group Successes Failures Success_Rate
## 1 AI Initiated 16 73 0.17977528
## 2 User Initiated without asking for help 7 74 0.08641975
## SD CI_Low CI_High
## 1 0.04070394 0.09999557 0.2595550
## 2 0.03122032 0.02522793 0.1476116
# Fisher's test
# Filter only the two groups
contingency_data <- results_df %>%
filter(Group %in% c("AI Initiated", "User Initiated without asking for help")) %>%
select(Group, Successes, Failures)
# Create matrix for Fisher's test
contingency_table <- matrix(
c(
contingency_data$Successes[contingency_data$Group == "AI Initiated"],
contingency_data$Failures[contingency_data$Group == "AI Initiated"],
contingency_data$Successes[contingency_data$Group == "User Initiated without asking for help"],
contingency_data$Failures[contingency_data$Group == "User Initiated without asking for help"]
),
nrow = 2,
byrow = TRUE
)
# As above, rows are the groups and columns the outcomes
rownames(contingency_table) <- c("AI Initiated", "User Initiated w/o Help")
colnames(contingency_table) <- c("Successes", "Failures")
print(contingency_table)
## Successes Failures
## AI Initiated 16 73
## User Initiated w/o Help 7 74
# Perform Fisher's exact test
fisher_test_result <- fisher.test(contingency_table, alternative = "two.sided")
print(fisher_test_result)
##
## Fisher's Exact Test for Count Data
##
## data: contingency_table
## p-value = 0.1147
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8374566 7.0356439
## sample estimates:
## odds ratio
## 2.305987
# plot the results with confidence interval
p <- ggplot(results_df, aes(x = Group, y = Success_Rate)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_Low, ymax = CI_High), width = 0.15) +
scale_y_continuous(limits = c(-0.5, 1)) +
labs(
subtitle = paste0("Fisher's Exact Test (AI vs User no ask) p = ", signif(fisher_test_result$p.value, 3)),
title = "Success Rate by AI Initiation (w/o asking facilitators)",
x = "Condition",
y = "Success Rate"
) +
theme_minimal()
p
ggsave("plots/Success_Rate_by_AI_Initiation_-_no_asked.png", plot = p, width = 8, height = 3, dpi = 300, bg = "white")
What if we assume that all the people who asked for help would have given up on Rosie and walked away defeated?
# Summarize data into success and failures
successes <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE])
)
failures <- c(
sum(all_data$Failure[all_data$AI_Initiates == TRUE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE])
)
# Assume everyone who asked for help would have failed: recode their recorded
# successes as failures (note this sums help-assisted successes from both conditions)
failures[3] <- failures[3] + sum(all_data$Success[all_data$Asks_for_Help == 1])
successes[3] <- successes[3] # Stays the same (no successes from those asking for help)
# Create analyzed results
ai_result <- analyze(successes[1], failures[1])
noASK_result <- analyze(successes[3], failures[3])
# Now adjust data to have just AI Initiated vs User Initiated (without help)
# Overwrite the general user-initiated group (2nd position) with the no-help group (3rd position)
successes[2] <- successes[3]
failures[2] <- failures[3]
# Drop the duplicated middle entry, leaving AI Initiated vs User Initiated (no help)
successes <- successes[-2]
failures <- failures[-2]
# Check the revised numbers
print(successes)
## [1] 16 7
print(failures)
## [1] 73 82
# Create a data frame for the results (using noASK_result, which reflects the
# recoded failures above)
results_df <- data.frame(
Group = c("AI Initiated", "User Initiated without asking for help"),
Successes = successes,
Failures = failures,
Success_Rate = c(ai_result$success_rate, noASK_result$success_rate),
SD = c(ai_result$sd, noASK_result$sd),
CI_Low = c(ai_result$ci_95[1], noASK_result$ci_95[1]),
CI_High = c(ai_result$ci_95[2], noASK_result$ci_95[2])
)
results_df
## Group Successes Failures Success_Rate
## 1 AI Initiated 16 73 0.17977528
## 2 User Initiated without asking for help 7 82 0.07865169
## SD CI_Low CI_High
## 1 0.04070394 0.09999557 0.2595550
## 2 0.02853454 0.02272398 0.1345794
# Filter only the two groups
contingency_data <- results_df %>%
filter(Group %in% c("AI Initiated", "User Initiated without asking for help")) %>%
select(Group, Successes, Failures)
# Create matrix for Fisher's test
contingency_table <- matrix(
c(
contingency_data$Successes[contingency_data$Group == "AI Initiated"],
contingency_data$Failures[contingency_data$Group == "AI Initiated"],
contingency_data$Successes[contingency_data$Group == "User Initiated without asking for help"],
contingency_data$Failures[contingency_data$Group == "User Initiated without asking for help"]
),
nrow = 2,
byrow = TRUE
)
# Rows are the groups, columns the outcomes
rownames(contingency_table) <- c("AI Initiated", "User Initiated w/o Help")
colnames(contingency_table) <- c("Successes", "Failures")
print(contingency_table)
## Successes Failures
## AI Initiated 16 73
## User Initiated w/o Help 7 82
# Perform Fisher's exact test
fisher_test_result <- fisher.test(contingency_table, alternative = "two.sided")
print(fisher_test_result)
##
## Fisher's Exact Test for Count Data
##
## data: contingency_table
## p-value = 0.07209
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.9307952 7.7712041
## sample estimates:
## odds ratio
## 2.554301
print(fisher_test_result$p.value)
## [1] 0.07208567
# Print the results
print(results_df)
## Group Successes Failures Success_Rate
## 1 AI Initiated 16 73 0.17977528
## 2 User Initiated without asking for help 7 82 0.07865169
## SD CI_Low CI_High
## 1 0.04070394 0.09999557 0.2595550
## 2 0.02853454 0.02272398 0.1345794
# plot the results with confidence interval
p <- ggplot(results_df, aes(x = Group, y = Success_Rate)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_Low, ymax = CI_High), width = 0.15) +
scale_y_continuous(limits = c(-0.5, 1)) +
labs(
subtitle = paste0("Fisher's Exact Test (AI vs user no ask, asking counted as failure) p = ", signif(fisher_test_result$p.value, 3)),
title = "Success Rate by AI Initiation (asking counted as failure)",
x = "Condition",
y = "Success Rate"
) +
theme_minimal()
p
ggsave("plots/Success_Rate_by_AI_Initiation_-_asking_is_failing.png", plot = p, width = 8, height = 3, dpi = 300, bg = "white")
# Count, among successful interactions, how many asked for help vs did not
Asked <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE & all_data$Asks_for_Help == TRUE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == TRUE])
)
DidNotAsk <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE & all_data$Asks_for_Help == FALSE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE])
)
# Create analyzed results
ai_result <- analyze(Asked[1], DidNotAsk[1])
user_result <- analyze(Asked[2], DidNotAsk[2])
# Create a data frame for the results; the rate here is the share of
# successful interactions in which the participant asked for help
results_df <- data.frame(
Group = c("AI Initiated", "User Initiated"),
Asked = Asked,
DidNotAsk = DidNotAsk,
Asked_Rate = c(ai_result$success_rate, user_result$success_rate),
SD = c(ai_result$sd, user_result$sd),
CI_Low = c(ai_result$ci_95[1], user_result$ci_95[1]),
CI_High = c(ai_result$ci_95[2], user_result$ci_95[2])
)
# Print the results
print(results_df)
## Group Asked DidNotAsk Asked_Rate SD CI_Low CI_High
## 1 AI Initiated 1 15 0.0625 0.06051536 -0.05611011 0.1811101
## 2 User Initiated 7 7 0.5000 0.13363062 0.23808398 0.7619160
# Filter only the two groups
contingency_data <- results_df %>%
filter(Group %in% c("AI Initiated", "User Initiated")) %>%
select(Group, Asked, DidNotAsk)
contingency_data
## Group Asked DidNotAsk
## 1 AI Initiated 1 15
## 2 User Initiated 7 7
# Remove the first column from contingency_data
contingency_data <- contingency_data[, -1]
rownames(contingency_data) <- c("AI Initiated", "User Initiated")
colnames(contingency_data) <- c("Asked", "DidNotAsk")
contingency_data
## Asked DidNotAsk
## AI Initiated 1 15
## User Initiated 7 7
# Perform Fisher's exact test
fisher_test_result <- fisher.test(contingency_data, alternative = "two.sided")
print(fisher_test_result)
##
## Fisher's Exact Test for Count Data
##
## data: contingency_data
## p-value = 0.01209
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.001395919 0.741585602
## sample estimates:
## odds ratio
## 0.07342368
print(fisher_test_result$p.value)
## [1] 0.01209395
# plot the results with confidence interval
p <- ggplot(results_df, aes(x = Group, y = Asked_Rate)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_Low, ymax = CI_High), width = 0.15) +
scale_y_continuous(limits = c(-0.5, 1)) +
labs(
subtitle = paste0("Fisher's Exact Test (asked vs did not ask, by initiator) p = ", signif(fisher_test_result$p.value, 3)),
title = "Asking Rate by AI Initiation",
x = "Condition",
y = "Asked Rate"
) +
theme_minimal()
p
ggsave("plots/Askin_Rate_by_AI_Initiation.png", plot = p, width = 8, height = 3, dpi = 300, bg = "white")
library(Hmisc)
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
analyse_wilson_Confidence <- function(successes, failures) {
n <- successes + failures
p <- successes / n
sd <- sqrt(p * (1 - p) / n)
ci <- binconf(x = successes, n = successes+failures, method = "wilson")
return(list(
success_rate = p,
sd = sd,
ci_lower = ci[2],
ci_upper = ci[3]
))
}
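For reference, the Wilson score interval that binconf() computes here is:

$$\frac{\hat{p} + \frac{z^2}{2n} \pm z\sqrt{\frac{\hat{p}(1-\hat{p})}{n} + \frac{z^2}{4n^2}}}{1 + \frac{z^2}{n}}, \qquad z = 1.96$$

Unlike the Wald interval above, its bounds always stay within [0, 1], which matters with success counts this small.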
successes <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE]) - sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == TRUE])
)
failures <- c(
sum(all_data$Failure[all_data$AI_Initiates == TRUE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE]) +
sum(all_data$Success[all_data$Asks_for_Help == 1])
)
# Create analyzed results
ai_result <- analyse_wilson_Confidence(successes[1], failures[1])
user_result <- analyse_wilson_Confidence(successes[2], failures[2])
noASK_result <- analyse_wilson_Confidence(successes[3], failures[3])
ask_failing_result <- analyse_wilson_Confidence(successes[4], failures[4])
results_df <- data.frame(
Group = c("AI Initiated", "User Initiated", "w/o Asking for Help", "Asking is failing"),
Success_Rate = c(ai_result$success_rate, user_result$success_rate,
noASK_result$success_rate, ask_failing_result$success_rate),
CI_Low = c(ai_result$ci_lower, user_result$ci_lower,
noASK_result$ci_lower, ask_failing_result$ci_lower),
CI_High = c(ai_result$ci_upper, user_result$ci_upper,
noASK_result$ci_upper, ask_failing_result$ci_upper)
)
# Run Fisher's exact tests
pvals <- c(
fisher.test(matrix(c(successes[1], failures[1], successes[2], failures[2]), nrow = 2))$p.value,
fisher.test(matrix(c(successes[1], failures[1], successes[3], failures[3]), nrow = 2))$p.value,
fisher.test(matrix(c(successes[1], failures[1], successes[4], failures[4]), nrow = 2))$p.value
)
fisher.test(matrix(c(successes[1], failures[1], successes[2], failures[2]), nrow = 2))
##
## Fisher's Exact Test for Count Data
##
## data: matrix(c(successes[1], failures[1], successes[2], failures[2]), nrow = 2)
## p-value = 0.8416
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4894725 2.7653626
## sample estimates:
## odds ratio
## 1.157549
# Format p-values for plotting
p_labels <- c("",
paste0("p = ", signif(pvals[1], 3)),
paste0("p = ", signif(pvals[2], 3)),
paste0("p = ", signif(pvals[3], 3)))
results_df$p_label <- p_labels
results_df
## Group Success_Rate CI_Low CI_High p_label
## 1 AI Initiated 0.17977528 0.11379893 0.2722512
## 2 User Initiated 0.15909091 0.09719902 0.2495012 p = 0.842
## 3 w/o Asking for Help 0.08641975 0.04249258 0.1677992 p = 0.115
## 4 Asking is failing 0.07865169 0.03861982 0.1535514 p = 0.0721
# Fix the row order (and hence the x-axis order) for plotting
results_df <- results_df[c(1, 2, 3, 4), ]
results_df$Group <- factor(results_df$Group, levels = results_df$Group)
p <- ggplot(results_df, aes(x = Group, y = Success_Rate)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_Low, ymax = CI_High), width = 0.15) +
geom_text(aes(label = p_label, y = Success_Rate + 0.05), size = 3, vjust = -4) +
geom_hline(yintercept = 0.5, linetype = "solid", color = "red", linewidth = 1.2) +
scale_y_continuous(limits = c(0, 1)) +
labs(
title = "Success Rate by AI Initiation",
subtitle = "Fisher's Exact Test p-values; error bars are Wilson score intervals",
x = "Condition",
y = "Success Rate"
) +
theme_minimal()
p
ggsave("plots/Success_Rate_by_AI_Initiation_combined.png", plot = p, width = 8, height = 7, dpi = 300, bg = "white")
# Helper: run Fisher's exact test on one 2x2 comparison and collect the odds
# ratio, its confidence interval, and the p-value
get_fisher_ci <- function(success_a, failure_a, success_b, failure_b) {
table <- matrix(c(success_a, failure_a, success_b, failure_b), nrow = 2)
test <- fisher.test(table)
return(data.frame(
Comparison = paste0("AI vs ", deparse(substitute(success_b))),
odds_ratio = test$estimate,
CI_Low = test$conf.int[1],
CI_High = test$conf.int[2],
p_value = test$p.value
))
}
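As a quick usage sketch, feeding it the AI-initiated versus overall user-initiated counts from above (16/73 vs 14/74) reproduces the first comparison:

get_fisher_ci(16, 73, 14, 74)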
# Recompute the success/failure counts for the four comparison groups
successes <- c(
sum(all_data$Success[all_data$AI_Initiates == TRUE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE]),
sum(all_data$Success[all_data$AI_Initiates == FALSE]) - sum(all_data$Success[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == TRUE])
)
failures <- c(
sum(all_data$Failure[all_data$AI_Initiates == TRUE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE & all_data$Asks_for_Help == FALSE]),
sum(all_data$Failure[all_data$AI_Initiates == FALSE]) +
sum(all_data$Success[all_data$Asks_for_Help == 1])
)
# Run Fisher's test for comparisons with AI Initiated
labels <- c("User Initiated", "User Initiated (No Help)", "User Initiated (Help Fails)")
fisher_results <- do.call(rbind, lapply(2:4, function(i) {
test <- fisher.test(matrix(c(successes[1], failures[1], successes[i], failures[i]), nrow = 2))
data.frame(
Group = labels[i - 1],
odds_ratio = test$estimate,
CI_Low = test$conf.int[1],
CI_High = test$conf.int[2],
p_value = test$p.value
)
}))
ggplot(fisher_results, aes(x = Group, y = odds_ratio)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = CI_Low, ymax = CI_High), width = 0.2) +
geom_text(aes(label = paste0("p = ", signif(p_value, 3))), vjust = -1.5) +
scale_y_log10() + # Odds ratios should be plotted on log scale
labs(
title = "Fisher's Exact Test: Odds Ratios vs AI Initiated",
x = "Compared Group",
y = "Odds Ratio (log scale)"
) +
theme_minimal()