#Clean Entertainment data frame to explore the effect of measured other variables
study2_data_raw_ent_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Entertainment (Believability AND Sharing Intentions) - FINAL_July 28, 2023_06.18.csv")
## Rows: 63 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head (study2_data_raw_ent_all)
## # A tibble: 6 × 166
## StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "2023-07… "2023-… "IP A… "100" "785" "True" "2023-07-02…
## 4 "2023-07… "2023-… "IP A… "100" "1411" "True" "2023-07-02…
## 5 "2023-07… "2023-… "IP A… "100" "1686" "True" "2023-07-02…
## 6 "2023-07… "2023-… "IP A… "100" "741" "True" "2023-07-02…
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## # `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## # USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## # SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## # EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `3E_R_BELIEVE` <chr>, …
# Drop the first 4 raws as those were used for test
study2_data_all_ent<- study2_data_raw_ent_all[-c(1:2),]
head(study2_data_all_ent)
## # A tibble: 6 × 166
## StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2023-07-… 2023-0… IP Ad… 100 785 True 2023-07-02 …
## 2 2023-07-… 2023-0… IP Ad… 100 1411 True 2023-07-02 …
## 3 2023-07-… 2023-0… IP Ad… 100 1686 True 2023-07-02 …
## 4 2023-07-… 2023-0… IP Ad… 100 741 True 2023-07-02 …
## 5 2023-07-… 2023-0… IP Ad… 100 735 True 2023-07-02 …
## 6 2023-07-… 2023-0… IP Ad… 100 1578 True 2023-07-02 …
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## # `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## # USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## # SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## # EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `3E_R_BELIEVE` <chr>, …
nrow(study2_data_all_ent)
## [1] 61
#```{r}
#Likert scale has
likely_values <- c(
"very unlikely",
"moderately unlikely",
"slightly unlikely",
"slightly likely",
"moderately likely",
"very likely"
)
consume_values <- c(
"Less than 1 hour per day",
"1-2 hours per day",
"2-3 hours per day",
"3-4 hours per day",
"5+ hours per day"
)
interest_levels <- c ( "not at all interested in this",
"not interested",
"neither not interested nor interested",
"interested",
"very much interested"
)
likley_shory_values <- c("Very unlikely",
"Unlikely",
"Neither likely nor unlikely",
"Likely",
"Very likely")
knowledgable_values <- c(
"very unknowledgeable",
"somewhat unknowledgeable",
"neither",
"somewhat knowledgeable",
"very knowledgeable"
)
easy_levels <- c("Very difficult",
"Difficult",
"Neither difficult nor easy",
"Easy",
"Very easy")
boolen_q <- c( "Yes", "No")
importance_levels <- c ("very unimportant" ,
"unimportant",
"neither important nor unimportant",
"important" ,
"very important"
)
novel_levels <-c ("not at all novel",
"not novel",
"neither novel nor not novel",
"novel",
"very novel")
familiar_levels <-c ("not at all familiar",
"not familiar",
"neither familiar nor unfamiliar",
"familiar",
"very familiar")
believe_levels <- c("very unlikely",
"unlikely",
"neither likely nor unlikely",
"likely",
"very likely" )
####################
#After the survey, the postsurvey questions
judging_impact_values <- c(
"extremely unlikely",
"moderately unlikely",
"slightly unlikely",
"slightly likely",
"moderately likely",
"extremely likely"
)
sharing_accuracy_level <- c ("not at all important",
"moderately important" ,
"slightly important",
"neither important nor unimportant",
"very important" ,
"extremely important"
)
ent_individual_df<-study2_data_all_ent |> filter(Finished == "True") |>
mutate(Duration = as.numeric(`Duration (in seconds)`),
AGE = as.numeric(AGE),
BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
USE_SNS = ordered(USE_SNS, levels = consume_values),
use_sns = as.numeric(USE_SNS, levels = consume_values),
WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
select(
ResponseId,
Duration,
AGE,
BROWSE_INTERNET,
browse_internet,
USE_SNS,
use_sns,
SNS_PLATFORM_USE,
WATCHING_BEHAVIOR,
watching_behavior,
SHARING_BEHAVIOR,
sharing_behavior,
KNOW_DEEPFAKE,
KNOW_CREATE_DF,
know_create_df,
EXP_CREATE_DF,
EASE_CREATE_DF,
ease_create_df
) |> mutate(
SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
value = TRUE
) |>
unnest() |>
mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |>
pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.
ent_behavior_df <-study2_data_all_ent |> filter(Finished == "True") |>
select(ResponseId, matches ("_R_"), matches ("_DF_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question"), "_", extra= "merge") |>
pivot_wider(names_from = question, values_from = value)|>
select (ResponseId, fake, video ,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
#Removing the error value created with Dont share 3E
#filter (video != "3E") |>
mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
BELIEVE= str_to_lower(BELIEVE),
believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
IMPT= str_to_lower(IMPT),
impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
INTEREST = str_to_lower(INTEREST),
INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
FAMILIAR = str_to_lower (FAMILIAR),
familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
NOVEL = str_to_lower(NOVEL),
novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))|>
#Select all the numeric values
select (ResponseId,
fake,
video,
SHARE,
BELIEVE,
believe_numerical,
share_numerical,
impt_numerical ,
interest_numerical,
familiar_numerical,
novel_numerical)
#Selecting the Control group and convert likert value to numerical in post survey
ent_behavior_df_condition_cnrl <-study2_data_all_ent |> filter(Finished == "True") |>
mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
filter (condition == "Control") |>
mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
SHARING_INT = str_to_lower(CTRL_SHARING_INT),
sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)),
cy_numerical = as.numeric(ordered(CTRL_SHARING_ACCY, levels = sharing_accuracy_level)))|>
select (ResponseId,
condition,
JUDGING_IMPACT,
SHARING_PERSP,
SHARING_INT,
SHARING_ACCY,
judging_impact_numerical,
sharing_persp_numerical,
sharing_int_numerical,
sharing_accy_numerical)
#Selecting the Treatment group and convert likert value to numerical in post survey
ent_behavior_df_condition_trmnt <-study2_data_all_ent |> filter(Finished == "True") |>
mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
filter (condition == "Treatment") |>
mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
SHARING_INT = str_to_lower(ACT_SHARING_INT),
sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)))|>
select (ResponseId,
condition,
JUDGING_IMPACT,
SHARING_PERSP,
SHARING_INT,
SHARING_ACCY,
judging_impact_numerical,
sharing_persp_numerical,
sharing_int_numerical,
sharing_accy_numerical)
# Binding the control and treatment into one table
post_survey_df <-bind_rows(ent_behavior_df_condition_cnrl, ent_behavior_df_condition_trmnt)
# Binding the pre survey and post survey of each individual (60 participants)
pre_post_df <-merge (ent_behavior_df,post_survey_df)
# The data frame with their rating for each video and post survey
ent_df <-merge (ent_behavior_df,pre_post_df)
library(ggplot2)
ggplot(ent_df, aes(x=BELIEVE, fill = fake )) + geom_bar() +
facet_wrap(~condition, nrow=2)
ggplot(ent_df, aes(x=SHARE, fill = fake)) +
geom_bar() +
facet_wrap(~condition, nrow=2)
histogram_df_control<-ent_df |> filter (condition =="Control")
histinfo=hist(histogram_df_control$share_numerical)
histinfo=hist(histogram_df_control$believe_numerical)
histogram_df_treatment<-ent_df |> filter (condition =="Treatment")
histinfo=hist(histogram_df_treatment$share_numerical)
histinfo=hist(histogram_df_treatment$believe_numerical)
#Total path
fit.totaleffect=lm(share_numerical~condition,ent_df)
summary(fit.totaleffect)
##
## Call:
## lm(formula = share_numerical ~ condition, data = ent_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3556 -0.7258 -0.7258 0.6444 4.2742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.72581 0.07566 22.810 < 2e-16 ***
## conditionTreatment 0.62975 0.10789 5.837 7.99e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.459 on 730 degrees of freedom
## Multiple R-squared: 0.04459, Adjusted R-squared: 0.04328
## F-statistic: 34.07 on 1 and 730 DF, p-value: 7.988e-09
fit.mediator=lm(believe_numerical~condition,ent_df)
summary(fit.mediator)
##
## Call:
## lm(formula = believe_numerical ~ condition, data = ent_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.08611 -1.08611 -0.08611 1.09946 2.09946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.90054 0.07456 38.902 <2e-16 ***
## conditionTreatment 0.18557 0.10632 1.745 0.0813 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.438 on 730 degrees of freedom
## Multiple R-squared: 0.004156, Adjusted R-squared: 0.002792
## F-statistic: 3.047 on 1 and 730 DF, p-value: 0.08133
library(mediation)
## Warning: package 'mediation' was built under R version 4.3.1
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
results = mediate(fit.mediator, fit.dv, treat='condition', mediator='believe_numerical', boot=T)
## Warning in mediate(fit.mediator, fit.dv, treat = "condition", mediator =
## "believe_numerical", : treatment and control values do not match factor levels;
## using Control and Treatment as control and treatment, respectively
## Running nonparametric bootstrap
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME -0.0304 -0.0765 0.00 0.092 .
## ADE 0.6601 0.4538 0.87 <2e-16 ***
## Total Effect 0.6297 0.4208 0.85 <2e-16 ***
## Prop. Mediated -0.0482 -0.1481 0.00 0.092 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 732
##
##
## Simulations: 1000
# OUr data ent_df
one.way <- aov( share_numerical ~ condition, data = ent_df)
summary(one.way)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 72.6 72.56 34.07 7.99e-09 ***
## Residuals 730 1554.5 2.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library (lsr)
## Warning: package 'lsr' was built under R version 4.3.1
# Eta sqr one way
Eta_oneway <-etaSquared(one.way)
print (Eta_oneway)
## eta.sq eta.sq.part
## condition 0.04459247 0.04459247
ent_twoway_no_interactions <- aov (share_numerical ~ condition+ believe_numerical,
data= ent_df)
summary (ent_twoway_no_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 72.6 72.56 34.94 5.23e-09 ***
## believe_numerical 1 40.5 40.47 19.48 1.17e-05 ***
## Residuals 729 1514.1 2.08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eta sqr one way
Eta_twoway_no_interactions <-etaSquared(ent_twoway_no_interactions)
print (Eta_twoway_no_interactions)
## eta.sq eta.sq.part
## condition 0.04879546 0.04982526
## believe_numerical 0.02487127 0.02603210
ent_twoway_with_interactions <- aov (share_numerical ~ condition * believe_numerical,
data=ent_df)
summary ( ent_twoway_with_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 72.6 72.56 35.189 4.62e-09 ***
## believe_numerical 1 40.5 40.47 19.627 1.09e-05 ***
## condition:believe_numerical 1 13.0 13.03 6.319 0.0122 *
## Residuals 728 1501.0 2.06
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eta sqr one way
Eta_twoway_with_interactions <-etaSquared(ent_twoway_with_interactions)
print (Eta_twoway_with_interactions )
## eta.sq eta.sq.part
## condition 0.04879546 0.050236016
## believe_numerical 0.02487127 0.026252132
## condition:believe_numerical 0.00800756 0.008605317
ent_df |> ggplot(aes(condition,share_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
ent_df |> ggplot(aes(condition,believe_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
# Load the openxlsx package
#install.packages("writexl")
library(writexl)
## Warning: package 'writexl' was built under R version 4.3.2
write_xlsx(
ent_df,"C:/Users/Dell/Downloads/ent_df.xlsx",
col_names = TRUE,
format_headers = TRUE)
#Use the file ent_df the do some bar grapphs on DF and R
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Calculate the mean belief levels and standard errors for 'DF' and 'R' within each condition
belief_stats <- ent_df %>%
group_by(condition, fake) %>%
summarise(
Mean_Belief_Level = mean(believe_numerical),
SE = sd(believe_numerical) / sqrt(n())
) %>%
ungroup()
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
# Plotting the bars for DF and R with error bars
ggplot(belief_stats, aes(x = condition, y = Mean_Belief_Level, fill = fake)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.35) +
geom_errorbar(aes(ymin = Mean_Belief_Level - SE, ymax = Mean_Belief_Level + SE),
position = position_dodge(0.35), width = 0.25) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean Belief Level", fill = "Type") +
ggtitle("Mean Belief Levels for DF and R by Condition with Error Bars") +
theme_minimal()
# Diference between the belive level of DF
# Load necessary libraries
library(dplyr)
# Calculate the mean belief levels for 'R' in each condition
r_means <- ent_df %>%
filter(fake == 'DF') %>%
group_by(condition) %>%
summarise(Mean_Belief_Level = mean(believe_numerical))
# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
pull()
# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]
# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.0951612903225807"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 2.5801749271137"
# Load necessary libraries
library(dplyr)
# Calculate the mean belief levels for 'R' in each condition
r_means <- ent_df %>%
filter(fake == 'R') %>%
group_by(condition) %>%
summarise(Mean_Belief_Level = mean(believe_numerical))
# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
pull()
# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]
# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.275985663082437"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 13.0619168787108"
# Read the data from the ent_df
# Separate the belief levels for 'DF' in each condition
df_control_beliefs <- filter(ent_df, fake == 'DF' & condition == 'Control')$believe_numerical
df_treatment_beliefs <- filter(ent_df, fake == 'DF' & condition == 'Treatment')$believe_numerical
# Perform the t-test
t_test_result <- t.test(df_control_beliefs, df_treatment_beliefs)
# Output the t-test result
print(t_test_result)
##
## Welch Two Sample t-test
##
## data: df_control_beliefs and df_treatment_beliefs
## t = -0.6964, df = 360.72, p-value = 0.4866
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3638874 0.1735649
## sample estimates:
## mean of x mean of y
## 3.688172 3.783333
# The t-test for the mean belief levels for "DF" between the Control and Treatment groups yields a t-statistic of approximately -0.697 and a p-value of approximately 0.486.Since the p-value is greater than 0.05 (assuming a standard significance level), we do not have enough evidence to reject the null hypothesis of equal means. This means there is no statistical significance to the difference in 'DF' belief levels between the Control and Treatment groups with a 95% confidence level.
# Separate the belief levels for 'R' in each condition
r_control_beliefs <- filter(ent_df, fake == 'R' & condition == 'Control')$believe_numerical
r_treatment_beliefs <- filter(ent_df, fake == 'R' & condition == 'Treatment')$believe_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_beliefs, r_treatment_beliefs)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_beliefs and r_treatment_beliefs
## t = -2.2863, df = 356.56, p-value = 0.02282
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.51338321 -0.03858812
## sample estimates:
## mean of x mean of y
## 2.112903 2.388889
#The t-test for the mean belief levels for "R" between the Control and Treatment groups yields a t-statistic of approximately -2.29 and a p-value of approximately 0.0226.Since the p-value is less than 0.05 (assuming a standard significance level), we have evidence to reject the null hypothesis of equal means. This indicates that there is a statistically significant difference in 'R' belief levels between the Control and Treatment groups with a 95% confidence level.
library(readxl)
library(dplyr)
library(ggplot2)
# Filter the data for 'DF' and 'R' groups
df_data <- ent_df %>% filter(fake == 'DF')
r_data <- ent_df %>% filter(fake != 'DF')
# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
r_stats <- r_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
# Combine the data
plot_data <- bind_rows(
mutate(df_stats, group = "DF"),
mutate(r_stats, group = "R")
)
# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd, group = group),
position = position_dodge(0.7), width = 0.25
) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of DF and R between Control and Treatment condition") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_x_discrete(labels = c("Control", "Treatment")) +
guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))
# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)
library(dplyr)
# Filter the data for the DF group
df_data <- filter(ent_df, fake == 'DF')
# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] 0.681362
##
## $percentage_difference
## [1] 47.28856
#results
#The difference in the mean "share_numerical" value for the DF (Deepfake Videos) group between the Treatment and Control conditions is approximately 0.681. In percentage terms, this represents an approximate 47.29% increase in the Treatment group compared to the Control group.
library(dplyr)
# Filter the data for the R group (assuming R group is represented by all non-DF values in 'fake' column)
r_data <- filter(ent_df, fake != 'DF')
# Group the data by condition and calculate the mean of 'share_numerical' for the R group
r_grouped <- r_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- r_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- r_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
r_difference <- mean_treatment - mean_control
r_percentage_difference <- (r_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = r_difference, percentage_difference = r_percentage_difference)
## $difference
## [1] 0.5781362
##
## $percentage_difference
## [1] 28.75223
#results'
#The difference in the mean "share_numerical" value for the R (Real Videos) group between the Treatment and Control conditions is approximately 0.578. In percentage terms, this represents an approximate 28.75% increase in the Treatment group compared to the Control group.