#Clean Political data frame to explore the effect of measured other variables
study2_data_raw_pol_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Politics (Believability AND Sharing Intentions) - FINAL - Copy_July 28, 2023_06.53.csv")
## Rows: 64 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head (study2_data_raw_pol_all)
## # A tibble: 6 × 166
## StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "6/21/20… "6/21/… "Surv… "100" "12" "TRUE" "6/21/2023 …
## 4 "6/21/20… "6/21/… "IP A… "100" "1116" "TRUE" "6/21/2023 …
## 5 "6/26/20… "6/26/… "IP A… "100" "716" "TRUE" "6/26/2023 …
## 6 "6/26/20… "6/26/… "IP A… "100" "748" "TRUE" "6/26/2023 …
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## # `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## # USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## # SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## # EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …
# Drop the first 4 raws as those were used for test
study2_data_raw_pol<- study2_data_raw_pol_all[-c(1:4),]
head(study2_data_raw_pol)
## # A tibble: 6 × 166
## StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 6/26/202… 6/26/2… IP Ad… 100 716 TRUE 6/26/2023 2…
## 2 6/26/202… 6/26/2… IP Ad… 100 748 TRUE 6/26/2023 2…
## 3 6/26/202… 6/26/2… IP Ad… 100 1174 TRUE 6/26/2023 2…
## 4 6/26/202… 6/26/2… IP Ad… 100 991 TRUE 6/26/2023 2…
## 5 6/26/202… 6/26/2… IP Ad… 100 1255 TRUE 6/26/2023 2…
## 6 6/26/202… 6/26/2… IP Ad… 100 882 TRUE 6/26/2023 2…
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## # `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## # USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## # SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## # EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …
nrow(study2_data_raw_pol)
## [1] 60
#```{r}
#Likert scale has
likely_values <- c(
"very unlikely",
"moderately unlikely",
"slightly unlikely",
"slightly likely",
"moderately likely",
"very likely"
)
consume_values <- c(
"Less than 1 hour per day",
"1-2 hours per day",
"2-3 hours per day",
"3-4 hours per day",
"5+ hours per day"
)
interest_levels <- c ( "not at all interested in this",
"not interested",
"neither not interested nor interested",
"interested",
"very much interested"
)
likley_shory_values <- c("Very unlikely",
"Unlikely",
"Neither likely nor unlikely",
"Likely",
"Very likely")
knowledgable_values <- c(
"very unknowledgeable",
"somewhat unknowledgeable",
"neither",
"somewhat knowledgeable",
"very knowledgeable"
)
easy_levels <- c("Very difficult",
"Difficult",
"Neither difficult nor easy",
"Easy",
"Very easy")
boolen_q <- c( "Yes", "No")
importance_levels <- c ("very unimportant" ,
"unimportant",
"neither important nor unimportant",
"important" ,
"very important"
)
novel_levels <-c ("not at all novel",
"not novel",
"neither novel nor not novel",
"novel",
"very novel")
familiar_levels <-c ("not at all familiar",
"not familiar",
"neither familiar nor unfamiliar",
"familiar",
"very familiar")
believe_levels <- c("very unlikely",
"unlikely",
"neither likely nor unlikely",
"likely",
"very likely" )
####################
#After the survey, the postsurvey questions
judging_impact_values <- c(
"extremely unlikely",
"moderately unlikely",
"slightly unlikely",
"slightly likely",
"moderately likely",
"extremely likely"
)
sharing_accuracy_level <- c ("not at all important",
"moderately important" ,
"slightly important",
"neither important nor unimportant",
"very important" ,
"extremely important"
)
#Get all the data into lower case since likert will be all in lower
pol_individual_df <- study2_data_raw_pol |>
mutate(Duration = as.numeric(`Duration (in seconds)`),
AGE = as.numeric(AGE),
BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
USE_SNS = ordered(USE_SNS, levels = consume_values),
use_sns = as.numeric(USE_SNS, levels = consume_values),
WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
dplyr::select(
ResponseId,
Duration,
AGE,
BROWSE_INTERNET,
browse_internet,
USE_SNS,
use_sns,
SNS_PLATFORM_USE,
WATCHING_BEHAVIOR,
watching_behavior,
SHARING_BEHAVIOR,
sharing_behavior,
KNOW_DEEPFAKE,
KNOW_CREATE_DF,
know_create_df,
EXP_CREATE_DF,
EASE_CREATE_DF,
ease_create_df
) |> mutate(
SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
value = TRUE
) |>
unnest() |>
mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |>
pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.
pol_behavior_df <- study2_data_raw_pol |>
select(ResponseId, matches("_R_"), matches ("_DF_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question"), "_", extra= "merge") |>
pivot_wider(names_from = question, values_from = value)|>
select (ResponseId, fake, video ,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
#Removing the error value created with Dont share 3E
filter (video != "3E") |>
mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
BELIEVE= str_to_lower(BELIEVE),
believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
IMPT= str_to_lower(IMPT),
impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
INTEREST = str_to_lower(INTEREST),
INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
FAMILIAR = str_to_lower (FAMILIAR),
familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
NOVEL = str_to_lower(NOVEL),
novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))|>
#there is an unnecessary 16E video in the politics, remove that
filter (video != "16E") |>
#Select all the numeric values
select (ResponseId,
fake,
video,
SHARE,
BELIEVE,
believe_numerical,
share_numerical,
impt_numerical ,
interest_numerical,
familiar_numerical,
novel_numerical)
#Selecting the Control group and convert likert value to numerical in post survey
pol_behavior_df_condition_trtmnt <-study2_data_raw_pol |>
mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
filter (condition == "Treatment") |>
mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
SHARING_INT = str_to_lower(ACT_SHARING_INT),
sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level))) |>
select (ResponseId,
condition,
JUDGING_IMPACT,
SHARING_PERSP,
SHARING_INT,
SHARING_ACCY,
judging_impact_numerical,
sharing_persp_numerical,
sharing_int_numerical,
sharing_accy_numerical)
#Selecting the Treatment group and convert likert value to numerical in post survey
pol_behavior_df_condition_cnrl <-study2_data_raw_pol |>
mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
filter (condition == "Control") |>
mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
SHARING_INT = str_to_lower(CTRL_SHARING_INT),
sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)),
cy_numerical = as.numeric(ordered(CTRL_SHARING_ACCY, levels = sharing_accuracy_level)))|>
select (ResponseId,
condition,
JUDGING_IMPACT,
SHARING_PERSP,
SHARING_INT,
SHARING_ACCY,
judging_impact_numerical,
sharing_persp_numerical,
sharing_int_numerical,
sharing_accy_numerical)
# Binding the control and treatment into one table
post_survey_df <-bind_rows(pol_behavior_df_condition_cnrl,pol_behavior_df_condition_trtmnt)
# Binding the pre survey and post survey of each individual (60 participants)
pre_post_df <-merge (pol_individual_df,post_survey_df)
# The data frame with their rating for each video and post survey
pol_df <-merge (pol_behavior_df,pre_post_df)
library(ggplot2)
ggplot(pol_df, aes(x=BELIEVE, fill = fake )) + geom_bar() +
facet_wrap(~condition, nrow=2)
ggplot(pol_df, aes(x=SHARE, fill = fake)) +
geom_bar() +
facet_wrap(~condition, nrow=2)
histogram_df_control<-pol_df |> filter (condition =="Control")
histinfo=hist(histogram_df_control$share_numerical)
histinfo=hist(histogram_df_control$believe_numerical)
histogram_df_treatment<-pol_df |> filter (condition =="Treatment")
histinfo=hist(histogram_df_treatment$share_numerical)
histinfo=hist(histogram_df_treatment$believe_numerical)
#Total path
fit.totaleffect=lm(share_numerical~condition,pol_df)
summary(fit.totaleffect)
##
## Call:
## lm(formula = share_numerical ~ condition, data = pol_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9138 -0.9138 -0.8548 1.0862 4.1452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.91379 0.07262 26.355 <2e-16 ***
## conditionTreatment -0.05895 0.10103 -0.584 0.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.355 on 718 degrees of freedom
## Multiple R-squared: 0.0004741, Adjusted R-squared: -0.000918
## F-statistic: 0.3405 on 1 and 718 DF, p-value: 0.5597
fit.mediator=lm(believe_numerical~condition,pol_df)
summary(fit.mediator)
##
## Call:
## lm(formula = believe_numerical ~ condition, data = pol_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1371 -1.1371 -0.1178 1.8629 1.8822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.11782 0.07826 39.841 <2e-16 ***
## conditionTreatment 0.01928 0.10887 0.177 0.859
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.46 on 718 degrees of freedom
## Multiple R-squared: 4.368e-05, Adjusted R-squared: -0.001349
## F-statistic: 0.03136 on 1 and 718 DF, p-value: 0.8595
library(mediation)
## Warning: package 'mediation' was built under R version 4.3.1
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
results = mediate(fit.mediator, fit.dv, treat='condition', mediator='believe_numerical', boot=T)
## Warning in mediate(fit.mediator, fit.dv, treat = "condition", mediator =
## "believe_numerical", : treatment and control values do not match factor levels;
## using Control and Treatment as control and treatment, respectively
## Running nonparametric bootstrap
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME -0.00302 -0.03740 0.03 0.80
## ADE -0.05593 -0.24917 0.13 0.54
## Total Effect -0.05895 -0.25298 0.13 0.51
## Prop. Mediated 0.05131 -2.32004 1.83 0.85
##
## Sample Size Used: 720
##
##
## Simulations: 1000
# OUr data ent_df
one.way <- aov( share_numerical ~ condition, data = pol_df)
summary(one.way)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 0.6 0.6249 0.341 0.56
## Residuals 718 1317.6 1.8351
library (lsr)
## Warning: package 'lsr' was built under R version 4.3.1
# Eta sqr one way
Eta_oneway <-etaSquared(one.way)
print (Eta_oneway)
## eta.sq eta.sq.part
## condition 0.0004740681 0.0004740681
ent_twoway_no_interactions <- aov (share_numerical ~ condition+ believe_numerical,
data= pol_df)
summary (ent_twoway_no_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 0.6 0.62 0.35 0.554
## believe_numerical 1 37.7 37.66 21.10 5.15e-06 ***
## Residuals 717 1279.9 1.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eta sqr one way
Eta_twoway_no_interactions <-etaSquared(ent_twoway_no_interactions)
print (Eta_twoway_no_interactions)
## eta.sq eta.sq.part
## condition 0.0004266501 0.0004392199
## believe_numerical 0.0285709542 0.0285845052
ent_twoway_with_interactions <- aov (share_numerical ~ condition * believe_numerical,
data=pol_df)
summary ( ent_twoway_with_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 0.6 0.62 0.350 0.554
## believe_numerical 1 37.7 37.66 21.096 5.16e-06 ***
## condition:believe_numerical 1 1.7 1.66 0.932 0.335
## Residuals 716 1278.2 1.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eta sqr one way
Eta_twoway_with_interactions <-etaSquared(ent_twoway_with_interactions)
print (Eta_twoway_with_interactions )
## eta.sq eta.sq.part
## condition 0.0004266501 0.0004397914
## believe_numerical 0.0285709542 0.0286206504
## condition:believe_numerical 0.0012623068 0.0013000673
pol_df |> ggplot(aes(condition,share_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
pol_df |> ggplot(aes(condition,believe_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
# Load the openxlsx package
#install.packages("writexl")
library(writexl)
## Warning: package 'writexl' was built under R version 4.3.2
write_xlsx( pol_df,"C:/Users/Dell/Downloads/pol_df.xlsx",
col_names = TRUE,
format_headers = TRUE)
#Use the file pol_df the do some bar grapphs on DF and R
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Calculate the mean belief levels and standard errors for 'DF' and 'R' within each condition
belief_stats <- pol_df %>%
group_by(condition, fake) %>%
summarise(
Mean_Belief_Level = mean(believe_numerical),
SE = sd(believe_numerical) / sqrt(n())
) %>%
ungroup()
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
# Plotting the bars for DF and R with error bars
ggplot(belief_stats, aes(x = condition, y = Mean_Belief_Level, fill = fake)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.35) +
geom_errorbar(aes(ymin = Mean_Belief_Level - SE, ymax = Mean_Belief_Level + SE),
position = position_dodge(0.35), width = 0.25) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean Belief Level", fill = "Type") +
ggtitle("Mean Belief Levels for DF and R by Condition with Error Bars") +
theme_minimal()
# Load necessary libraries
library(dplyr)
# Calculate the mean belief levels for 'R' in each condition
r_means <- pol_df %>%
filter(fake == 'DF') %>%
group_by(condition) %>%
summarise(Mean_Belief_Level = mean(believe_numerical))
# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
pull()
# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]
# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.0337411939191696"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 0.81428124021297"
# Load necessary libraries
library(dplyr)
# Calculate the mean belief levels for 'R' in each condition
r_means <- pol_df %>%
filter(fake == 'R') %>%
group_by(condition) %>%
summarise(Mean_Belief_Level = mean(believe_numerical))
# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
pull()
# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]
# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.00482017055988138"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 0.23041474654378"
# Read the data from the pol_df
# Separate the belief levels for 'DF' in each condition
df_control_beliefs <- filter(pol_df, fake == 'DF' & condition == 'Control')$believe_numerical
df_treatment_beliefs <- filter(pol_df, fake == 'DF' & condition == 'Treatment')$believe_numerical
# Perform the t-test
t_test_result <- t.test(df_control_beliefs, df_treatment_beliefs)
# Output the t-test result
print(t_test_result)
##
## Welch Two Sample t-test
##
## data: df_control_beliefs and df_treatment_beliefs
## t = -0.2933, df = 357.29, p-value = 0.7695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2599790 0.1924966
## sample estimates:
## mean of x mean of y
## 4.143678 4.177419
# Separate the belief levels for 'R' in each condition
r_control_beliefs <- filter(pol_df, fake == 'R' & condition == 'Control')$believe_numerical
r_treatment_beliefs <- filter(pol_df, fake == 'R' & condition == 'Treatment')$believe_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_beliefs, r_treatment_beliefs)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_beliefs and r_treatment_beliefs
## t = -0.047066, df = 345.54, p-value = 0.9625
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2062522 0.1966118
## sample estimates:
## mean of x mean of y
## 2.091954 2.096774
library(readxl)
library(dplyr)
library(ggplot2)
# Filter the data for 'DF' and 'R' groups
df_data <- pol_df %>% filter(fake == 'DF')
r_data <- pol_df %>% filter(fake != 'DF')
# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
r_stats <- r_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
# Combine the data
plot_data <- bind_rows(
mutate(df_stats, group = "DF"),
mutate(r_stats, group = "R")
)
# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd, group = group),
position = position_dodge(0.7), width = 0.25
) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of DF and R between Control and Treatment condition") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_x_discrete(labels = c("Control", "Treatment")) +
guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))
# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)
library(dplyr)
# Filter the data for the DF group
df_data <- filter(pol_df, fake == 'DF')
# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] -0.2059696
##
## $percentage_difference
## [1] -11.59829
#results
#The difference in the mean "share_numerical" value for the DF (Deepfake Videos) group between the Treatment and Control conditions is approximately -0.2059696. In percentage terms, this represents an approximate -11.59829% decrease in the Treatment group compared to the Control group.
library(dplyr)
# Filter the data for the R group (assuming R group is represented by all non-DF values in 'fake' column)
r_data <- filter(pol_df, fake != 'DF')
# Group the data by condition and calculate the mean of 'share_numerical' for the R group
r_grouped <- r_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- r_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- r_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
r_difference <- mean_treatment - mean_control
r_percentage_difference <- (r_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = r_difference, percentage_difference = r_percentage_difference)
## $difference
## [1] 0.08806081
##
## $percentage_difference
## [1] 4.292039
#results'
#The difference in the mean "share_numerical" value for the R (Real Videos) group between the Treatment and Control conditions is approximately 0.08806081. In percentage terms, this represents an approximate 4.29% increase in the Treatment group compared to the Control group.
# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_df, fake == 'DF' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_df, fake == 'DF' & condition == 'Treatment')$share_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_share and r_treatment_share
## t = 1.628, df = 348.66, p-value = 0.1044
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04286935 0.45480854
## sample estimates:
## mean of x mean of y
## 1.775862 1.569892
# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_df, fake == 'R' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_df, fake == 'R' & condition == 'Treatment')$share_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_share and r_treatment_share
## t = -0.56929, df = 355.11, p-value = 0.5695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3922776 0.2161560
## sample estimates:
## mean of x mean of y
## 2.051724 2.139785
library(dplyr)
library(broom)
# Split the data into treatment and control groups
treatment_data <- filter(pol_df, condition == 'Treatment')
control_data <- filter(pol_df, condition == 'Control')
# Run linear regression models for each group
treatment_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = treatment_data)
control_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = control_data)
# Summarize the models
treatment_summary <- tidy(treatment_model)
control_summary <- tidy(control_model)
# Output the summaries
treatment_summary
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.0514 0.253 -0.203 8.39e- 1
## 2 impt_numerical 0.287 0.0528 5.45 9.40e- 8
## 3 interest_numerical NA NA NA NA
## 4 novel_numerical 0.406 0.0643 6.32 7.56e-10
## 5 familiar_numerical -0.0125 0.0449 -0.278 7.81e- 1
control_summary
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.185 0.242 -0.766 4.44e- 1
## 2 impt_numerical 0.240 0.0563 4.27 2.53e- 5
## 3 interest_numerical NA NA NA NA
## 4 novel_numerical 0.457 0.0591 7.74 1.09e-13
## 5 familiar_numerical 0.0275 0.0450 0.611 5.42e- 1
#ploting to a graph
library(dplyr)
library(ggplot2)
# Split the data into treatment and control groups
treatment_data <- filter(pol_df, condition == 'Treatment')
control_data <- filter(pol_df, condition == 'Control')
# Function to plot linear models
plot_lm <- function(df, condition) {
variables <- c('impt_numerical', 'interest_numerical', 'novel_numerical', 'familiar_numerical')
# Create a list to store plots
plot_list <- list()
for (var in variables) {
# Fit the linear model
model <- lm(share_numerical ~ ., data = df[, c('share_numerical', var)])
# Extract the R-squared value
r_squared <- summary(model)$r.squared
# Create the plot
p <- ggplot(df, aes_string(x = var, y = 'share_numerical')) +
geom_point(alpha = 0.5) +
geom_smooth(method = 'lm', formula = y ~ x, color = 'red') +
labs(title = paste(var, 'Effect in', condition, 'Condition'),
subtitle = paste('R-squared =', round(r_squared, 2)),
x = var,
y = 'Share Numerical')
# Add the plot to the list
plot_list[[var]] <- p
}
return(plot_list)
}
# Plot for treatment group
treatment_plots <- plot_lm(treatment_data, 'Treatment')
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot for control group
control_plots <- plot_lm(control_data, 'Control')
# Display the plots
treatment_plots$impt_numerical
treatment_plots$interest_numerical
treatment_plots$novel_numerical
treatment_plots$familiar_numerical
control_plots$impt_numerical
control_plots$interest_numerical
control_plots$novel_numerical
control_plots$familiar_numerical