Deepfake Political Data Wrangling

#Clean Political data frame to explore the effect of measured other variables
study2_data_raw_pol_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Politics (Believability AND Sharing Intentions) - FINAL - Copy_July 28, 2023_06.53.csv")
## Rows: 64 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head (study2_data_raw_pol_all)
## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "6/21/20… "6/21/… "Surv… "100"    "12"                   "TRUE"   "6/21/2023 …
## 4 "6/21/20… "6/21/… "IP A… "100"    "1116"                 "TRUE"   "6/21/2023 …
## 5 "6/26/20… "6/26/… "IP A… "100"    "716"                  "TRUE"   "6/26/2023 …
## 6 "6/26/20… "6/26/… "IP A… "100"    "748"                  "TRUE"   "6/26/2023 …
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …
# Drop the first 4 raws as those were used for test 

study2_data_raw_pol<- study2_data_raw_pol_all[-c(1:4),]

head(study2_data_raw_pol)
## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 6/26/202… 6/26/2… IP Ad… 100      716                    TRUE     6/26/2023 2…
## 2 6/26/202… 6/26/2… IP Ad… 100      748                    TRUE     6/26/2023 2…
## 3 6/26/202… 6/26/2… IP Ad… 100      1174                   TRUE     6/26/2023 2…
## 4 6/26/202… 6/26/2… IP Ad… 100      991                    TRUE     6/26/2023 2…
## 5 6/26/202… 6/26/2… IP Ad… 100      1255                   TRUE     6/26/2023 2…
## 6 6/26/202… 6/26/2… IP Ad… 100      882                    TRUE     6/26/2023 2…
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …
nrow(study2_data_raw_pol)
## [1] 60

Study 2 Political Data cleaning

Likert scales to order levels

#```{r}
#Likert scale has 
likely_values <-  c(
  "very unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "very likely"
)

consume_values <- c(
  "Less than 1 hour per day",
  "1-2 hours per day",
  "2-3 hours per day",
  "3-4 hours per day",
  "5+ hours per day"
)

interest_levels <- c ( "not at all interested in this",
                      "not interested", 
                      "neither not interested nor interested",
                      "interested",
                      "very much interested"
                      )


likley_shory_values <- c("Very unlikely",
                         "Unlikely",
                         "Neither likely nor unlikely",
                         "Likely",
                         "Very likely")


knowledgable_values <- c(
  "very unknowledgeable",
  "somewhat unknowledgeable",
  "neither",
  "somewhat knowledgeable",
  "very knowledgeable"
)
easy_levels <- c("Very difficult",
                 "Difficult",
                 "Neither difficult nor easy",
                 "Easy",
                 "Very easy")

boolen_q <- c( "Yes", "No")

importance_levels <- c ("very unimportant" ,
                       "unimportant",
                       "neither important nor unimportant",
                       "important" ,
                       "very important"
                       )

novel_levels <-c ("not at all novel",
                  "not novel",
                  "neither novel nor not novel",
                  "novel",
                  "very novel")

familiar_levels <-c ("not at all familiar",
                     "not familiar",
                     "neither familiar nor unfamiliar",
                     "familiar",
                     "very familiar")

believe_levels <- c("very unlikely", 
                    "unlikely", 
                    "neither likely nor unlikely", 
                    "likely",
                    "very likely" )

####################
#After the survey, the postsurvey questions

judging_impact_values <- c(
  "extremely unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "extremely likely"
)

sharing_accuracy_level <- c ("not at all important",
                         "moderately important" ,
                       "slightly important",
                       "neither important nor unimportant",
                       "very important" ,
                       "extremely important"
                       )

Wrangling the Initial (Pre) survey data before experiment

#Get all the data into lower case since likert will be all in lower 

pol_individual_df <- study2_data_raw_pol |> 
  mutate(Duration = as.numeric(`Duration (in seconds)`),
         AGE = as.numeric(AGE),
         BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
         browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
         USE_SNS = ordered(USE_SNS, levels = consume_values),
         use_sns = as.numeric(USE_SNS,  levels = consume_values),
         WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
         watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
         SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
         sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
         KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
         EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
         KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
         KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
         know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
         EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
         ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
  dplyr::select(
    ResponseId,
    Duration,
    AGE,
    BROWSE_INTERNET,
    browse_internet,
    USE_SNS,
    use_sns,
    SNS_PLATFORM_USE,
    WATCHING_BEHAVIOR,
    watching_behavior,
    SHARING_BEHAVIOR,
    sharing_behavior,
    KNOW_DEEPFAKE,
    KNOW_CREATE_DF,
    know_create_df,
    EXP_CREATE_DF,
    EASE_CREATE_DF,
    ease_create_df
  ) |> mutate(
    SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
    value = TRUE
  ) |> 
  unnest() |> 
  mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |> 
  pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.

Wrangling of the survey data after control and learning treatment

pol_behavior_df <- study2_data_raw_pol |> 
     select(ResponseId, matches("_R_"), matches ("_DF_")) |> 
     pivot_longer(-ResponseId, values_drop_na = TRUE) |>
     separate(name, c("video", "fake", "question"), "_", extra= "merge") |>
     pivot_wider(names_from = question, values_from = value)|>
     select (ResponseId, fake, video ,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
     #Removing the error value created with Dont share 3E
     filter (video != "3E") |>
     mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
         BELIEVE= str_to_lower(BELIEVE),
         believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
         IMPT= str_to_lower(IMPT),
         impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         INTEREST = str_to_lower(INTEREST), 
         INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
         interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         FAMILIAR = str_to_lower (FAMILIAR),
         familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
         NOVEL = str_to_lower(NOVEL),
         novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))|>
  #there is an unnecessary 16E video in the politics, remove that
  filter (video != "16E") |>
   #Select all the numeric values 
   select (ResponseId, 
           fake, 
           video,
           SHARE,
           BELIEVE,
           believe_numerical,
           share_numerical, 
           impt_numerical , 
           interest_numerical,
           familiar_numerical,
           novel_numerical)
  
 
         
#Selecting the Control group and convert likert value to numerical in post survey
pol_behavior_df_condition_trtmnt <-study2_data_raw_pol  |>
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|> 
     filter (condition == "Treatment") |> 
     mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(ACT_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level))) |>
           select (ResponseId, 
                   condition, 
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)

#Selecting the Treatment group and convert likert value to numerical in post survey
pol_behavior_df_condition_cnrl <-study2_data_raw_pol |> 
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
     filter (condition == "Control") |>
     mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(CTRL_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)),
           cy_numerical = as.numeric(ordered(CTRL_SHARING_ACCY, levels = sharing_accuracy_level)))|>
     select (ResponseId, 
                   condition,
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)
    

# Binding the control and treatment into one table 
post_survey_df <-bind_rows(pol_behavior_df_condition_cnrl,pol_behavior_df_condition_trtmnt)

# Binding the pre survey and post survey of each individual (60 participants)  
pre_post_df <-merge (pol_individual_df,post_survey_df) 



# The data frame with their rating for each video and post survey 
pol_df <-merge (pol_behavior_df,pre_post_df) 

Basic Histograms

library(ggplot2)

ggplot(pol_df, aes(x=BELIEVE, fill = fake )) +  geom_bar() + 
   facet_wrap(~condition, nrow=2) 

ggplot(pol_df, aes(x=SHARE, fill = fake)) +
  geom_bar() +
  facet_wrap(~condition, nrow=2) 

histogram_df_control<-pol_df |> filter (condition =="Control")
histinfo=hist(histogram_df_control$share_numerical)

histinfo=hist(histogram_df_control$believe_numerical)

histogram_df_treatment<-pol_df |> filter (condition =="Treatment")
histinfo=hist(histogram_df_treatment$share_numerical)

histinfo=hist(histogram_df_treatment$believe_numerical)

Mediation Analysis

Step 1 - The total effect - the main path

#Total path 

fit.totaleffect=lm(share_numerical~condition,pol_df)

summary(fit.totaleffect)
## 
## Call:
## lm(formula = share_numerical ~ condition, data = pol_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9138 -0.9138 -0.8548  1.0862  4.1452 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.91379    0.07262  26.355   <2e-16 ***
## conditionTreatment -0.05895    0.10103  -0.584     0.56    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.355 on 718 degrees of freedom
## Multiple R-squared:  0.0004741,  Adjusted R-squared:  -0.000918 
## F-statistic: 0.3405 on 1 and 718 DF,  p-value: 0.5597

STEP 2- The effect of Independent variable Condition to the mediator belivability

fit.mediator=lm(believe_numerical~condition,pol_df)
summary(fit.mediator)
## 
## Call:
## lm(formula = believe_numerical ~ condition, data = pol_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1371 -1.1371 -0.1178  1.8629  1.8822 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.11782    0.07826  39.841   <2e-16 ***
## conditionTreatment  0.01928    0.10887   0.177    0.859    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.46 on 718 degrees of freedom
## Multiple R-squared:  4.368e-05,  Adjusted R-squared:  -0.001349 
## F-statistic: 0.03136 on 1 and 718 DF,  p-value: 0.8595

STEP 3 - The effect of the mediator on the dependent variable shareing behaviour

fit.dv=lm(share_numerical~condition +believe_numerical,pol_df)
summary(fit.dv)
## 
## Call:
## lm(formula = share_numerical ~ condition + believe_numerical, 
##     data = pol_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2460 -0.9323 -0.5626  0.7540  4.2805 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.40293    0.12833  18.724  < 2e-16 ***
## conditionTreatment -0.05593    0.09964  -0.561    0.575    
## believe_numerical  -0.15689    0.03416  -4.593 5.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.336 on 717 degrees of freedom
## Multiple R-squared:  0.02905,    Adjusted R-squared:  0.02634 
## F-statistic: 10.72 on 2 and 717 DF,  p-value: 2.576e-05

STEP 4 Mediation model

library(mediation)
## Warning: package 'mediation' was built under R version 4.3.1
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
results = mediate(fit.mediator, fit.dv, treat='condition', mediator='believe_numerical', boot=T)
## Warning in mediate(fit.mediator, fit.dv, treat = "condition", mediator =
## "believe_numerical", : treatment and control values do not match factor levels;
## using Control and Treatment as control and treatment, respectively
## Running nonparametric bootstrap
summary(results)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value
## ACME           -0.00302     -0.03740         0.03    0.80
## ADE            -0.05593     -0.24917         0.13    0.54
## Total Effect   -0.05895     -0.25298         0.13    0.51
## Prop. Mediated  0.05131     -2.32004         1.83    0.85
## 
## Sample Size Used: 720 
## 
## 
## Simulations: 1000

Conducting simple ANOVA for treatment and condiition

One way ANOVA

# OUr data ent_df 
one.way <- aov( share_numerical ~ condition, data = pol_df)

summary(one.way)
##              Df Sum Sq Mean Sq F value Pr(>F)
## condition     1    0.6  0.6249   0.341   0.56
## Residuals   718 1317.6  1.8351

Effect size of one way

library (lsr)
## Warning: package 'lsr' was built under R version 4.3.1
# Eta sqr one way 
Eta_oneway <-etaSquared(one.way)

print (Eta_oneway)
##                 eta.sq  eta.sq.part
## condition 0.0004740681 0.0004740681

Effect size of two way with condition and beliveability

ent_twoway_no_interactions <- aov (share_numerical ~ condition+ believe_numerical,
                                   data= pol_df)

summary (ent_twoway_no_interactions)
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## condition           1    0.6    0.62    0.35    0.554    
## believe_numerical   1   37.7   37.66   21.10 5.15e-06 ***
## Residuals         717 1279.9    1.79                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of one way

# Eta sqr one way 
Eta_twoway_no_interactions <-etaSquared(ent_twoway_no_interactions)

print (Eta_twoway_no_interactions)
##                         eta.sq  eta.sq.part
## condition         0.0004266501 0.0004392199
## believe_numerical 0.0285709542 0.0285845052

Two way ANOVA with Interactions

ANOVA 2 way with interactions

ent_twoway_with_interactions <- aov (share_numerical ~ condition * believe_numerical,
                                     data=pol_df)

summary ( ent_twoway_with_interactions)
##                              Df Sum Sq Mean Sq F value   Pr(>F)    
## condition                     1    0.6    0.62   0.350    0.554    
## believe_numerical             1   37.7   37.66  21.096 5.16e-06 ***
## condition:believe_numerical   1    1.7    1.66   0.932    0.335    
## Residuals                   716 1278.2    1.79                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of two way with interactions

# Eta sqr one way 
Eta_twoway_with_interactions <-etaSquared(ent_twoway_with_interactions)

print (Eta_twoway_with_interactions )
##                                   eta.sq  eta.sq.part
## condition                   0.0004266501 0.0004397914
## believe_numerical           0.0285709542 0.0286206504
## condition:believe_numerical 0.0012623068 0.0013000673
pol_df |> ggplot(aes(condition,share_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

pol_df |> ggplot(aes(condition,believe_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

Sharing and Blieveing discernmant

#understanding the likelihood of believing and the truth (discernment) 

#Those who could not detect deepfakes
could_not_identify_DF1<- pol_df |> filter (fake == "DF", believe_numerical == 1)
could_not_identify_DF2<- pol_df |> filter (fake == "DF", believe_numerical == 2)
wrong_beleive_DF<-rbind(could_not_identify_DF1,could_not_identify_DF2) |> mutate (detect="DF_Wrong") #192

#Those who could not detect real 
could_not_identify_R1<- pol_df |> filter (fake == "R", believe_numerical == 4)
could_not_identify_R2<- pol_df |> filter (fake == "R", believe_numerical == 5)
wrong_beleive_R<-rbind(could_not_identify_R1,could_not_identify_R2) |> mutate (detect="R_Wrong") #115

#Those who were uncertain of deepfakes or real 
uncertain_to_belive_DF<- pol_df |> filter (fake == "DF", believe_numerical == 3)
uncertain_to_belive_F<- pol_df |> filter (fake == "R", believe_numerical == 3)
uncertain_DF_or_R<-rbind(uncertain_to_belive_DF,uncertain_to_belive_F) |> mutate (detect="R_DF_Uncertain") #201

#Those who got it right to detect deepfakes 
identified_correctly_DF1<-pol_df |> filter (fake == "DF", believe_numerical == 4)
identified_correctly_DF2<-pol_df |> filter (fake == "DF", believe_numerical == 5)
correct_belive_DF<-rbind(identified_correctly_DF1,identified_correctly_DF2) |> mutate (detect="DF_Right") #491

#Those who got it right to detect real videos 
identified_correctly_R1<-pol_df |> filter (fake == "R", believe_numerical == 1)
identified_correctly_R2<-pol_df |> filter (fake == "R", believe_numerical == 2)
correct_belive_R<-rbind(identified_correctly_R1,identified_correctly_R2)|> mutate (detect="R_Right") #557

#Those who detect DF and R right 
correct_R_DF <-rbind(correct_belive_DF,correct_belive_R)

#All of the belives combined to table 
All_believes <-rbind(correct_belive_R,
                     correct_belive_DF,
                     wrong_beleive_R, 
                     wrong_beleive_DF,
                     uncertain_DF_or_R)


ggplot(All_believes, aes(x=detect, fill= detect)) +
  geom_bar() 

# Analysing the sharing behavior of deepfakes and real, also the DF and R beliveability diference

# Load the openxlsx package
#install.packages("writexl")
library(writexl)
## Warning: package 'writexl' was built under R version 4.3.2
write_xlsx( pol_df,"C:/Users/Dell/Downloads/pol_df.xlsx", 
        col_names = TRUE, 
        format_headers = TRUE)

#Use the file pol_df the do some bar grapphs on DF and R

# Load necessary libraries
library(ggplot2)
library(dplyr)




# Calculate the mean belief levels and standard errors for 'DF' and 'R' within each condition
belief_stats <- pol_df %>%
  group_by(condition, fake) %>%
  summarise(
    Mean_Belief_Level = mean(believe_numerical),
    SE = sd(believe_numerical) / sqrt(n())
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
# Plotting the bars for DF and R with error bars
ggplot(belief_stats, aes(x = condition, y = Mean_Belief_Level, fill = fake)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.35) +
  geom_errorbar(aes(ymin = Mean_Belief_Level - SE, ymax = Mean_Belief_Level + SE),
                position = position_dodge(0.35), width = 0.25) +
  scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
  labs(x = "Condition", y = "Mean Belief Level", fill = "Type") +
  ggtitle("Mean Belief Levels for DF and R by Condition with Error Bars") +
  theme_minimal()

Diference between the belive level of DF in treatment and control

# Load necessary libraries
library(dplyr)

# Calculate the mean belief levels for 'R' in each condition
r_means <- pol_df %>%
  filter(fake == 'DF') %>%
  group_by(condition) %>%
  summarise(Mean_Belief_Level = mean(believe_numerical))

# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
  summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
  pull()

# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]

# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.0337411939191696"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 0.81428124021297"

Diference between R in COntrol and Treatment

# Load necessary libraries
library(dplyr)

# Calculate the mean belief levels for 'R' in each condition
r_means <- pol_df %>%
  filter(fake == 'R') %>%
  group_by(condition) %>%
  summarise(Mean_Belief_Level = mean(believe_numerical))

# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
  summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
  pull()

# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]

# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.00482017055988138"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 0.23041474654378"

Is there a statisitical dinference between DF in Control and Treatment

# Read the data from the pol_df

# Separate the belief levels for 'DF' in each condition
df_control_beliefs <- filter(pol_df, fake == 'DF' & condition == 'Control')$believe_numerical
df_treatment_beliefs <- filter(pol_df, fake == 'DF' & condition == 'Treatment')$believe_numerical

# Perform the t-test
t_test_result <- t.test(df_control_beliefs, df_treatment_beliefs)

# Output the t-test result
print(t_test_result)
## 
##  Welch Two Sample t-test
## 
## data:  df_control_beliefs and df_treatment_beliefs
## t = -0.2933, df = 357.29, p-value = 0.7695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2599790  0.1924966
## sample estimates:
## mean of x mean of y 
##  4.143678  4.177419

Is there a statisitical dinference between R in Control and Treatment

# Separate the belief levels for 'R' in each condition
r_control_beliefs <- filter(pol_df, fake == 'R' & condition == 'Control')$believe_numerical
r_treatment_beliefs <- filter(pol_df, fake == 'R' & condition == 'Treatment')$believe_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_beliefs, r_treatment_beliefs)

# Output the t-test result
print(t_test_result_r)
## 
##  Welch Two Sample t-test
## 
## data:  r_control_beliefs and r_treatment_beliefs
## t = -0.047066, df = 345.54, p-value = 0.9625
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2062522  0.1966118
## sample estimates:
## mean of x mean of y 
##  2.091954  2.096774

Shere diference in DF and R between cotrol and treatment

library(readxl)
library(dplyr)
library(ggplot2)



# Filter the data for 'DF' and 'R' groups
df_data <- pol_df %>% filter(fake == 'DF')
r_data <- pol_df %>% filter(fake != 'DF')

# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
  group_by(condition) %>%
  summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')

r_stats <- r_data %>%
  group_by(condition) %>%
  summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')

# Combine the data
plot_data <- bind_rows(
  mutate(df_stats, group = "DF"),
  mutate(r_stats, group = "R")
)

# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd, group = group),
    position = position_dodge(0.7), width = 0.25
  ) +
  scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
  labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of DF and R between Control and Treatment condition") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  scale_x_discrete(labels = c("Control", "Treatment")) +
  guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))

# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)

what is the diference between DF in Treatment and control in sharing intentions

library(dplyr)


# Filter the data for the DF group
df_data <- filter(pol_df, fake == 'DF')

# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
  group_by(condition) %>%
  summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')

# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)

# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100

# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] -0.2059696
## 
## $percentage_difference
## [1] -11.59829
#results 
#The difference in the mean "share_numerical" value for the DF (Deepfake Videos) group between the Treatment and Control conditions is approximately -0.2059696. In percentage terms, this represents an approximate -11.59829% decrease in the Treatment group compared to the Control group.

what is the diference between Real videos sharing intention in Control and Treatment

library(dplyr)

# Filter the data for the R group (assuming R group is represented by all non-DF values in 'fake' column)
r_data <- filter(pol_df, fake != 'DF')

# Group the data by condition and calculate the mean of 'share_numerical' for the R group
r_grouped <- r_data %>%
  group_by(condition) %>%
  summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')

# Extract the means for Treatment and Control
mean_treatment <- r_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- r_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)

# Calculate the difference and the percentage difference
r_difference <- mean_treatment - mean_control
r_percentage_difference <- (r_difference / mean_control) * 100

# Output the difference and the percentage difference
list(difference = r_difference, percentage_difference = r_percentage_difference)
## $difference
## [1] 0.08806081
## 
## $percentage_difference
## [1] 4.292039
#results'
#The difference in the mean "share_numerical" value for the R (Real Videos) group between the Treatment and Control conditions is approximately 0.08806081. In percentage terms, this represents an approximate 4.29% increase in the Treatment group compared to the Control group.

Is there a statisitical dinference between DF in Control and Treatment in sharing condition

# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_df, fake == 'DF' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_df, fake == 'DF' & condition == 'Treatment')$share_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)

# Output the t-test result
print(t_test_result_r)
## 
##  Welch Two Sample t-test
## 
## data:  r_control_share and r_treatment_share
## t = 1.628, df = 348.66, p-value = 0.1044
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04286935  0.45480854
## sample estimates:
## mean of x mean of y 
##  1.775862  1.569892

Is there a statisitical dinference between R in Control and Treatment in sharing condition

# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_df, fake == 'R' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_df, fake == 'R' & condition == 'Treatment')$share_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)

# Output the t-test result
print(t_test_result_r)
## 
##  Welch Two Sample t-test
## 
## data:  r_control_share and r_treatment_share
## t = -0.56929, df = 355.11, p-value = 0.5695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3922776  0.2161560
## sample estimates:
## mean of x mean of y 
##  2.051724  2.139785

What is the most effecting to sharing from impt_numerical, interest_numerical, novel_numerical and familiar_numerical in the treatment and control conditions.

library(dplyr)
library(broom)


# Split the data into treatment and control groups
treatment_data <- filter(pol_df, condition == 'Treatment')
control_data <- filter(pol_df, condition == 'Control')

# Run linear regression models for each group
treatment_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = treatment_data)
control_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = control_data)

# Summarize the models
treatment_summary <- tidy(treatment_model)
control_summary <- tidy(control_model)

# Output the summaries
treatment_summary
## # A tibble: 5 × 5
##   term               estimate std.error statistic   p.value
##   <chr>                 <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)         -0.0514    0.253     -0.203  8.39e- 1
## 2 impt_numerical       0.287     0.0528     5.45   9.40e- 8
## 3 interest_numerical  NA        NA         NA     NA       
## 4 novel_numerical      0.406     0.0643     6.32   7.56e-10
## 5 familiar_numerical  -0.0125    0.0449    -0.278  7.81e- 1
control_summary
## # A tibble: 5 × 5
##   term               estimate std.error statistic   p.value
##   <chr>                 <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)         -0.185     0.242     -0.766  4.44e- 1
## 2 impt_numerical       0.240     0.0563     4.27   2.53e- 5
## 3 interest_numerical  NA        NA         NA     NA       
## 4 novel_numerical      0.457     0.0591     7.74   1.09e-13
## 5 familiar_numerical   0.0275    0.0450     0.611  5.42e- 1

#ploting to a graph

library(dplyr)
library(ggplot2)

# Split the data into treatment and control groups
treatment_data <- filter(pol_df, condition == 'Treatment')
control_data <- filter(pol_df, condition == 'Control')

# Function to plot linear models
plot_lm <- function(df, condition) {
  variables <- c('impt_numerical', 'interest_numerical', 'novel_numerical', 'familiar_numerical')
  
  # Create a list to store plots
  plot_list <- list()
  
  for (var in variables) {
    # Fit the linear model
    model <- lm(share_numerical ~ ., data = df[, c('share_numerical', var)])
    
    # Extract the R-squared value
    r_squared <- summary(model)$r.squared
    
    # Create the plot
    p <- ggplot(df, aes_string(x = var, y = 'share_numerical')) +
      geom_point(alpha = 0.5) +
      geom_smooth(method = 'lm', formula = y ~ x, color = 'red') +
      labs(title = paste(var, 'Effect in', condition, 'Condition'),
           subtitle = paste('R-squared =', round(r_squared, 2)),
           x = var,
           y = 'Share Numerical')
    
    # Add the plot to the list
    plot_list[[var]] <- p
  }
  
  return(plot_list)
}

# Plot for treatment group
treatment_plots <- plot_lm(treatment_data, 'Treatment')
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot for control group
control_plots <- plot_lm(control_data, 'Control')

# Display the plots
treatment_plots$impt_numerical

treatment_plots$interest_numerical

treatment_plots$novel_numerical

treatment_plots$familiar_numerical

control_plots$impt_numerical

control_plots$interest_numerical

control_plots$novel_numerical

control_plots$familiar_numerical