Deepfake Political Data Wrangling

#Clean Political data frame to explore the effect of measured other variables
study2_data_raw_pol_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Politics (Believability AND Sharing Intentions) - FINAL - Copy_July 28, 2023_06.53.csv")

## Rows: 64 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head (study2_data_raw_pol_all)

## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "6/21/20… "6/21/… "Surv… "100"    "12"                   "TRUE"   "6/21/2023 …
## 4 "6/21/20… "6/21/… "IP A… "100"    "1116"                 "TRUE"   "6/21/2023 …
## 5 "6/26/20… "6/26/… "IP A… "100"    "716"                  "TRUE"   "6/26/2023 …
## 6 "6/26/20… "6/26/… "IP A… "100"    "748"                  "TRUE"   "6/26/2023 …
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …

# Drop the first 4 raws as those were used for test 

study2_data_raw_pol<- study2_data_raw_pol_all[-c(1:4),]

head(study2_data_raw_pol)

## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 6/26/202… 6/26/2… IP Ad… 100      716                    TRUE     6/26/2023 2…
## 2 6/26/202… 6/26/2… IP Ad… 100      748                    TRUE     6/26/2023 2…
## 3 6/26/202… 6/26/2… IP Ad… 100      1174                   TRUE     6/26/2023 2…
## 4 6/26/202… 6/26/2… IP Ad… 100      991                    TRUE     6/26/2023 2…
## 5 6/26/202… 6/26/2… IP Ad… 100      1255                   TRUE     6/26/2023 2…
## 6 6/26/202… 6/26/2… IP Ad… 100      882                    TRUE     6/26/2023 2…
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …

nrow(study2_data_raw_pol)

## [1] 60

Study 2 Political Data cleaning

Likert scales to order levels

#```{r}
#Likert scale has 
likely_values <-  c(
  "very unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "very likely"
)

consume_values <- c(
  "Less than 1 hour per day",
  "1-2 hours per day",
  "2-3 hours per day",
  "3-4 hours per day",
  "5+ hours per day"
)

interest_levels <- c ( "not at all interested in this",
                      "not interested", 
                      "neither not interested nor interested",
                      "interested",
                      "very much interested"
                      )


likley_shory_values <- c("Very unlikely",
                         "Unlikely",
                         "Neither likely nor unlikely",
                         "Likely",
                         "Very likely")


knowledgable_values <- c(
  "very unknowledgeable",
  "somewhat unknowledgeable",
  "neither",
  "somewhat knowledgeable",
  "very knowledgeable"
)
easy_levels <- c("Very difficult",
                 "Difficult",
                 "Neither difficult nor easy",
                 "Easy",
                 "Very easy")

boolen_q <- c( "Yes", "No")

importance_levels <- c ("very unimportant" ,
                       "unimportant",
                       "neither important nor unimportant",
                       "important" ,
                       "very important"
                       )

novel_levels <-c ("not at all novel",
                  "not novel",
                  "neither novel nor not novel",
                  "novel",
                  "very novel")

familiar_levels <-c ("not at all familiar",
                     "not familiar",
                     "neither familiar nor unfamiliar",
                     "familiar",
                     "very familiar")

believe_levels <- c("very unlikely", 
                    "unlikely", 
                    "neither likely nor unlikely", 
                    "likely",
                    "very likely" )

####################
#After the survey, the postsurvey questions

judging_impact_values <- c(
  "extremely unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "extremely likely"
)

sharing_accuracy_level <- c ("not at all important",
                         "moderately important" ,
                       "slightly important",
                       "neither important nor unimportant",
                       "very important" ,
                       "extremely important"
                       )

Wrangling the Initial (Pre) survey data before experiment

#Get all the data into lower case since likert will be all in lower 

pol_individual_df <- study2_data_raw_pol |> 
  mutate(Duration = as.numeric(`Duration (in seconds)`),
         AGE = as.numeric(AGE),
         BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
         browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
         USE_SNS = ordered(USE_SNS, levels = consume_values),
         use_sns = as.numeric(USE_SNS,  levels = consume_values),
         WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
         watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
         SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
         sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
         KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
         EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
         KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
         KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
         know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
         EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
         ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
  dplyr::select(
    ResponseId,
    Duration,
    AGE,
    BROWSE_INTERNET,
    browse_internet,
    USE_SNS,
    use_sns,
    SNS_PLATFORM_USE,
    WATCHING_BEHAVIOR,
    watching_behavior,
    SHARING_BEHAVIOR,
    sharing_behavior,
    KNOW_DEEPFAKE,
    KNOW_CREATE_DF,
    know_create_df,
    EXP_CREATE_DF,
    EASE_CREATE_DF,
    ease_create_df
  ) |> mutate(
    SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
    value = TRUE
  ) |> 
  unnest() |> 
  mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |> 
  pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)

## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.

Wrangling of the survey data after control and learning treatment

pol_behavior_df <- study2_data_raw_pol |> 
     select(ResponseId, matches("_R_"), matches ("_DF_")) |> 
     pivot_longer(-ResponseId, values_drop_na = TRUE) |>
     separate(name, c("video", "fake", "question"), "_", extra= "merge") |>
     pivot_wider(names_from = question, values_from = value)|>
     select (ResponseId, fake, video ,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
     #Removing the error value created with Dont share 3E
     filter (video != "3E") |>
     mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
         BELIEVE= str_to_lower(BELIEVE),
         believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
         IMPT= str_to_lower(IMPT),
         impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         INTEREST = str_to_lower(INTEREST), 
         INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
         interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         FAMILIAR = str_to_lower (FAMILIAR),
         familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
         NOVEL = str_to_lower(NOVEL),
         novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))|>
  #there is an unnecessary 16E video in the politics, remove that
  filter (video != "16E") |>
   #Select all the numeric values 
   select (ResponseId, 
           fake, 
           video,
           SHARE,
           BELIEVE,
           believe_numerical,
           share_numerical, 
           impt_numerical , 
           interest_numerical,
           familiar_numerical,
           novel_numerical)
  
 
         
#Selecting the Control group and convert likert value to numerical in post survey
pol_behavior_df_condition_trtmnt <-study2_data_raw_pol  |>
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|> 
     filter (condition == "Treatment") |> 
     mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(ACT_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level))) |>
           select (ResponseId, 
                   condition, 
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)

#Selecting the Treatment group and convert likert value to numerical in post survey
pol_behavior_df_condition_cnrl <-study2_data_raw_pol |> 
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
     filter (condition == "Control") |>
     mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(CTRL_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)),
           cy_numerical = as.numeric(ordered(CTRL_SHARING_ACCY, levels = sharing_accuracy_level)))|>
     select (ResponseId, 
                   condition,
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)
    

# Binding the control and treatment into one table 
post_survey_df <-bind_rows(pol_behavior_df_condition_cnrl,pol_behavior_df_condition_trtmnt)

# Binding the pre survey and post survey of each individual (60 participants)  
pre_post_df <-merge (pol_individual_df,post_survey_df) 



# The data frame with their rating for each video and post survey 
pol_df <-merge (pol_behavior_df,pre_post_df)

Basic Histograms

library(ggplot2)

ggplot(pol_df, aes(x=BELIEVE, fill = fake )) +  geom_bar() + 
   facet_wrap(~condition, nrow=2)

ggplot(pol_df, aes(x=SHARE, fill = fake)) +
  geom_bar() +
  facet_wrap(~condition, nrow=2)

histogram_df_control<-pol_df |> filter (condition =="Control")
histinfo=hist(histogram_df_control$share_numerical)

histinfo=hist(histogram_df_control$believe_numerical)

histogram_df_treatment<-pol_df |> filter (condition =="Treatment")
histinfo=hist(histogram_df_treatment$share_numerical)

histinfo=hist(histogram_df_treatment$believe_numerical)

Mediation Analysis

Step 1 - The total effect - the main path

#Total path 

fit.totaleffect=lm(share_numerical~condition,pol_df)

summary(fit.totaleffect)

## 
## Call:
## lm(formula = share_numerical ~ condition, data = pol_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9138 -0.9138 -0.8548  1.0862  4.1452 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.91379    0.07262  26.355   <2e-16 ***
## conditionTreatment -0.05895    0.10103  -0.584     0.56    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.355 on 718 degrees of freedom
## Multiple R-squared:  0.0004741,  Adjusted R-squared:  -0.000918 
## F-statistic: 0.3405 on 1 and 718 DF,  p-value: 0.5597

STEP 2- The effect of Independent variable Condition to the mediator belivability

fit.mediator=lm(believe_numerical~condition,pol_df)
summary(fit.mediator)

## 
## Call:
## lm(formula = believe_numerical ~ condition, data = pol_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1371 -1.1371 -0.1178  1.8629  1.8822 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.11782    0.07826  39.841   <2e-16 ***
## conditionTreatment  0.01928    0.10887   0.177    0.859    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.46 on 718 degrees of freedom
## Multiple R-squared:  4.368e-05,  Adjusted R-squared:  -0.001349 
## F-statistic: 0.03136 on 1 and 718 DF,  p-value: 0.8595

STEP 3 - The effect of the mediator on the dependent variable shareing behaviour

fit.dv=lm(share_numerical~condition +believe_numerical,pol_df)
summary(fit.dv)

## 
## Call:
## lm(formula = share_numerical ~ condition + believe_numerical, 
##     data = pol_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2460 -0.9323 -0.5626  0.7540  4.2805 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.40293    0.12833  18.724  < 2e-16 ***
## conditionTreatment -0.05593    0.09964  -0.561    0.575    
## believe_numerical  -0.15689    0.03416  -4.593 5.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.336 on 717 degrees of freedom
## Multiple R-squared:  0.02905,    Adjusted R-squared:  0.02634 
## F-statistic: 10.72 on 2 and 717 DF,  p-value: 2.576e-05

STEP 4 Mediation model

library(mediation)

## Warning: package 'mediation' was built under R version 4.3.1

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: mvtnorm

## Loading required package: sandwich

## mediation: Causal Mediation Analysis
## Version: 4.5.0

results = mediate(fit.mediator, fit.dv, treat='condition', mediator='believe_numerical', boot=T)

## Warning in mediate(fit.mediator, fit.dv, treat = "condition", mediator =
## "believe_numerical", : treatment and control values do not match factor levels;
## using Control and Treatment as control and treatment, respectively

## Running nonparametric bootstrap

summary(results)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value
## ACME           -0.00302     -0.03740         0.03    0.80
## ADE            -0.05593     -0.24917         0.13    0.54
## Total Effect   -0.05895     -0.25298         0.13    0.51
## Prop. Mediated  0.05131     -2.32004         1.83    0.85
## 
## Sample Size Used: 720 
## 
## 
## Simulations: 1000

Conducting simple ANOVA for treatment and condiition

One way ANOVA

# OUr data ent_df 
one.way <- aov( share_numerical ~ condition, data = pol_df)

summary(one.way)

##              Df Sum Sq Mean Sq F value Pr(>F)
## condition     1    0.6  0.6249   0.341   0.56
## Residuals   718 1317.6  1.8351

Effect size of one way

library (lsr)

## Warning: package 'lsr' was built under R version 4.3.1

# Eta sqr one way 
Eta_oneway <-etaSquared(one.way)

print (Eta_oneway)

##                 eta.sq  eta.sq.part
## condition 0.0004740681 0.0004740681

Effect size of two way with condition and beliveability

ent_twoway_no_interactions <- aov (share_numerical ~ condition+ believe_numerical,
                                   data= pol_df)

summary (ent_twoway_no_interactions)

##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## condition           1    0.6    0.62    0.35    0.554    
## believe_numerical   1   37.7   37.66   21.10 5.15e-06 ***
## Residuals         717 1279.9    1.79                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of one way

# Eta sqr one way 
Eta_twoway_no_interactions <-etaSquared(ent_twoway_no_interactions)

print (Eta_twoway_no_interactions)

##                         eta.sq  eta.sq.part
## condition         0.0004266501 0.0004392199
## believe_numerical 0.0285709542 0.0285845052

Two way ANOVA with Interactions

ANOVA 2 way with interactions

ent_twoway_with_interactions <- aov (share_numerical ~ condition * believe_numerical,
                                     data=pol_df)

summary ( ent_twoway_with_interactions)

##                              Df Sum Sq Mean Sq F value   Pr(>F)    
## condition                     1    0.6    0.62   0.350    0.554    
## believe_numerical             1   37.7   37.66  21.096 5.16e-06 ***
## condition:believe_numerical   1    1.7    1.66   0.932    0.335    
## Residuals                   716 1278.2    1.79                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of two way with interactions

# Eta sqr one way 
Eta_twoway_with_interactions <-etaSquared(ent_twoway_with_interactions)

print (Eta_twoway_with_interactions )

##                                   eta.sq  eta.sq.part
## condition                   0.0004266501 0.0004397914
## believe_numerical           0.0285709542 0.0286206504
## condition:believe_numerical 0.0012623068 0.0013000673

pol_df |> ggplot(aes(condition,share_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

pol_df |> ggplot(aes(condition,believe_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

Sharing and Blieveing discernmant

#understanding the likelihood of believing and the truth (discernment) 

#Those who could not detect deepfakes
could_not_identify_DF1<- pol_df |> filter (fake == "DF", believe_numerical == 1)
could_not_identify_DF2<- pol_df |> filter (fake == "DF", believe_numerical == 2)
wrong_beleive_DF<-rbind(could_not_identify_DF1,could_not_identify_DF2) |> mutate (detect="DF_Wrong") #192

#Those who could not detect real 
could_not_identify_R1<- pol_df |> filter (fake == "R", believe_numerical == 4)
could_not_identify_R2<- pol_df |> filter (fake == "R", believe_numerical == 5)
wrong_beleive_R<-rbind(could_not_identify_R1,could_not_identify_R2) |> mutate (detect="R_Wrong") #115

#Those who were uncertain of deepfakes or real 
uncertain_to_belive_DF<- pol_df |> filter (fake == "DF", believe_numerical == 3)
uncertain_to_belive_F<- pol_df |> filter (fake == "R", believe_numerical == 3)
uncertain_DF_or_R<-rbind(uncertain_to_belive_DF,uncertain_to_belive_F) |> mutate (detect="R_DF_Uncertain") #201

#Those who got it right to detect deepfakes 
identified_correctly_DF1<-pol_df |> filter (fake == "DF", believe_numerical == 4)
identified_correctly_DF2<-pol_df |> filter (fake == "DF", believe_numerical == 5)
correct_belive_DF<-rbind(identified_correctly_DF1,identified_correctly_DF2) |> mutate (detect="DF_Right") #491

#Those who got it right to detect real videos 
identified_correctly_R1<-pol_df |> filter (fake == "R", believe_numerical == 1)
identified_correctly_R2<-pol_df |> filter (fake == "R", believe_numerical == 2)
correct_belive_R<-rbind(identified_correctly_R1,identified_correctly_R2)|> mutate (detect="R_Right") #557

#Those who detect DF and R right 
correct_R_DF <-rbind(correct_belive_DF,correct_belive_R)

#All of the belives combined to table 
All_believes <-rbind(correct_belive_R,
                     correct_belive_DF,
                     wrong_beleive_R, 
                     wrong_beleive_DF,
                     uncertain_DF_or_R)


ggplot(All_believes, aes(x=detect, fill= detect)) +
  geom_bar()

# Analysing the sharing behavior of deepfakes and real, also the DF and R beliveability diference

# Load the openxlsx package
#install.packages("writexl")
library(writexl)

## Warning: package 'writexl' was built under R version 4.3.2

write_xlsx( pol_df,"C:/Users/Dell/Downloads/pol_df.xlsx", 
        col_names = TRUE, 
        format_headers = TRUE)

#Use the file pol_df the do some bar grapphs on DF and R

# Load necessary libraries
library(ggplot2)
library(dplyr)




# Calculate the mean belief levels and standard errors for 'DF' and 'R' within each condition
belief_stats <- pol_df %>%
  group_by(condition, fake) %>%
  summarise(
    Mean_Belief_Level = mean(believe_numerical),
    SE = sd(believe_numerical) / sqrt(n())
  ) %>%
  ungroup()

## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.

# Plotting the bars for DF and R with error bars
ggplot(belief_stats, aes(x = condition, y = Mean_Belief_Level, fill = fake)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.35) +
  geom_errorbar(aes(ymin = Mean_Belief_Level - SE, ymax = Mean_Belief_Level + SE),
                position = position_dodge(0.35), width = 0.25) +
  scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
  labs(x = "Condition", y = "Mean Belief Level", fill = "Type") +
  ggtitle("Mean Belief Levels for DF and R by Condition with Error Bars") +
  theme_minimal()

Diference between the belive level of DF in treatment and control

# Load necessary libraries
library(dplyr)

# Calculate the mean belief levels for 'R' in each condition
r_means <- pol_df %>%
  filter(fake == 'DF') %>%
  group_by(condition) %>%
  summarise(Mean_Belief_Level = mean(believe_numerical))

# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
  summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
  pull()

# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]

# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))

## [1] "Absolute Difference: 0.0337411939191696"

print(paste("Percentage Difference:", r_percentage_difference))

## [1] "Percentage Difference: 0.81428124021297"

Diference between R in COntrol and Treatment

# Load necessary libraries
library(dplyr)

# Calculate the mean belief levels for 'R' in each condition
r_means <- pol_df %>%
  filter(fake == 'R') %>%
  group_by(condition) %>%
  summarise(Mean_Belief_Level = mean(believe_numerical))

# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
  summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
  pull()

# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]

# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))

## [1] "Absolute Difference: 0.00482017055988138"

print(paste("Percentage Difference:", r_percentage_difference))

## [1] "Percentage Difference: 0.23041474654378"

Is there a statisitical dinference between DF in Control and Treatment

# Read the data from the pol_df

# Separate the belief levels for 'DF' in each condition
df_control_beliefs <- filter(pol_df, fake == 'DF' & condition == 'Control')$believe_numerical
df_treatment_beliefs <- filter(pol_df, fake == 'DF' & condition == 'Treatment')$believe_numerical

# Perform the t-test
t_test_result <- t.test(df_control_beliefs, df_treatment_beliefs)

# Output the t-test result
print(t_test_result)

## 
##  Welch Two Sample t-test
## 
## data:  df_control_beliefs and df_treatment_beliefs
## t = -0.2933, df = 357.29, p-value = 0.7695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2599790  0.1924966
## sample estimates:
## mean of x mean of y 
##  4.143678  4.177419

Is there a statisitical dinference between R in Control and Treatment

# Separate the belief levels for 'R' in each condition
r_control_beliefs <- filter(pol_df, fake == 'R' & condition == 'Control')$believe_numerical
r_treatment_beliefs <- filter(pol_df, fake == 'R' & condition == 'Treatment')$believe_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_beliefs, r_treatment_beliefs)

# Output the t-test result
print(t_test_result_r)

## 
##  Welch Two Sample t-test
## 
## data:  r_control_beliefs and r_treatment_beliefs
## t = -0.047066, df = 345.54, p-value = 0.9625
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2062522  0.1966118
## sample estimates:
## mean of x mean of y 
##  2.091954  2.096774

Shere diference in DF and R between cotrol and treatment

library(readxl)
library(dplyr)
library(ggplot2)



# Filter the data for 'DF' and 'R' groups
df_data <- pol_df %>% filter(fake == 'DF')
r_data <- pol_df %>% filter(fake != 'DF')

# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
  group_by(condition) %>%
  summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')

r_stats <- r_data %>%
  group_by(condition) %>%
  summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')

# Combine the data
plot_data <- bind_rows(
  mutate(df_stats, group = "DF"),
  mutate(r_stats, group = "R")
)

# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd, group = group),
    position = position_dodge(0.7), width = 0.25
  ) +
  scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
  labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of DF and R between Control and Treatment condition") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  scale_x_discrete(labels = c("Control", "Treatment")) +
  guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))

# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)

what is the diference between DF in Treatment and control in sharing intentions

library(dplyr)


# Filter the data for the DF group
df_data <- filter(pol_df, fake == 'DF')

# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
  group_by(condition) %>%
  summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')

# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)

# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100

# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)

## $difference
## [1] -0.2059696
## 
## $percentage_difference
## [1] -11.59829

#results 
#The difference in the mean "share_numerical" value for the DF (Deepfake Videos) group between the Treatment and Control conditions is approximately -0.2059696. In percentage terms, this represents an approximate -11.59829% decrease in the Treatment group compared to the Control group.

what is the diference between Real videos sharing intention in Control and Treatment

library(dplyr)

# Filter the data for the R group (assuming R group is represented by all non-DF values in 'fake' column)
r_data <- filter(pol_df, fake != 'DF')

# Group the data by condition and calculate the mean of 'share_numerical' for the R group
r_grouped <- r_data %>%
  group_by(condition) %>%
  summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')

# Extract the means for Treatment and Control
mean_treatment <- r_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- r_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)

# Calculate the difference and the percentage difference
r_difference <- mean_treatment - mean_control
r_percentage_difference <- (r_difference / mean_control) * 100

# Output the difference and the percentage difference
list(difference = r_difference, percentage_difference = r_percentage_difference)

## $difference
## [1] 0.08806081
## 
## $percentage_difference
## [1] 4.292039

#results'
#The difference in the mean "share_numerical" value for the R (Real Videos) group between the Treatment and Control conditions is approximately 0.08806081. In percentage terms, this represents an approximate 4.29% increase in the Treatment group compared to the Control group.

Is there a statisitical dinference between DF in Control and Treatment in sharing condition

# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_df, fake == 'DF' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_df, fake == 'DF' & condition == 'Treatment')$share_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)

# Output the t-test result
print(t_test_result_r)

## 
##  Welch Two Sample t-test
## 
## data:  r_control_share and r_treatment_share
## t = 1.628, df = 348.66, p-value = 0.1044
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04286935  0.45480854
## sample estimates:
## mean of x mean of y 
##  1.775862  1.569892

Is there a statisitical dinference between R in Control and Treatment in sharing condition

# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_df, fake == 'R' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_df, fake == 'R' & condition == 'Treatment')$share_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)

# Output the t-test result
print(t_test_result_r)

## 
##  Welch Two Sample t-test
## 
## data:  r_control_share and r_treatment_share
## t = -0.56929, df = 355.11, p-value = 0.5695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3922776  0.2161560
## sample estimates:
## mean of x mean of y 
##  2.051724  2.139785

What is the most effecting to sharing from impt_numerical, interest_numerical, novel_numerical and familiar_numerical in the treatment and control conditions.

library(dplyr)
library(broom)


# Split the data into treatment and control groups
treatment_data <- filter(pol_df, condition == 'Treatment')
control_data <- filter(pol_df, condition == 'Control')

# Run linear regression models for each group
treatment_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = treatment_data)
control_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = control_data)

# Summarize the models
treatment_summary <- tidy(treatment_model)
control_summary <- tidy(control_model)

# Output the summaries
treatment_summary

## # A tibble: 5 × 5
##   term               estimate std.error statistic   p.value
##   <chr>                 <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)         -0.0514    0.253     -0.203  8.39e- 1
## 2 impt_numerical       0.287     0.0528     5.45   9.40e- 8
## 3 interest_numerical  NA        NA         NA     NA       
## 4 novel_numerical      0.406     0.0643     6.32   7.56e-10
## 5 familiar_numerical  -0.0125    0.0449    -0.278  7.81e- 1

control_summary

## # A tibble: 5 × 5
##   term               estimate std.error statistic   p.value
##   <chr>                 <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)         -0.185     0.242     -0.766  4.44e- 1
## 2 impt_numerical       0.240     0.0563     4.27   2.53e- 5
## 3 interest_numerical  NA        NA         NA     NA       
## 4 novel_numerical      0.457     0.0591     7.74   1.09e-13
## 5 familiar_numerical   0.0275    0.0450     0.611  5.42e- 1

#ploting to a graph

library(dplyr)
library(ggplot2)

# Split the data into treatment and control groups
treatment_data <- filter(pol_df, condition == 'Treatment')
control_data <- filter(pol_df, condition == 'Control')

# Function to plot linear models
plot_lm <- function(df, condition) {
  variables <- c('impt_numerical', 'interest_numerical', 'novel_numerical', 'familiar_numerical')
  
  # Create a list to store plots
  plot_list <- list()
  
  for (var in variables) {
    # Fit the linear model
    model <- lm(share_numerical ~ ., data = df[, c('share_numerical', var)])
    
    # Extract the R-squared value
    r_squared <- summary(model)$r.squared
    
    # Create the plot
    p <- ggplot(df, aes_string(x = var, y = 'share_numerical')) +
      geom_point(alpha = 0.5) +
      geom_smooth(method = 'lm', formula = y ~ x, color = 'red') +
      labs(title = paste(var, 'Effect in', condition, 'Condition'),
           subtitle = paste('R-squared =', round(r_squared, 2)),
           x = var,
           y = 'Share Numerical')
    
    # Add the plot to the list
    plot_list[[var]] <- p
  }
  
  return(plot_list)
}

# Plot for treatment group
treatment_plots <- plot_lm(treatment_data, 'Treatment')

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Plot for control group
control_plots <- plot_lm(control_data, 'Control')

# Display the plots
treatment_plots$impt_numerical

treatment_plots$interest_numerical

treatment_plots$novel_numerical

treatment_plots$familiar_numerical

control_plots$impt_numerical

control_plots$interest_numerical

control_plots$novel_numerical

control_plots$familiar_numerical

Study 2 Political data Mediation Analysis

Dilrukshi

2023-11-07

Deepfake Political Data Wrangling

Study 2 Political Data cleaning

Likert scales to order levels

Wrangling the Initial (Pre) survey data before experiment

Wrangling of the survey data after control and learning treatment

Basic Histograms

Mediation Analysis

Step 1 - The total effect - the main path

STEP 2- The effect of Independent variable Condition to the mediator belivability

STEP 3 - The effect of the mediator on the dependent variable shareing behaviour

STEP 4 Mediation model

Conducting simple ANOVA for treatment and condiition

One way ANOVA

Effect size of one way

Effect size of two way with condition and beliveability

Effect size of one way

Two way ANOVA with Interactions

ANOVA 2 way with interactions

Effect size of two way with interactions

Diference between the belive level of DF in treatment and control

Diference between R in COntrol and Treatment

Is there a statisitical dinference between DF in Control and Treatment

Is there a statisitical dinference between R in Control and Treatment

Shere diference in DF and R between cotrol and treatment