Deepfake Entertainment Data Wrangling

# Clean the Entertainment data frame to explore the effect of the other measured variables
study2_data_raw_ent_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Entertainment (Believability AND Sharing Intentions) - FINAL_July 28, 2023_06.18.csv")
## Rows: 63 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(study2_data_raw_ent_all)
## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "2023-07… "2023-… "IP A… "100"    "785"                  "True"   "2023-07-02…
## 4 "2023-07… "2023-… "IP A… "100"    "1411"                 "True"   "2023-07-02…
## 5 "2023-07… "2023-… "IP A… "100"    "1686"                 "True"   "2023-07-02…
## 6 "2023-07… "2023-… "IP A… "100"    "741"                  "True"   "2023-07-02…
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `3E_R_BELIEVE` <chr>, …
# Drop the first two rows: the Qualtrics question-text and ImportId metadata rows, not participant responses

study2_data_all_ent <- study2_data_raw_ent_all[-c(1:2), ]

head(study2_data_all_ent)
## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 2023-07-… 2023-0… IP Ad… 100      785                    True     2023-07-02 …
## 2 2023-07-… 2023-0… IP Ad… 100      1411                   True     2023-07-02 …
## 3 2023-07-… 2023-0… IP Ad… 100      1686                   True     2023-07-02 …
## 4 2023-07-… 2023-0… IP Ad… 100      741                    True     2023-07-02 …
## 5 2023-07-… 2023-0… IP Ad… 100      735                    True     2023-07-02 …
## 6 2023-07-… 2023-0… IP Ad… 100      1578                   True     2023-07-02 …
## # ℹ abbreviated name: ¹​`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `3E_R_BELIEVE` <chr>, …
nrow(study2_data_all_ent)
## [1] 61

Study 2 Entertainment Data Cleaning

Ordering the Likert scale levels

# Likert response options, listed in order from lowest to highest
likely_values <-  c(
  "very unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "very likely"
)

consume_values <- c(
  "Less than 1 hour per day",
  "1-2 hours per day",
  "2-3 hours per day",
  "3-4 hours per day",
  "5+ hours per day"
)

interest_levels <- c("not at all interested in this",
                     "not interested",
                     "neither not interested nor interested",
                     "interested",
                     "very much interested")


likely_short_values <- c("Very unlikely",
                         "Unlikely",
                         "Neither likely nor unlikely",
                         "Likely",
                         "Very likely")


knowledgable_values <- c(
  "very unknowledgeable",
  "somewhat unknowledgeable",
  "neither",
  "somewhat knowledgeable",
  "very knowledgeable"
)
easy_levels <- c("Very difficult",
                 "Difficult",
                 "Neither difficult nor easy",
                 "Easy",
                 "Very easy")

boolean_q <- c("Yes", "No")

importance_levels <- c("very unimportant",
                       "unimportant",
                       "neither important nor unimportant",
                       "important",
                       "very important")

novel_levels <- c("not at all novel",
                  "not novel",
                  "neither novel nor not novel",
                  "novel",
                  "very novel")

familiar_levels <- c("not at all familiar",
                     "not familiar",
                     "neither familiar nor unfamiliar",
                     "familiar",
                     "very familiar")

believe_levels <- c("very unlikely", 
                    "unlikely", 
                    "neither likely nor unlikely", 
                    "likely",
                    "very likely" )

####################
# Post-survey questions (asked after the experiment)

judging_impact_values <- c(
  "extremely unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "extremely likely"
)

sharing_accuracy_level <- c("not at all important",
                            "moderately important",
                            "slightly important",
                            "neither important nor unimportant",
                            "very important",
                            "extremely important")
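
As a quick illustration of the conversion used throughout this script (a minimal sketch using the believe_levels vector defined above), ordered() maps each response onto its position in the supplied levels, and as.numeric() then extracts that position as the numeric score; any response not listed in the levels becomes NA.

# Minimal sketch: map Likert text onto 1..k numeric codes via an ordered factor
example_responses <- c("very likely", "unlikely", "neither likely nor unlikely")
as.numeric(ordered(example_responses, levels = believe_levels))
# Positions within believe_levels: 5, 2, 3; an unmatched response would yield NA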

Wrangling the initial (pre-experiment) survey data

ent_individual_df <- study2_data_all_ent |> filter(Finished == "True") |>
  mutate(Duration = as.numeric(`Duration (in seconds)`),
         AGE = as.numeric(AGE),
         BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
         browse_internet = as.numeric(BROWSE_INTERNET),
         USE_SNS = ordered(USE_SNS, levels = consume_values),
         use_sns = as.numeric(USE_SNS),
         WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels = likely_short_values),
         watching_behavior = as.numeric(WATCHING_BEHAVIOR),
         SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likely_short_values),
         sharing_behavior = as.numeric(SHARING_BEHAVIOR),
         KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
         EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
         KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
         KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
         know_create_df = as.numeric(KNOW_CREATE_DF),
         EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
         ease_create_df = as.numeric(EASE_CREATE_DF)) |>
  select(
    ResponseId,
    Duration,
    AGE,
    BROWSE_INTERNET,
    browse_internet,
    USE_SNS,
    use_sns,
    SNS_PLATFORM_USE,
    WATCHING_BEHAVIOR,
    watching_behavior,
    SHARING_BEHAVIOR,
    sharing_behavior,
    KNOW_DEEPFAKE,
    KNOW_CREATE_DF,
    know_create_df,
    EXP_CREATE_DF,
    EASE_CREATE_DF,
    ease_create_df
  ) |> mutate(
    SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE, split = ","),
    value = TRUE
  ) |>
  unnest(cols = c(SNS_PLATFORM_USE)) |>
  mutate(SNS_PLATFORM_USE = paste0("Plat_", SNS_PLATFORM_USE)) |>
  pivot_wider(names_from = SNS_PLATFORM_USE, values_from = value, values_fill = FALSE)
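
The tail of the pipe above one-hot encodes the multi-select SNS platform question: the comma-separated string is split into a list column, unnested to one row per selected platform, and pivoted wide into one logical column per platform. A toy example of the same idiom (hypothetical ResponseId and platform values, assuming the tidyverse is loaded):

# Toy illustration of the multi-select expansion used above (hypothetical data)
toy <- tibble(ResponseId = c("R_1", "R_2"),
              SNS_PLATFORM_USE = c("Facebook,TikTok", "Facebook"))
toy |>
  mutate(SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE, split = ","), value = TRUE) |>
  unnest(cols = c(SNS_PLATFORM_USE)) |>
  mutate(SNS_PLATFORM_USE = paste0("Plat_", SNS_PLATFORM_USE)) |>
  pivot_wider(names_from = SNS_PLATFORM_USE, values_from = value, values_fill = FALSE)
# Result: one row per ResponseId, with logical columns Plat_Facebook and Plat_TikTok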

Wrangling the survey data collected after the control and learning treatments

ent_behavior_df <- study2_data_all_ent |> filter(Finished == "True") |>
     select(ResponseId, matches("_R_"), matches("_DF_")) |>
     pivot_longer(-ResponseId, values_drop_na = TRUE) |>
     separate(name, c("video", "fake", "question"), "_", extra = "merge") |>
     pivot_wider(names_from = question, values_from = value) |>
     select(ResponseId, fake, video, SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR) |>
     # Remove the erroneous value created by the "Don't share" response for video 3E (currently disabled)
     # filter(video != "3E") |>
     mutate(SHARE = str_to_lower(SHARE),
         share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
         BELIEVE = str_to_lower(BELIEVE),
         believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
         IMPT = str_to_lower(IMPT),
         impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         INTEREST = str_to_lower(INTEREST),
         INTEREST = str_replace(INTEREST, "uninterested", "not interested"),
         interest_numerical = as.numeric(ordered(INTEREST, levels = interest_levels)),
         FAMILIAR = str_to_lower(FAMILIAR),
         familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
         NOVEL = str_to_lower(NOVEL),
         novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels))) |>
   # Keep the numeric ratings alongside the key identifiers
   select(ResponseId,
           fake,
           video,
           SHARE,
           BELIEVE,
           believe_numerical,
           share_numerical,
           impt_numerical,
           interest_numerical,
           familiar_numerical,
           novel_numerical)
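
Each per-video response column in the raw export is named video_fake_question (for example 3E_R_BELIEVE, visible in the column listing above), so pivot_longer plus separate recovers the video id, the DF/R flag, and the question, and pivot_wider then places one question per column. A toy example (hypothetical column names beyond 3E_R_BELIEVE, assuming the tidyverse is loaded):

# Toy illustration of the wide-to-long-to-wide reshape used above (hypothetical values)
toy_wide <- tibble(ResponseId = "R_1",
                   `3E_R_BELIEVE` = "unlikely",
                   `3E_R_SHARE` = "very unlikely",
                   `1E_DF_BELIEVE` = "likely")
toy_wide |>
  pivot_longer(-ResponseId, values_drop_na = TRUE) |>
  separate(name, c("video", "fake", "question"), "_", extra = "merge") |>
  pivot_wider(names_from = question, values_from = value)
# Result: one row per ResponseId x video x fake, with BELIEVE and SHARE as columns;
# question/video combinations that were never asked become NA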
  
 
         
# Select the control group and convert its post-survey Likert responses to numeric values
ent_behavior_df_condition_cnrl <- study2_data_all_ent |> filter(Finished == "True") |>
     mutate(condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control")) |>
     filter(condition == "Control") |>
     mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
            judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
            SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
            sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
            SHARING_INT = str_to_lower(CTRL_SHARING_INT),
            sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
            SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
            sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level))) |>
     select(ResponseId,
            condition,
            JUDGING_IMPACT,
            SHARING_PERSP,
            SHARING_INT,
            SHARING_ACCY,
            judging_impact_numerical,
            sharing_persp_numerical,
            sharing_int_numerical,
            sharing_accy_numerical)
    


# Select the treatment group and convert its post-survey Likert responses to numeric values
ent_behavior_df_condition_trmnt <- study2_data_all_ent |> filter(Finished == "True") |>
     mutate(condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control")) |>
     filter(condition == "Treatment") |>
     mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
            judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
            SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
            sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
            SHARING_INT = str_to_lower(ACT_SHARING_INT),
            sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
            SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
            sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level))) |>
     select(ResponseId,
            condition,
            JUDGING_IMPACT,
            SHARING_PERSP,
            SHARING_INT,
            SHARING_ACCY,
            judging_impact_numerical,
            sharing_persp_numerical,
            sharing_int_numerical,
            sharing_accy_numerical)
    

# Bind the control and treatment groups into one post-survey table
post_survey_df <- bind_rows(ent_behavior_df_condition_cnrl, ent_behavior_df_condition_trmnt)

# Join each participant's per-video responses with their post-survey responses (by ResponseId)
pre_post_df <- merge(ent_behavior_df, post_survey_df)



# Final data frame: one row per participant x video, with the video ratings and post-survey measures
ent_df <- merge(ent_behavior_df, pre_post_df)
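
A quick sanity check (not part of the original output) can confirm that each participant was assigned to exactly one condition and that the join produced one row per participant per rated video:

# Sanity checks (output not shown): condition assignment and join size
post_survey_df |> count(condition)    # number of participants in Control and in Treatment
nrow(ent_df)                          # should equal participants x videos rated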

Basic Histograms

library(ggplot2)

ggplot(ent_df, aes(x = BELIEVE, fill = fake)) +
  geom_bar() +
  facet_wrap(~condition, nrow = 2)

ggplot(ent_df, aes(x = SHARE, fill = fake)) +
  geom_bar() +
  facet_wrap(~condition, nrow = 2)

histogram_df_control <- ent_df |> filter(condition == "Control")
histinfo <- hist(histogram_df_control$share_numerical)

histinfo <- hist(histogram_df_control$believe_numerical)

histogram_df_treatment <- ent_df |> filter(condition == "Treatment")
histinfo <- hist(histogram_df_treatment$share_numerical)

histinfo <- hist(histogram_df_treatment$believe_numerical)

Mediation Analysis

Step 1: The total effect (the main path)

# Total path

fit.totaleffect <- lm(share_numerical ~ condition, data = ent_df)

summary(fit.totaleffect)
## 
## Call:
## lm(formula = share_numerical ~ condition, data = ent_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3556 -0.7258 -0.7258  0.6444  4.2742 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.72581    0.07566  22.810  < 2e-16 ***
## conditionTreatment  0.62975    0.10789   5.837 7.99e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.459 on 730 degrees of freedom
## Multiple R-squared:  0.04459,    Adjusted R-squared:  0.04328 
## F-statistic: 34.07 on 1 and 730 DF,  p-value: 7.988e-09
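
As a readability check on the coefficients above (values copied from the printed summary), the intercept is the mean sharing intention in the Control condition and the treatment coefficient is the shift for the Treatment condition, so the implied group means are:

# Implied group means of share_numerical from the total-effect model above
intercept <- 1.72581
treatment_effect <- 0.62975
c(Control = intercept, Treatment = intercept + treatment_effect)  # ~1.73 and ~2.36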

Step 2: The effect of the independent variable (condition) on the mediator (believability)

fit.mediator <- lm(believe_numerical ~ condition, data = ent_df)
summary(fit.mediator)
## 
## Call:
## lm(formula = believe_numerical ~ condition, data = ent_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.08611 -1.08611 -0.08611  1.09946  2.09946 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.90054    0.07456  38.902   <2e-16 ***
## conditionTreatment  0.18557    0.10632   1.745   0.0813 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.438 on 730 degrees of freedom
## Multiple R-squared:  0.004156,   Adjusted R-squared:  0.002792 
## F-statistic: 3.047 on 1 and 730 DF,  p-value: 0.08133

Step 3: The effect of the mediator on the dependent variable (sharing behaviour)

fit.dv <- lm(share_numerical ~ condition + believe_numerical, data = ent_df)
summary(fit.dv)
## 
## Call:
## lm(formula = share_numerical ~ condition + believe_numerical, 
##     data = ent_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6971 -1.0370 -0.5458  0.4666  4.6179 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.20070    0.13099  16.801  < 2e-16 ***
## conditionTreatment  0.66013    0.10677   6.183 1.05e-09 ***
## believe_numerical  -0.16372    0.03709  -4.414 1.17e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.441 on 729 degrees of freedom
## Multiple R-squared:  0.06946,    Adjusted R-squared:  0.06691 
## F-statistic: 27.21 on 2 and 729 DF,  p-value: 4.011e-12
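
Before fitting the formal mediation model, the indirect effect can be approximated by the product of the a-path (condition to believability, from fit.mediator) and the b-path (believability to sharing, controlling for condition, from fit.dv); this should match the ACME reported by mediate() below, up to simulation error:

# Product-of-coefficients approximation of the indirect (mediated) effect
a_path <- coef(fit.mediator)["conditionTreatment"]   # ~0.18557
b_path <- coef(fit.dv)["believe_numerical"]          # ~-0.16372
unname(a_path * b_path)                              # ~ -0.0304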

Step 4: The mediation model

library(mediation)
## Warning: package 'mediation' was built under R version 4.3.1
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
results <- mediate(fit.mediator, fit.dv, treat = 'condition', mediator = 'believe_numerical', boot = TRUE)
## Warning in mediate(fit.mediator, fit.dv, treat = "condition", mediator =
## "believe_numerical", : treatment and control values do not match factor levels;
## using Control and Treatment as control and treatment, respectively
## Running nonparametric bootstrap
summary(results)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            -0.0304      -0.0765         0.00   0.092 .  
## ADE              0.6601       0.4538         0.87  <2e-16 ***
## Total Effect     0.6297       0.4208         0.85  <2e-16 ***
## Prop. Mediated  -0.0482      -0.1481         0.00   0.092 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 732 
## 
## 
## Simulations: 1000
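
As a consistency check on the decomposition reported above (estimates copied from the printed summary), the average causal mediation effect (ACME) and the average direct effect (ADE) should sum to the total effect:

# ACME + ADE should reproduce the Total Effect row (up to rounding)
acme <- -0.0304
ade  <-  0.6601
acme + ade   # ~0.6297, matching the Total Effect estimate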

Conducting a simple ANOVA for the treatment and control conditions

One-way ANOVA

# Our data: ent_df
one.way <- aov(share_numerical ~ condition, data = ent_df)

summary(one.way)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## condition     1   72.6   72.56   34.07 7.99e-09 ***
## Residuals   730 1554.5    2.13                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of the one-way ANOVA

library(lsr)
## Warning: package 'lsr' was built under R version 4.3.1
# Eta squared for the one-way ANOVA
Eta_oneway <- etaSquared(one.way)

print(Eta_oneway)
##               eta.sq eta.sq.part
## condition 0.04459247  0.04459247
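
With a single predictor, eta squared is simply the condition sum of squares divided by the total sum of squares, so the value above can be reproduced by hand from the one-way ANOVA table (sums of squares copied from the summary):

# Eta squared by hand from the one-way ANOVA table above
ss_condition <- 72.56
ss_residual  <- 1554.5
ss_condition / (ss_condition + ss_residual)   # ~0.0446, matching etaSquared(one.way)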

Effect size of the two-way ANOVA with condition and believability

ent_twoway_no_interactions <- aov(share_numerical ~ condition + believe_numerical,
                                  data = ent_df)

summary(ent_twoway_no_interactions)
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## condition           1   72.6   72.56   34.94 5.23e-09 ***
## believe_numerical   1   40.5   40.47   19.48 1.17e-05 ***
## Residuals         729 1514.1    2.08                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of the two-way ANOVA without interactions

# Eta squared for the two-way ANOVA without interactions
Eta_twoway_no_interactions <- etaSquared(ent_twoway_no_interactions)

print(Eta_twoway_no_interactions)
##                       eta.sq eta.sq.part
## condition         0.04879546  0.04982526
## believe_numerical 0.02487127  0.02603210

Two-way ANOVA with interactions

Two-way ANOVA model with the condition-by-believability interaction

ent_twoway_with_interactions <- aov(share_numerical ~ condition * believe_numerical,
                                    data = ent_df)

summary(ent_twoway_with_interactions)
##                              Df Sum Sq Mean Sq F value   Pr(>F)    
## condition                     1   72.6   72.56  35.189 4.62e-09 ***
## believe_numerical             1   40.5   40.47  19.627 1.09e-05 ***
## condition:believe_numerical   1   13.0   13.03   6.319   0.0122 *  
## Residuals                   728 1501.0    2.06                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of the two-way ANOVA with interactions

# Eta squared for the two-way ANOVA with interactions
Eta_twoway_with_interactions <- etaSquared(ent_twoway_with_interactions)

print(Eta_twoway_with_interactions)
##                                 eta.sq eta.sq.part
## condition                   0.04879546 0.050236016
## believe_numerical           0.02487127 0.026252132
## condition:believe_numerical 0.00800756 0.008605317
ent_df |> ggplot(aes(condition, share_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

ent_df |> ggplot(aes(condition, believe_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

Analysing the sharing behavior of deepfake (DF) and real (R) videos, and the difference in DF and R believability

# Load the writexl package
#install.packages("writexl")
library(writexl)
## Warning: package 'writexl' was built under R version 4.3.2
write_xlsx( 
        ent_df,"C:/Users/Dell/Downloads/ent_df.xlsx", 
        col_names = TRUE, 
        format_headers = TRUE)

# Use ent_df to make some bar graphs comparing DF and R

# Load necessary libraries
library(ggplot2)
library(dplyr)




# Calculate the mean belief levels and standard errors for 'DF' and 'R' within each condition
belief_stats <- ent_df %>%
  group_by(condition, fake) %>%
  summarise(
    Mean_Belief_Level = mean(believe_numerical),
    SE = sd(believe_numerical) / sqrt(n())
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
# Plotting the bars for DF and R with error bars
ggplot(belief_stats, aes(x = condition, y = Mean_Belief_Level, fill = fake)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.35) +
  geom_errorbar(aes(ymin = Mean_Belief_Level - SE, ymax = Mean_Belief_Level + SE),
                position = position_dodge(0.35), width = 0.25) +
  scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
  labs(x = "Condition", y = "Mean Belief Level", fill = "Type") +
  ggtitle("Mean Belief Levels for DF and R by Condition with Error Bars") +
  theme_minimal()

# Difference in DF belief levels between Control and Treatment

# Load necessary libraries
library(dplyr)

# Calculate the mean belief levels for 'DF' in each condition
r_means <- ent_df %>%
  filter(fake == 'DF') %>%
  group_by(condition) %>%
  summarise(Mean_Belief_Level = mean(believe_numerical))

# Calculate the absolute difference in 'DF' belief levels between Treatment and Control
r_difference <- r_means %>%
  summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
  pull()

# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]

# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.0951612903225807"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 2.5801749271137"

Difference in R belief levels between Control and Treatment

# Load necessary libraries
library(dplyr)

# Calculate the mean belief levels for 'R' in each condition
r_means <- ent_df %>%
  filter(fake == 'R') %>%
  group_by(condition) %>%
  summarise(Mean_Belief_Level = mean(believe_numerical))

# Calculate the absolute difference in 'R' levels between treatment and control
r_difference <- r_means %>%
  summarise(Difference = Mean_Belief_Level[condition == "Treatment"] - Mean_Belief_Level[condition == "Control"]) %>%
  pull()

# Calculate the percentage difference relative to the Control condition
r_percentage_difference <- 100 * r_difference / r_means$Mean_Belief_Level[r_means$condition == "Control"]

# Print the absolute and percentage differences
print(paste("Absolute Difference:", r_difference))
## [1] "Absolute Difference: 0.275985663082437"
print(paste("Percentage Difference:", r_percentage_difference))
## [1] "Percentage Difference: 13.0619168787108"

Is there a statistically significant difference in DF belief levels between Control and Treatment?

# Use the data in ent_df


# Separate the belief levels for 'DF' in each condition
df_control_beliefs <- filter(ent_df, fake == 'DF' & condition == 'Control')$believe_numerical
df_treatment_beliefs <- filter(ent_df, fake == 'DF' & condition == 'Treatment')$believe_numerical

# Perform the t-test
t_test_result <- t.test(df_control_beliefs, df_treatment_beliefs)

# Output the t-test result
print(t_test_result)
## 
##  Welch Two Sample t-test
## 
## data:  df_control_beliefs and df_treatment_beliefs
## t = -0.6964, df = 360.72, p-value = 0.4866
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3638874  0.1735649
## sample estimates:
## mean of x mean of y 
##  3.688172  3.783333
# The t-test comparing mean belief levels for "DF" between the Control and Treatment groups
# yields a t-statistic of approximately -0.70 and a p-value of approximately 0.49.
# Since the p-value is greater than 0.05 (assuming a standard significance level), we do not
# have enough evidence to reject the null hypothesis of equal means. That is, there is no
# statistically significant difference in 'DF' belief levels between the Control and
# Treatment groups at the 95% confidence level.

Is there a statistically significant difference in R belief levels between Control and Treatment?

# Separate the belief levels for 'R' in each condition
r_control_beliefs <- filter(ent_df, fake == 'R' & condition == 'Control')$believe_numerical
r_treatment_beliefs <- filter(ent_df, fake == 'R' & condition == 'Treatment')$believe_numerical

# Perform the t-test
t_test_result_r <- t.test(r_control_beliefs, r_treatment_beliefs)

# Output the t-test result
print(t_test_result_r)
## 
##  Welch Two Sample t-test
## 
## data:  r_control_beliefs and r_treatment_beliefs
## t = -2.2863, df = 356.56, p-value = 0.02282
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.51338321 -0.03858812
## sample estimates:
## mean of x mean of y 
##  2.112903  2.388889
# The t-test comparing mean belief levels for "R" between the Control and Treatment groups
# yields a t-statistic of approximately -2.29 and a p-value of approximately 0.023.
# Since the p-value is less than 0.05 (assuming a standard significance level), we have
# evidence to reject the null hypothesis of equal means. That is, there is a statistically
# significant difference in 'R' belief levels between the Control and Treatment groups at
# the 95% confidence level.

Sharing difference for DF and R between Control and Treatment

library(readxl)
library(dplyr)
library(ggplot2)



# Filter the data for 'DF' and 'R' groups
df_data <- ent_df %>% filter(fake == 'DF')
r_data <- ent_df %>% filter(fake != 'DF')

# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
  group_by(condition) %>%
  summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')

r_stats <- r_data %>%
  group_by(condition) %>%
  summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')

# Combine the data
plot_data <- bind_rows(
  mutate(df_stats, group = "DF"),
  mutate(r_stats, group = "R")
)

# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd, group = group),
    position = position_dodge(0.7), width = 0.25
  ) +
  scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
  labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of DF and R between Control and Treatment condition") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  scale_x_discrete(labels = c("Control", "Treatment")) +
  guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))

# Save the plot to a PNG file
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)

What is the difference in DF sharing intention between Treatment and Control?

library(dplyr)


# Filter the data for the DF group
df_data <- filter(ent_df, fake == 'DF')

# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
  group_by(condition) %>%
  summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')

# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)

# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100

# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] 0.681362
## 
## $percentage_difference
## [1] 47.28856
# Results:
# The difference in the mean share_numerical value for the DF (deepfake videos) group between
# the Treatment and Control conditions is approximately 0.681. In percentage terms, this is an
# increase of approximately 47.29% in the Treatment group compared to the Control group.

What is the difference in real-video (R) sharing intention between Control and Treatment?

library(dplyr)

# Filter the data for the R group (assuming R group is represented by all non-DF values in 'fake' column)
r_data <- filter(ent_df, fake != 'DF')

# Group the data by condition and calculate the mean of 'share_numerical' for the R group
r_grouped <- r_data %>%
  group_by(condition) %>%
  summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')

# Extract the means for Treatment and Control
mean_treatment <- r_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- r_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)

# Calculate the difference and the percentage difference
r_difference <- mean_treatment - mean_control
r_percentage_difference <- (r_difference / mean_control) * 100

# Output the difference and the percentage difference
list(difference = r_difference, percentage_difference = r_percentage_difference)
## $difference
## [1] 0.5781362
## 
## $percentage_difference
## [1] 28.75223
# Results:
# The difference in the mean share_numerical value for the R (real videos) group between the
# Treatment and Control conditions is approximately 0.578. In percentage terms, this is an
# increase of approximately 28.75% in the Treatment group compared to the Control group.