Deepfake Combined Data Wrangling

#Clean Political data frame to explore the effect of measured other variables
study2_data_raw_pol_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Politics (Believability AND Sharing Intentions) - FINAL - Copy_July 28, 2023_06.53.csv")

## Rows: 64 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head (study2_data_raw_pol_all)

## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "6/21/20… "6/21/… "Surv… "100"    "12"                   "TRUE"   "6/21/2023 …
## 4 "6/21/20… "6/21/… "IP A… "100"    "1116"                 "TRUE"   "6/21/2023 …
## 5 "6/26/20… "6/26/… "IP A… "100"    "716"                  "TRUE"   "6/26/2023 …
## 6 "6/26/20… "6/26/… "IP A… "100"    "748"                  "TRUE"   "6/26/2023 …
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …

# Drop the first 4 raws as those were used for test 

study2_data_raw_pol<- study2_data_raw_pol_all[-c(1:4),]

head(study2_data_raw_pol)

## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 6/26/202… 6/26/2… IP Ad… 100      716                    TRUE     6/26/2023 2…
## 2 6/26/202… 6/26/2… IP Ad… 100      748                    TRUE     6/26/2023 2…
## 3 6/26/202… 6/26/2… IP Ad… 100      1174                   TRUE     6/26/2023 2…
## 4 6/26/202… 6/26/2… IP Ad… 100      991                    TRUE     6/26/2023 2…
## 5 6/26/202… 6/26/2… IP Ad… 100      1255                   TRUE     6/26/2023 2…
## 6 6/26/202… 6/26/2… IP Ad… 100      882                    TRUE     6/26/2023 2…
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `1P_R_BELIEVE` <chr>, …

nrow(study2_data_raw_pol)

## [1] 60

#Clean Entertainment data frame to explore the effect of measured other variables
study2_data_raw_ent_all <- read_csv("C:/Users/Dell/OneDrive/Documents/CREST Postdoc/Deepfakes Experiment/Study 2 raw data/NEW - DeepF_Study 2_Entertainment (Believability AND Sharing Intentions) - FINAL_July 28, 2023_06.18.csv")

## Rows: 63 Columns: 166
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (166): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head (study2_data_raw_ent_all)

## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 "Start D… "End D… "Resp… "Progre… "Duration (in seconds… "Finish… "Recorded D…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp… "{\"ImportI…
## 3 "2023-07… "2023-… "IP A… "100"    "785"                  "True"   "2023-07-02…
## 4 "2023-07… "2023-… "IP A… "100"    "1411"                 "True"   "2023-07-02…
## 5 "2023-07… "2023-… "IP A… "100"    "1686"                 "True"   "2023-07-02…
## 6 "2023-07… "2023-… "IP A… "100"    "741"                  "True"   "2023-07-02…
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `3E_R_BELIEVE` <chr>, …

# Drop the first 4 raws as those were used for test 

study2_data_all_ent<- study2_data_raw_ent_all[-c(1:2),]

head(study2_data_all_ent)

## # A tibble: 6 × 166
##   StartDate EndDate Status Progress Duration (in seconds…¹ Finished RecordedDate
##   <chr>     <chr>   <chr>  <chr>    <chr>                  <chr>    <chr>       
## 1 2023-07-… 2023-0… IP Ad… 100      785                    True     2023-07-02 …
## 2 2023-07-… 2023-0… IP Ad… 100      1411                   True     2023-07-02 …
## 3 2023-07-… 2023-0… IP Ad… 100      1686                   True     2023-07-02 …
## 4 2023-07-… 2023-0… IP Ad… 100      741                    True     2023-07-02 …
## 5 2023-07-… 2023-0… IP Ad… 100      735                    True     2023-07-02 …
## 6 2023-07-… 2023-0… IP Ad… 100      1578                   True     2023-07-02 …
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 159 more variables: ResponseId <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q_RecaptchaScore <chr>, QID1 <chr>, QID3 <chr>,
## #   `COUNTRY&CITY` <chr>, AGE <chr>, PRONOUNS <chr>, BROWSE_INTERNET <chr>,
## #   USE_SNS <chr>, SNS_PLATFORM_USE <chr>, WATCHING_BEHAVIOR <chr>,
## #   SHARING_BEHAVIOR <chr>, KNOW_DEEPFAKE <chr>, KNOW_CREATE_DF <chr>,
## #   EXP_CREATE_DF <chr>, EASE_CREATE_DF <chr>, `3E_R_BELIEVE` <chr>, …

nrow(study2_data_all_ent)

## [1] 61

Study 2 Combined Data cleaning

Likert scales to order levels

#```{r}
#Likert scale has 
likely_values <-  c(
  "very unlikely",
 "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
 "moderately likely",
 "very likely"
)



consume_values <- c(
  "Less than 1 hour per day",
  "1-2 hours per day",
  "2-3 hours per day",
  "3-4 hours per day",
  "5+ hours per day"
)

interest_levels <- c ( "not at all interested in this",
                      "not interested", 
                      "neither not interested nor interested",
                      "interested",
                      "very much interested"
                      )


likley_shory_values <- c("Very unlikely",
                         "Unlikely",
                         "Neither likely nor unlikely",
                         "Likely",
                         "Very likely")


knowledgable_values <- c(
  "very unknowledgeable",
  "somewhat unknowledgeable",
  "neither",
  "somewhat knowledgeable",
  "very knowledgeable"
)
easy_levels <- c("Very difficult",
                 "Difficult",
                 "Neither difficult nor easy",
                 "Easy",
                 "Very easy")

boolen_q <- c( "Yes", "No")

importance_levels <- c ("very unimportant" ,
                       "unimportant",
                       "neither important nor unimportant",
                       "important" ,
                       "very important"
                       )

novel_levels <-c ("not at all novel",
                  "not novel",
                  "neither novel nor not novel",
                  "novel",
                  "very novel")

familiar_levels <-c ("not at all familiar",
                     "not familiar",
                     "neither familiar nor unfamiliar",
                     "familiar",
                     "very familiar")

believe_levels <- c("very unlikely", 
                    "unlikely", 
                    "neither likely nor unlikely", 
                    "likely",
                    "very likely" )

####################
#After the survey, the postsurvey questions

judging_impact_values <- c(
  "extremely unlikely",
  "moderately unlikely",
  "slightly unlikely",
  "slightly likely",
  "moderately likely",
  "extremely likely"
)

sharing_accuracy_level <- c ("not at all important",
                         "moderately important" ,
                       "slightly important",
                       "neither important nor unimportant",
                       "very important" ,
                       "extremely important"
                       )

Wrangling the Initial (Pre) survey data before experiment

#Get all the data into lower case since likert will be all in lower 
pol_individual_df <- study2_data_raw_pol |> 
 mutate(Duration = as.numeric(`Duration (in seconds)`),
         AGE = as.numeric(AGE),
         BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
         browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
         USE_SNS = ordered(USE_SNS, levels = consume_values),
         use_sns = as.numeric(USE_SNS,  levels = consume_values),
         WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
         watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
         SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
         sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
         KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
         EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
         KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
         KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
         know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
         EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
         ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
  dplyr::select(
    ResponseId,
    Duration,
    AGE,
    BROWSE_INTERNET,
    browse_internet,
    USE_SNS,
    use_sns,
    SNS_PLATFORM_USE,
    WATCHING_BEHAVIOR,
    watching_behavior,
    SHARING_BEHAVIOR,
    sharing_behavior,
    KNOW_DEEPFAKE,
    KNOW_CREATE_DF,
    know_create_df,
    EXP_CREATE_DF,
    EASE_CREATE_DF,
    ease_create_df
  ) |> mutate(
    SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
    value = TRUE
  ) |> 
  unnest() |> 
  mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |> 
  pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)

## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.

#-------------------------------------------

ent_individual_df<-study2_data_all_ent  |>
  mutate(Duration = as.numeric(`Duration (in seconds)`),
         AGE = as.numeric(AGE),
         BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
         browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
         USE_SNS = ordered(USE_SNS, levels = consume_values),
         use_sns = as.numeric(USE_SNS,  levels = consume_values),
         WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
         watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
         SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
         sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
         KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
         EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
         KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
         KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
         know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
         EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
         ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
  dplyr::select(
    ResponseId,
    Duration,
    AGE,
    BROWSE_INTERNET,
    browse_internet,
    USE_SNS,
    use_sns,
    SNS_PLATFORM_USE,
    WATCHING_BEHAVIOR,
    watching_behavior,
    SHARING_BEHAVIOR,
    sharing_behavior,
    KNOW_DEEPFAKE,
    KNOW_CREATE_DF,
    know_create_df,
    EXP_CREATE_DF,
    EASE_CREATE_DF,
    ease_create_df
  ) |> mutate(
    SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
    value = TRUE
  ) |> 
  unnest() |> 
  mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |> 
  pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)

## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.

#--------------------------

#Combined individual 

combined_individual_df<-rbind(ent_individual_df,pol_individual_df)

Wrangling of the survey data after control and learning treatment

pol_behavior_df <-study2_data_raw_pol |>
     select(ResponseId, matches ("_R_"), matches ("_DF_")) |> 
     pivot_longer(-ResponseId, values_drop_na = TRUE) |>
     separate(name, c("video", "fake", "question"), "_", extra= "merge") |>
     pivot_wider(names_from = question, values_from = value)|>
     select (ResponseId, fake, video, SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
     #Removing the error value created with Dont share 3E
     #filter (video != "3E") |>
     mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
         BELIEVE= str_to_lower(BELIEVE),
         believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
         IMPT= str_to_lower(IMPT),
         impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         INTEREST = str_to_lower(INTEREST), 
         INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
         interest_numerical = as.numeric(ordered(INTEREST, levels = interest_levels)),
         FAMILIAR = str_to_lower (FAMILIAR),
         familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
         NOVEL = str_to_lower(NOVEL),
         novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))|>
   #Select all the numeric values 
   select (ResponseId, 
           fake, 
           video,
          SHARE,
           BELIEVE,
           believe_numerical,
           share_numerical, 
           impt_numerical , 
           interest_numerical,
           familiar_numerical,
           novel_numerical)
  
  
 
         
#Selecting the Control group and convert likert value to numerical in post survey
pol_behavior_df_condition_cntr <-study2_data_raw_pol |>
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
     filter (condition == "Control") |>
     mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(CTRL_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)))|>
           dplyr::select (ResponseId, 
                   condition, 
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)

#Selecting the Treatment group and convert likert value to numerical in post survey
pol_behavior_df_condition_trmnt <-study2_data_raw_pol |> filter(Finished == "True") |>
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
     filter (condition == "Treatment") |>
     mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(ACT_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)),
           cy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)))|>
     dplyr::select (ResponseId, 
                   condition,
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)
    

# Binding the control and treatment into one table 
pol_survey_df <-bind_rows(pol_behavior_df_condition_trmnt, pol_behavior_df_condition_cntr)

# Binding the pre survey and post survey of each individual (60 participants)  
pol_pre_post_df <-merge (pol_survey_df, pol_individual_df ) 



# The data frame with their rating for each video and post survey 
pol_df <-merge (pol_behavior_df,pol_pre_post_df) 

#adding the video category column to the table 
pol_df['category'] <- 'pol'

colnames(pol_df)

##  [1] "ResponseId"               "fake"                    
##  [3] "video"                    "SHARE"                   
##  [5] "BELIEVE"                  "believe_numerical"       
##  [7] "share_numerical"          "impt_numerical"          
##  [9] "interest_numerical"       "familiar_numerical"      
## [11] "novel_numerical"          "condition"               
## [13] "JUDGING_IMPACT"           "SHARING_PERSP"           
## [15] "SHARING_INT"              "SHARING_ACCY"            
## [17] "judging_impact_numerical" "sharing_persp_numerical" 
## [19] "sharing_int_numerical"    "sharing_accy_numerical"  
## [21] "Duration"                 "AGE"                     
## [23] "BROWSE_INTERNET"          "browse_internet"         
## [25] "USE_SNS"                  "use_sns"                 
## [27] "WATCHING_BEHAVIOR"        "watching_behavior"       
## [29] "SHARING_BEHAVIOR"         "sharing_behavior"        
## [31] "KNOW_DEEPFAKE"            "KNOW_CREATE_DF"          
## [33] "know_create_df"           "EXP_CREATE_DF"           
## [35] "EASE_CREATE_DF"           "ease_create_df"          
## [37] "Plat_Facebook"            "Plat_Instagram"          
## [39] "Plat_Messenger"           "Plat_TikTok"             
## [41] "Plat_Snapchat"            "Plat_Twitter"            
## [43] "Plat_Quora"               "Plat_Microsoft Teams"    
## [45] "Plat_LinkedIn"            "Plat_YouTube"            
## [47] "Plat_Telegram"            "Plat_Reddit"             
## [49] "Plat_WhatsApp"            "Plat_Pinterest"          
## [51] "Plat_Skype"               "Plat_WeChat"             
## [53] "category"

#-----------------------------------------------------------------------

ent_behavior_df <-study2_data_all_ent |> filter(Finished == "True") |>
     dplyr::select(ResponseId, matches ("_R_"), matches ("_DF_")) |> 
     pivot_longer(-ResponseId, values_drop_na = TRUE) |>
     separate(name, c("video", "fake", "question"), "_", extra= "merge") |>
     pivot_wider(names_from = question, values_from = value)|>
     dplyr::select (ResponseId, fake, video ,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
     #Removing the error value created with Dont share 3E
     #filter (video != "3E") |>
     mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
         BELIEVE= str_to_lower(BELIEVE),
         believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
         IMPT= str_to_lower(IMPT),
         impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
         INTEREST = str_to_lower(INTEREST), 
         INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
         interest_numerical = as.numeric(ordered(INTEREST, levels = interest_levels)),
         FAMILIAR = str_to_lower (FAMILIAR),
         familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
         NOVEL = str_to_lower(NOVEL),
         novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))|>
   #Select all the numeric values 
   dplyr::select (ResponseId, 
           fake, 
           video,
           SHARE,
           BELIEVE,
           believe_numerical,
           share_numerical, 
           impt_numerical , 
           interest_numerical,
           familiar_numerical,
           novel_numerical)
  
 
         
#Selecting the Control group and convert likert value to numerical in post survey
ent_behavior_df_condition_cntr <-study2_data_all_ent|> filter(Finished == "True") |>
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
     dplyr::select (ResponseId, condition, matches ("CTRL_")) |>
     filter (condition == "Control") |>
     mutate(JUDGING_IMPACT = str_to_lower(CTRL_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(CTRL_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(CTRL_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(CTRL_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)))|>
           dplyr::select (ResponseId, 
                   condition, 
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)

#Selecting the Treatment group and convert likert value to numerical in post survey
ent_behavior_df_condition_trmnt <-study2_data_all_ent |> filter(Finished == "True") |>
     mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" ))|>
     filter (condition == "Treatment") |>
     mutate(JUDGING_IMPACT = str_to_lower(ACT_JUDGING_IMPACT),
           judging_impact_numerical = as.numeric(ordered(JUDGING_IMPACT, levels = judging_impact_values)),
           SHARING_PERSP = str_to_lower(ACT_SHARING_PERSP),
           sharing_persp_numerical = as.numeric(ordered(SHARING_PERSP, levels = judging_impact_values)),
           SHARING_INT = str_to_lower(ACT_SHARING_INT),
           sharing_int_numerical = as.numeric(ordered(SHARING_INT, levels = judging_impact_values)),
           SHARING_ACCY = str_to_lower(ACT_SHARING_ACCY),
           sharing_accy_numerical = as.numeric(ordered(SHARING_ACCY, levels = sharing_accuracy_level)))|>
     dplyr::select (ResponseId, 
                   condition,
                   JUDGING_IMPACT,
                   SHARING_PERSP,
                   SHARING_INT,
                   SHARING_ACCY,
                   judging_impact_numerical,  
                   sharing_persp_numerical, 
                   sharing_int_numerical, 
                   sharing_accy_numerical)




# Binding the control and treatment into one table 
ent_post_survey_df <-bind_rows(ent_behavior_df_condition_trmnt, ent_behavior_df_condition_cntr)

# Binding the pre survey and post survey of each individual (60 participants)  
ent_pre_post_df <-merge (ent_post_survey_df, ent_individual_df) 



# The data frame with their rating for each video and post survey 
ent_df <-merge (ent_behavior_df,ent_pre_post_df) 



#adding the video category column to the table 
ent_df['category'] <- 'ent'
colnames(ent_df)

##  [1] "ResponseId"               "fake"                    
##  [3] "video"                    "SHARE"                   
##  [5] "BELIEVE"                  "believe_numerical"       
##  [7] "share_numerical"          "impt_numerical"          
##  [9] "interest_numerical"       "familiar_numerical"      
## [11] "novel_numerical"          "condition"               
## [13] "JUDGING_IMPACT"           "SHARING_PERSP"           
## [15] "SHARING_INT"              "SHARING_ACCY"            
## [17] "judging_impact_numerical" "sharing_persp_numerical" 
## [19] "sharing_int_numerical"    "sharing_accy_numerical"  
## [21] "Duration"                 "AGE"                     
## [23] "BROWSE_INTERNET"          "browse_internet"         
## [25] "USE_SNS"                  "use_sns"                 
## [27] "WATCHING_BEHAVIOR"        "watching_behavior"       
## [29] "SHARING_BEHAVIOR"         "sharing_behavior"        
## [31] "KNOW_DEEPFAKE"            "KNOW_CREATE_DF"          
## [33] "know_create_df"           "EXP_CREATE_DF"           
## [35] "EASE_CREATE_DF"           "ease_create_df"          
## [37] "Plat_Facebook"            "Plat_YouTube"            
## [39] "Plat_Instagram"           "Plat_Messenger"          
## [41] "Plat_TikTok"              "Plat_Snapchat"           
## [43] "Plat_Pinterest"           "Plat_Twitter"            
## [45] "Plat_Reddit"              "Plat_Microsoft Teams"    
## [47] "Plat_WeChat"              "Plat_Skype"              
## [49] "Plat_WhatsApp"            "Plat_Telegram"           
## [51] "Plat_LinkedIn"            "Plat_Quora"              
## [53] "category"

colnames(pol_df)

##  [1] "ResponseId"               "fake"                    
##  [3] "video"                    "SHARE"                   
##  [5] "BELIEVE"                  "believe_numerical"       
##  [7] "share_numerical"          "impt_numerical"          
##  [9] "interest_numerical"       "familiar_numerical"      
## [11] "novel_numerical"          "condition"               
## [13] "JUDGING_IMPACT"           "SHARING_PERSP"           
## [15] "SHARING_INT"              "SHARING_ACCY"            
## [17] "judging_impact_numerical" "sharing_persp_numerical" 
## [19] "sharing_int_numerical"    "sharing_accy_numerical"  
## [21] "Duration"                 "AGE"                     
## [23] "BROWSE_INTERNET"          "browse_internet"         
## [25] "USE_SNS"                  "use_sns"                 
## [27] "WATCHING_BEHAVIOR"        "watching_behavior"       
## [29] "SHARING_BEHAVIOR"         "sharing_behavior"        
## [31] "KNOW_DEEPFAKE"            "KNOW_CREATE_DF"          
## [33] "know_create_df"           "EXP_CREATE_DF"           
## [35] "EASE_CREATE_DF"           "ease_create_df"          
## [37] "Plat_Facebook"            "Plat_Instagram"          
## [39] "Plat_Messenger"           "Plat_TikTok"             
## [41] "Plat_Snapchat"            "Plat_Twitter"            
## [43] "Plat_Quora"               "Plat_Microsoft Teams"    
## [45] "Plat_LinkedIn"            "Plat_YouTube"            
## [47] "Plat_Telegram"            "Plat_Reddit"             
## [49] "Plat_WhatsApp"            "Plat_Pinterest"          
## [51] "Plat_Skype"               "Plat_WeChat"             
## [53] "category"

#--------------------------------

combined_df<- bind_rows(ent_df, pol_df)
colnames(combined_df)

##  [1] "ResponseId"               "fake"                    
##  [3] "video"                    "SHARE"                   
##  [5] "BELIEVE"                  "believe_numerical"       
##  [7] "share_numerical"          "impt_numerical"          
##  [9] "interest_numerical"       "familiar_numerical"      
## [11] "novel_numerical"          "condition"               
## [13] "JUDGING_IMPACT"           "SHARING_PERSP"           
## [15] "SHARING_INT"              "SHARING_ACCY"            
## [17] "judging_impact_numerical" "sharing_persp_numerical" 
## [19] "sharing_int_numerical"    "sharing_accy_numerical"  
## [21] "Duration"                 "AGE"                     
## [23] "BROWSE_INTERNET"          "browse_internet"         
## [25] "USE_SNS"                  "use_sns"                 
## [27] "WATCHING_BEHAVIOR"        "watching_behavior"       
## [29] "SHARING_BEHAVIOR"         "sharing_behavior"        
## [31] "KNOW_DEEPFAKE"            "KNOW_CREATE_DF"          
## [33] "know_create_df"           "EXP_CREATE_DF"           
## [35] "EASE_CREATE_DF"           "ease_create_df"          
## [37] "Plat_Facebook"            "Plat_YouTube"            
## [39] "Plat_Instagram"           "Plat_Messenger"          
## [41] "Plat_TikTok"              "Plat_Snapchat"           
## [43] "Plat_Pinterest"           "Plat_Twitter"            
## [45] "Plat_Reddit"              "Plat_Microsoft Teams"    
## [47] "Plat_WeChat"              "Plat_Skype"              
## [49] "Plat_WhatsApp"            "Plat_Telegram"           
## [51] "Plat_LinkedIn"            "Plat_Quora"              
## [53] "category"

Basic Histograms

library(ggplot2)

ggplot(combined_df, aes(x=BELIEVE, fill = fake )) +  geom_bar() + 
   facet_wrap(~condition, nrow=2)

ggplot(combined_df, aes(x=SHARE, fill = fake)) +
  geom_bar() +
  facet_wrap(~condition, nrow=2)

histogram_df_control<-combined_df |> filter (condition =="Control")
histinfo=hist(histogram_df_control$share_numerical)

histinfo=hist(histogram_df_control$believe_numerical)

histogram_df_treatment<-combined_df |> filter (condition =="Treatment")
histinfo=hist(histogram_df_treatment$share_numerical)

histinfo=hist(histogram_df_treatment$believe_numerical)

Mediation Analysis

Step 1 - The total effect - the main path

#Total path 

fit.totaleffect=lm(share_numerical~condition,combined_df)

summary(fit.totaleffect)

## 
## Call:
## lm(formula = share_numerical ~ condition, data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3556 -0.8167 -0.8167  0.6444  4.1833 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.81667    0.05348  33.966  < 2e-16 ***
## conditionTreatment  0.53889    0.09264   5.817 7.88e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.435 on 1078 degrees of freedom
## Multiple R-squared:  0.03044,    Adjusted R-squared:  0.02954 
## F-statistic: 33.84 on 1 and 1078 DF,  p-value: 7.884e-09

STEP 2- The effect of Independent variable Condition to the mediator belivability

fit.mediator=lm(believe_numerical~condition,combined_df)
summary(fit.mediator)

## 
## Call:
## lm(formula = believe_numerical ~ condition, data = combined_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.08611 -1.08611 -0.00556  0.99444  1.99444 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.00556    0.05406   55.60   <2e-16 ***
## conditionTreatment  0.08056    0.09363    0.86     0.39    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.451 on 1078 degrees of freedom
## Multiple R-squared:  0.0006861,  Adjusted R-squared:  -0.0002409 
## F-statistic: 0.7401 on 1 and 1078 DF,  p-value: 0.3898

STEP 3 - The effect of the mediator on the dependent variable shareing behaviour

fit.dv=lm(share_numerical~condition +believe_numerical,combined_df)
summary(fit.dv)

## 
## Call:
## lm(formula = share_numerical ~ condition + believe_numerical, 
##     data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6612 -0.9640 -0.5245  0.4853  4.4755 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.25696    0.10407  21.687  < 2e-16 ***
## conditionTreatment  0.55069    0.09169   6.006 2.60e-09 ***
## believe_numerical  -0.14649    0.02981  -4.913 1.03e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.42 on 1077 degrees of freedom
## Multiple R-squared:  0.05169,    Adjusted R-squared:  0.04993 
## F-statistic: 29.35 on 2 and 1077 DF,  p-value: 3.866e-13

STEP 4 Mediation model

library(mediation)

## Warning: package 'mediation' was built under R version 4.3.1

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: mvtnorm

## Loading required package: sandwich

## mediation: Causal Mediation Analysis
## Version: 4.5.0

results = mediate(fit.mediator, fit.dv, treat='condition', mediator='believe_numerical', boot=T)

## Warning in mediate(fit.mediator, fit.dv, treat = "condition", mediator =
## "believe_numerical", : treatment and control values do not match factor levels;
## using Control and Treatment as control and treatment, respectively

## Running nonparametric bootstrap

summary(results)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            -0.0118      -0.0402         0.01    0.38    
## ADE              0.5507       0.3441         0.75  <2e-16 ***
## Total Effect     0.5389       0.3311         0.73  <2e-16 ***
## Prop. Mediated  -0.0219      -0.0877         0.03    0.38    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1080 
## 
## 
## Simulations: 1000

Conducting simple ANOVA for treatment and condiition

One way ANOVA

# OUr data ent_df 
one.way <- aov( share_numerical ~ condition, data = combined_df)

summary(one.way)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## condition      1   69.7   69.70   33.84 7.88e-09 ***
## Residuals   1078 2220.3    2.06                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of one way

library (lsr)

## Warning: package 'lsr' was built under R version 4.3.1

# Eta sqr one way 
Eta_oneway <-etaSquared(one.way)

print (Eta_oneway)

##               eta.sq eta.sq.part
## condition 0.03043526  0.03043526

Effect size of two way with condition and beliveability (no interactions)

combined_twoway_no_interactions <- aov (share_numerical ~ condition+ believe_numerical,
                                   data= combined_df)

summary (combined_twoway_no_interactions)

##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## condition            1   69.7   69.70   34.57 5.49e-09 ***
## believe_numerical    1   48.7   48.68   24.14 1.03e-06 ***
## Residuals         1077 2171.6    2.02                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of two way wiuthout interactions

# Eta sqr one way 
Eta_twoway_no_interactions <-etaSquared(combined_twoway_no_interactions)

print (Eta_twoway_no_interactions)

##                       eta.sq eta.sq.part
## condition         0.03176101  0.03240691
## believe_numerical 0.02125675  0.02192401

Two way ANOVA with Interactions

combined_twoway_with_interactions <- aov (share_numerical ~ condition * believe_numerical,
                                     data=combined_df)

summary ( combined_twoway_with_interactions)

##                               Df Sum Sq Mean Sq F value   Pr(>F)    
## condition                      1   69.7   69.70   34.64 5.31e-09 ***
## believe_numerical              1   48.7   48.68   24.19 1.01e-06 ***
## condition:believe_numerical    1    6.4    6.40    3.18   0.0748 .  
## Residuals                   1076 2165.2    2.01                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size of two way with interactions

# Eta sqr one way 
Eta_twoway_with_interactions <-etaSquared(combined_twoway_with_interactions)

print (Eta_twoway_with_interactions )

##                                  eta.sq eta.sq.part
## condition                   0.031761008 0.032499571
## believe_numerical           0.021256751 0.021987382
## condition:believe_numerical 0.002794306 0.002946622

combined_df |> ggplot(aes(condition,share_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

combined_df |> ggplot(aes(condition,believe_numerical)) + 
  # facet_wrap(vars(video)) +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "pointrange",
    shape = 21,
    fill = "white"
  )

# What if there is a deepfake/read effect to the share intensions

deepfake.anova <- aov(share_numerical~condition + fake, data=combined_df)
summary(deepfake.anova)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## condition      1   69.7   69.70   34.63 5.33e-09 ***
## fake           1   52.4   52.45   26.06 3.92e-07 ***
## Residuals   1077 2167.8    2.01                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

What if there is a deepfake/read effect and there interetest to the share intentions

interest_deepfake.anova <- aov(share_numerical~condition + fake+ interest_numerical, data=combined_df)
summary(interest_deepfake.anova)

##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## condition             1   69.7    69.7   64.90 2.08e-15 ***
## fake                  1   52.4    52.4   48.84 4.87e-12 ***
## interest_numerical    1 1012.3  1012.3  942.61  < 2e-16 ***
## Residuals          1076 1155.5     1.1                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

What if there is a effect of deepfake/real + interetest + novel+ to the share intentions

novel_interest_deepfake.anova <- aov(share_numerical~condition + fake+ interest_numerical+ novel_numerical, data=combined_df)
summary(novel_interest_deepfake.anova)

##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## condition             1   69.7    69.7   70.64  < 2e-16 ***
## fake                  1   52.4    52.4   53.16 5.96e-13 ***
## interest_numerical    1 1012.3  1012.3 1025.96  < 2e-16 ***
## novel_numerical       1   94.9    94.9   96.14  < 2e-16 ***
## Residuals          1075 1060.7     1.0                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

What if there is a effect of deepfake/real + novel+ importance to the share intentions

impt_novel_interest_deepfake.anova <- aov(share_numerical~ condition + fake +  novel_numerical + impt_numerical, data=combined_df)
summary(impt_novel_interest_deepfake.anova)

##                   Df Sum Sq Mean Sq F value   Pr(>F)    
## condition          1   69.7    69.7   50.87 1.81e-12 ***
## fake               1   52.4    52.4   38.28 8.70e-10 ***
## novel_numerical    1  609.2   609.2  444.66  < 2e-16 ***
## impt_numerical     1   85.7    85.7   62.55 6.40e-15 ***
## Residuals       1075 1472.9     1.4                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

How about the sharing intention of only deepfake videos and only Real videos

#select only deepfakes 
only_df<- combined_df |> filter (fake =="DF")

deepfake_video.anova <- aov(share_numerical~condition , data=only_df)
summary(deepfake_video.anova)

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## condition     1   32.4   32.38   19.41 1.27e-05 ***
## Residuals   538  897.5    1.67                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#select only real
only_real <- combined_df |> filter (fake == "R")

real_video.anova <- aov(share_numerical~condition , data=only_real)
summary(real_video.anova)

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## condition     1   37.4   37.41   15.84 7.82e-05 ***
## Residuals   538 1270.2    2.36                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regression model using only deepfake videos

deepfake_lm=lm(share_numerical~condition,only_df)
summary(deepfake_lm)

## 
## Call:
## lm(formula = share_numerical ~ condition, data = only_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1222 -0.6028 -0.6028  0.3972  4.3972 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.60278    0.06807  23.545  < 2e-16 ***
## conditionTreatment  0.51944    0.11791   4.406 1.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.292 on 538 degrees of freedom
## Multiple R-squared:  0.03482,    Adjusted R-squared:  0.03303 
## F-statistic: 19.41 on 1 and 538 DF,  p-value: 1.274e-05

Regression model using only real videos

real_lm=lm(share_numerical~condition,only_real)
summary(real_lm)

## 
## Call:
## lm(formula = share_numerical ~ condition, data = only_real)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5889 -1.0306 -1.0306  0.9694  3.9694 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.03056    0.08098   25.07  < 2e-16 ***
## conditionTreatment  0.55833    0.14027    3.98 7.82e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.537 on 538 degrees of freedom
## Multiple R-squared:  0.02861,    Adjusted R-squared:  0.0268 
## F-statistic: 15.84 on 1 and 538 DF,  p-value: 7.823e-05

Regreassion of all covariates

multi_factor_lm=lm(share_numerical~condition + interest_numerical  + impt_numerical+ believe_numerical +  familiar_numerical+ novel_numerical , combined_df)
summary(multi_factor_lm)

## 
## Call:
## lm(formula = share_numerical ~ condition + interest_numerical + 
##     impt_numerical + believe_numerical + familiar_numerical + 
##     novel_numerical, data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3645 -0.5530  0.0070  0.5067  4.3822 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.39993    0.12810  -3.122  0.00184 ** 
## conditionTreatment  0.44452    0.06431   6.913 8.15e-12 ***
## interest_numerical  0.61901    0.03112  19.894  < 2e-16 ***
## impt_numerical      0.01726    0.02993   0.577  0.56421    
## believe_numerical  -0.03192    0.02135  -1.495  0.13519    
## familiar_numerical  0.02362    0.02125   1.111  0.26666    
## novel_numerical     0.29938    0.03173   9.435  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9926 on 1073 degrees of freedom
## Multiple R-squared:  0.5384, Adjusted R-squared:  0.5358 
## F-statistic: 208.6 on 6 and 1073 DF,  p-value: < 2.2e-16

interest_factor_lm=lm(share_numerical~condition + 
                     interest_numerical ,combined_df)
summary (interest_factor_lm)

## 
## Call:
## lm(formula = share_numerical ~ condition + interest_numerical, 
##     data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8764 -0.5409  0.0229  0.4591  4.0229 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.01605    0.06983  -0.230    0.818    
## conditionTreatment  0.43617    0.06695   6.515 1.11e-10 ***
## interest_numerical  0.77850    0.02472  31.498  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.036 on 1077 degrees of freedom
## Multiple R-squared:  0.4953, Adjusted R-squared:  0.4944 
## F-statistic: 528.6 on 2 and 1077 DF,  p-value: < 2.2e-16

impt_factor_lm = lm (share_numerical~condition + impt_numerical ,combined_df)
summary (impt_factor_lm)

## 
## Call:
## lm(formula = share_numerical ~ condition + impt_numerical, data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3396 -0.8511 -0.2798  0.6604  4.7202 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.28751    0.10950   2.626  0.00877 ** 
## conditionTreatment  0.57128    0.08377   6.820 1.52e-11 ***
## impt_numerical      0.49617    0.03188  15.564  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.297 on 1077 degrees of freedom
## Multiple R-squared:  0.2085, Adjusted R-squared:  0.207 
## F-statistic: 141.8 on 2 and 1077 DF,  p-value: < 2.2e-16

int_imt_factor_lm=lm (share_numerical~condition + impt_numerical + interest_numerical, combined_df)
summary (int_imt_factor_lm)

## 
## Call:
## lm(formula = share_numerical ~ condition + impt_numerical + interest_numerical, 
##     data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9194 -0.6325  0.0861  0.3670  4.0861 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.16870    0.08906  -1.894   0.0585 .  
## conditionTreatment  0.44743    0.06687   6.691 3.55e-11 ***
## impt_numerical      0.08323    0.03029   2.748   0.0061 ** 
## interest_numerical  0.73438    0.02941  24.970  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.033 on 1076 degrees of freedom
## Multiple R-squared:  0.4989, Adjusted R-squared:  0.4975 
## F-statistic:   357 on 3 and 1076 DF,  p-value: < 2.2e-16

#Checking discernment 
Combined_believability<-lm (formula = believe_numerical ~ condition, data = combined_df)
summary(Combined_believability)

## 
## Call:
## lm(formula = believe_numerical ~ condition, data = combined_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.08611 -1.08611 -0.00556  0.99444  1.99444 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.00556    0.05406   55.60   <2e-16 ***
## conditionTreatment  0.08056    0.09363    0.86     0.39    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.451 on 1078 degrees of freedom
## Multiple R-squared:  0.0006861,  Adjusted R-squared:  -0.0002409 
## F-statistic: 0.7401 on 1 and 1078 DF,  p-value: 0.3898

colnames(combined_df)

##  [1] "ResponseId"               "fake"                    
##  [3] "video"                    "SHARE"                   
##  [5] "BELIEVE"                  "believe_numerical"       
##  [7] "share_numerical"          "impt_numerical"          
##  [9] "interest_numerical"       "familiar_numerical"      
## [11] "novel_numerical"          "condition"               
## [13] "JUDGING_IMPACT"           "SHARING_PERSP"           
## [15] "SHARING_INT"              "SHARING_ACCY"            
## [17] "judging_impact_numerical" "sharing_persp_numerical" 
## [19] "sharing_int_numerical"    "sharing_accy_numerical"  
## [21] "Duration"                 "AGE"                     
## [23] "BROWSE_INTERNET"          "browse_internet"         
## [25] "USE_SNS"                  "use_sns"                 
## [27] "WATCHING_BEHAVIOR"        "watching_behavior"       
## [29] "SHARING_BEHAVIOR"         "sharing_behavior"        
## [31] "KNOW_DEEPFAKE"            "KNOW_CREATE_DF"          
## [33] "know_create_df"           "EXP_CREATE_DF"           
## [35] "EASE_CREATE_DF"           "ease_create_df"          
## [37] "Plat_Facebook"            "Plat_YouTube"            
## [39] "Plat_Instagram"           "Plat_Messenger"          
## [41] "Plat_TikTok"              "Plat_Snapchat"           
## [43] "Plat_Pinterest"           "Plat_Twitter"            
## [45] "Plat_Reddit"              "Plat_Microsoft Teams"    
## [47] "Plat_WeChat"              "Plat_Skype"              
## [49] "Plat_WhatsApp"            "Plat_Telegram"           
## [51] "Plat_LinkedIn"            "Plat_Quora"              
## [53] "category"

# real  videos 
Fake_video_df <- combined_df |> filter(fake =='DF')
fake_video_lm<- lm (formula = believe_numerical ~ condition, data = Fake_video_df)
summary(fake_video_lm)

## 
## Call:
## lm(formula = believe_numerical ~ condition, data = Fake_video_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9083 -0.9083  0.2167  1.0917  1.2167 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.90833    0.06588  59.328   <2e-16 ***
## conditionTreatment -0.12500    0.11410  -1.096    0.274    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.25 on 538 degrees of freedom
## Multiple R-squared:  0.002226,   Adjusted R-squared:  0.0003712 
## F-statistic:   1.2 on 1 and 538 DF,  p-value: 0.2738

Real_video_df <- combined_df |> filter(fake =='DF')
fake_video_lm<- lm (formula = believe_numerical ~ condition, data = Fake_video_df)
summary(fake_video_lm)

## 
## Call:
## lm(formula = believe_numerical ~ condition, data = Fake_video_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9083 -0.9083  0.2167  1.0917  1.2167 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.90833    0.06588  59.328   <2e-16 ***
## conditionTreatment -0.12500    0.11410  -1.096    0.274    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.25 on 538 degrees of freedom
## Multiple R-squared:  0.002226,   Adjusted R-squared:  0.0003712 
## F-statistic:   1.2 on 1 and 538 DF,  p-value: 0.2738

Understaning the reasons for sharing videos and not sharing videos POLITICAL CONTEXT

#selecting the sharing reasons for real videos 

 clean_political_data <-study2_data_raw_pol |>
  dplyr::select(ResponseId, matches ("_R_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  
  )

clean_political_data |> 
  pivot_longer(cols = c(SHARE_REASON,DNSHARE_REASON)) |> 
  mutate(
    name = if_else(name == "DNSHARE_REASON","Not sharing", "Sharing")
  ) |> 
  filter(!grepl("Other",value)) |> 
  filter(!is.na(value)) |> 
  group_by(value,video,name) |> count() |> 
  ggplot(aes(reorder(value,n),n)) +
  geom_col() + 
  coord_flip() +
  facet_grid(rows = vars(name),scales = "free",cols = vars(video)) +
  labs(x="Real Video Plitical Videos", y="") +
  theme_pubclean()

# Sharing and not sharing reasons on Deefake videos 
 clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_DF_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

clean_political_data |> 
  pivot_longer(cols = c(SHARE_REASON,DNSHARE_REASON)) |> 
  mutate(
    name = if_else(name == "DNSHARE_REASON","Not sharing", "Sharing")
  ) |> 
  filter(!grepl("Other",value)) |> 
  filter(!is.na(value)) |> 
  group_by(value,video,name) |> count() |> 
  ggplot(aes(reorder(value,n),n)) +
  geom_col() + 
  coord_flip() +
  facet_grid(rows = vars(name),scales = "free",cols = vars(video)) +
  labs(x="DeepFake  Political Videos", y="") +
  theme_pubclean()

# Understaning the reasons for sharing videos and not sharing videos ENTERTAINMENT CONTEXT

# Sharing and not sharing reasons on Deefake videos 
 clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_DF_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

clean_entertainment_data |> 
  pivot_longer(cols = c(SHARE_REASON,DNSHARE_REASON)) |> 
  mutate(
    name = if_else(name == "DNSHARE_REASON","Not sharing", "Sharing")
  ) |> 
  filter(!grepl("Other",value)) |> 
  filter(!is.na(value)) |> 
  group_by(value,video,name) |> count() |> 
  ggplot(aes(reorder(value,n),n)) +
  geom_col() + 
  coord_flip() +
  facet_grid(rows = vars(name),scales = "free",cols = vars(video)) +
  labs(x="DeepFake Entertainment Videos", y="") +
  theme_pubclean()

# Sharing and not sharing reasons on Real videos 
 clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_R_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

clean_entertainment_data |> 
  pivot_longer(cols = c(SHARE_REASON,DNSHARE_REASON)) |> 
  mutate(
    name = if_else(name == "DNSHARE_REASON","Not sharing", "Sharing")
  ) |> 
  filter(!grepl("Other",value)) |> 
  filter(!is.na(value)) |> 
  group_by(value,video,name) |> count() |> 
  ggplot(aes(reorder(value,n),n)) +
  geom_col() + 
  coord_flip() +
  facet_grid(rows = vars(name),scales = "free",cols = vars(video)) +
  labs(x="Real Entertainment Videos ", y="") +
  theme_pubclean()

Entertainment videos

DEEPFAKE VIDEOs

#########################################
## DEEPFAKES DNSHARING REASONS FOR OVERAL DATA 
#########################################
 all_deepfake_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_DF_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

# Table with RespondID, DNSharing reasons and entertainment
tab_deepfake_DN_entertainment<- all_deepfake_clean_entertainment_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = tab_deepfake_DN_entertainment, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl DF ENTERTAINMENT", x="DNSharing DeepFake Entertainment Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL DATA by video level
#########################################
ggplot(data = tab_deepfake_DN_entertainment, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title= "Trmtn+ Control DF DNSHARE ", x="Resons DNSharing DeepFake Entertainment Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL DATA 
#########################################
tab_deepfake_Share_entertainment<- all_deepfake_clean_entertainment_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = tab_deepfake_Share_entertainment, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "OVERAL SHARING DF", x="Resons for Sharing DeepFake Entertainment Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL DATA  Group by video
#########################################
ggplot(data = tab_deepfake_Share_entertainment, aes(x = SHARE_REASON, fill = video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+ 
  labs(title= "OVERAL SHARING DF by Vedio", x="Resons for not Sharing DeepFake Entertainment Videos", y="")

## DEEPFAKE VIDEO: TREATMENT AND CONTROL DEEPFAKES DNSHARING and SHARING based on Active and Control cases

#########################################
##   TREATMENT DN SHARE REASONS   #########
##########################################

 trmt_all_deepfake_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_deepfake_DN_entertainment<- trmt_all_deepfake_clean_entertainment_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = trmt_tab_deepfake_DN_entertainment, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION DNSHARE DF", x="DNSHARE DeepFake Entertainment Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = trmt_tab_deepfake_DN_entertainment, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION DNSHare DF", x="DNSHARE DeepFake Entertainment Videos", y="")

####################################
# Control Condition DNSHARE
####################################

 cntrl_all_deepfake_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Control")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Control Condition SHARE
####################################

# Table with control RespondID, DNSharing reasons and entertainment
cntrl_tab_deepfake_DN_entertainment<- cntrl_all_deepfake_clean_entertainment_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = cntrl_tab_deepfake_DN_entertainment, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION DNShare DF", x="DNSHARE DeepFake Entertainment Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = cntrl_tab_deepfake_DN_entertainment, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROLT CONDITION DNShare vidol level", x="DNSHARE DeepFake Entertainment Videos", y="")

###   TREAMENT SHARING  ##############

#########################################
## DEEPFAKES ***SHARING  based on Active and Control cases  
#########################################

 trmt_alldeen_deepfake_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition SHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_deepfake_share_entertainment<- trmt_all_deepfake_clean_entertainment_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = trmt_tab_deepfake_share_entertainment, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION SHARE DF", x="SHARE DeepFake Entertainment Videos", y="")

##### The share reasons group by videos #######

ggplot(data = trmt_tab_deepfake_share_entertainment, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION Share DF", x="SHARE DeepFake Entertainment Videos", y="")

####################################
# Control Condition SHARE
####################################

 cntrl_all_deepfake_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Control")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Control Condition SHARE
####################################

# Table with RespondID, Sharing reasons and entertainment
cntrl_tab_deepfake_share_entertainment<- cntrl_all_deepfake_clean_entertainment_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = cntrl_tab_deepfake_share_entertainment, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION SHARE DF", x="SHARE DeepFake Entertainment Videos", y="")

##### The share reasons group by videos #######

ggplot(data =  cntrl_tab_deepfake_share_entertainment, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROLT CONDITION Deepfake vidol level", x="SHARE DeepFake Entertainment Videos", y="")

REAL VIDEOS

#########################################
## REAL  DNSHARING REASONS FOR OVERAL DATA 
#########################################
 all_real_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_R_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

# Table with RespondID, DNSharing reasons and entertainment
tab_real_DN_entertainment<- all_real_clean_entertainment_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = tab_real_DN_entertainment, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl Real ENTERTAINMENT", x="DNSharing Real Entertainment Videos", y="")

ggplot(data = tab_real_DN_entertainment, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl Real ENTERTAINMENT", x="DNSharing Real Entertainment Videos", y="")

#########################################
## REAL VIDEOS SHARING REASONS FOR OVERAL DATA 
#########################################
tab_real_Share_entertainment<- all_real_clean_entertainment_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = tab_real_Share_entertainment, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "OVERAL SHARING REAL", x="Resons for Sharing Real Entertainment Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL DATA  Group by video
#########################################
ggplot(data = tab_real_Share_entertainment, aes(x = SHARE_REASON, fill = video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+ 
  labs(title= "OVERAL SHARING REAL by Vedio", x="Resons for not Sharing REAL Entertainment Videos", y="")

REAL VIDEO: TREATMENT AND CONTROL DNSHARING and SHARING based on Active and Control cases

#########################################
##   TREATMENT DN SHARE REASONS  FOR REAL  #########
##########################################

 trmt_all_real_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_real_DN_entertainment<- trmt_all_real_clean_entertainment_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = trmt_tab_real_DN_entertainment, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION DNSHARE REAL", x="DNSHARE Real Entertainment Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = trmt_tab_real_DN_entertainment, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION DNSHare REAL", x="DNSHARE Real Entertainment Videos", y="")

#########################################
##   CONTROL DN SHARE REASONS  FOR REAL  #########
##########################################

 cntrl_all_real_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Control")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Cotrol Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
cntrl_tab_real_DN_entertainment<- cntrl_all_real_clean_entertainment_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = cntrl_tab_real_DN_entertainment, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION DNSHARE REAL", x="DNSHARE Real Entertainment Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = cntrl_tab_real_DN_entertainment, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION DNSHare REAL", x="DNSHARE Real Entertainment Videos", y="")

###   TREAMENT SHARING  ##############

#########################################
## REAL  ***SHARING  based on Active and Control cases  
#########################################

 trmt_all_real_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition SHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_real_share_entertainment<- trmt_all_real_clean_entertainment_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = trmt_tab_real_share_entertainment, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION SHARE REAL", x="SHARE REAL Entertainment Videos", y="")

##### The share reasons group by videos #######

ggplot(data = trmt_tab_real_share_entertainment, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION Share REAL", x = "SHARE REAL Entertainment Videos", y="")

###  CONTROL SHARING  ##############

#########################################
## REAL  ***SHARING  based on Active and Control cases  
#########################################

 cntrl_all_real_clean_entertainment_data <-study2_data_all_ent |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Control")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Control Condition SHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
cntrl_tab_real_share_entertainment<- cntrl_all_real_clean_entertainment_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = cntrl_tab_real_share_entertainment, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION SHARE REAL", x="SHARE REAL Entertainment Videos", y="")

##### The share reasons group by videos #######

ggplot(data = cntrl_tab_real_share_entertainment, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "Control CONDITION Share REAL", x = "SHARE REAL Entertainment Videos", y="")

# Political vidos DF and Real

Deepfake Videos

#########################################
## DEEPFAKES DNSHARING REASONS FOR OVERAL DATA 
#########################################
 all_deepfake_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_DF_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

# Table with RespondID, DNSharing reasons and political
tab_deepfake_DN_political<- all_deepfake_clean_political_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = tab_deepfake_DN_political, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl DF POLITICAL", x="DNSharing DeepFake POLITICAL Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL DATA by video level
#########################################
ggplot(data = tab_deepfake_DN_political, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title= "Trmtn+ Control DF DNSHARE ", x="Resons DNSharing DeepFake Political Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL POLITICAL DATA 
#########################################
tab_deepfake_Share_political<- all_deepfake_clean_political_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = tab_deepfake_Share_political, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "OVERAL SHARING DF", x="Resons for Sharing DeepFake Political Videos", y="")

#########################################
## DEEPFAKES SHARING REASONS FOR OVERAL DATA  Group by video
#########################################
ggplot(data = tab_deepfake_Share_political, aes(x = SHARE_REASON, fill = video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+ 
  labs(title= "OVERAL SHARING DF by Vedio", x="Resons for not Sharing DeepFake Political Videos", y="")

## DNSharing Deepfakes based on Control and Treament groups

#########################################
##   TREATMENT DN SHARE REASONS POLITICAL  #########
##########################################

 trmt_all_deepfake_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_deepfake_DN_political<- trmt_all_deepfake_clean_political_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = trmt_tab_deepfake_DN_political, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION DNSHARE DF", x="DNSHARE DeepFake Political Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = trmt_tab_deepfake_DN_political, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION DNSHare DF", x="DNSHARE DeepFake Political Videos", y="")

## SHARE Deepfake Political Treatment and Control Reasons for

#########################################
##   TREATMENT SHARE REASONS POLITICAL  #########
##########################################

 trmt_all_deepfake_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_deepfake_share_political<- trmt_all_deepfake_clean_political_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = trmt_tab_deepfake_share_political, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION SHARE DF", x="SHARE DeepFake Political Videos", y="")

##### The share reasons group by videos #######

ggplot(data = trmt_tab_deepfake_share_political, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "TREATMENT CONDITION SHare DF", x="SHARE DeepFake Political Videos", y="")

#########################################
##   CONTROL SHARE REASONS POLITICAL  #########
##########################################

 cntrl_all_deepfake_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Control")|>
  dplyr::select(ResponseId, matches("_DF_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Control Condition SHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
cntrl_tab_real_share_political<- cntrl_all_deepfake_clean_political_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = cntrl_tab_real_share_political, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION SHARE REAL", x="SHARE REAL Political Videos", y="")

##### The share reasons group by videos #######

ggplot(data = cntrl_tab_real_share_political, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "CONTROL CONDITION SHare REAL", x="SHARE REAL Political Videos", y="")

Real Political Videos

Overal DNSharing of Real political videos

#########################################
## REAL  DNSHARING REASONS FOR OVERAL DATA 
#########################################
 all_real_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_R_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

# Table with RespondID, DNSharing reasons and entertainment
tab_real_DN_political<- all_real_clean_political_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = tab_real_DN_political, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl Real Political", x="DNSharing Real Political Videos", y="")

ggplot(data = tab_real_DN_political, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl Real POLITICAL", x="DNSharing Real POLITICAL Videos", y="")

Overal SHARE Political Videos

#########################################
## REAL  DNSHARING REASONS FOR OVERAL DATA 
#########################################
 all_real_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  dplyr::select(ResponseId, matches ("_R_")) |>
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

# Table with RespondID, DNSharing reasons and entertainment
tab_real_share_political<- all_real_clean_political_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = tab_real_share_political, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl Real Political", x="Sharing Real Political Videos", y="")

ggplot(data = tab_real_share_political, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip()+
  labs(title = "trmt+cntrl Real POLITICAL", x="Sharing Real POLITICAL Videos", y="")

# Political REAL VIDEO: TREATMENT AND CONTROL DNSHARING and SHARING based on Active and Control cases

POLITICAL REAL DNSHARE TRMT CNTRL

#########################################
##   TREATMENT DN SHARE REASONS  FOR REAL  #########
##########################################

 trmt_all_real_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_real_DN_political<- trmt_all_real_clean_political_data |>
  dplyr::select(ResponseId, DNSHARE_REASON, video)


ggplot(data = trmt_tab_real_DN_political, aes(x = DNSHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "POLITICAL TREATMENT CONDITION DNSHARE REAL", x="DNSHARE Real Political Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = trmt_tab_real_DN_political, aes(x = DNSHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "POLITICAL TREATMENT CONDITION DNSHare REAL", x="DNSHARE Real Political Videos", y="")

## POLITICAL REAL SHARE TRMT CNTRL

#########################################
##   TREATMENT SHARE REASONS  FOR REAL  #########
##########################################

 trmt_all_real_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Treatment")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Treatment Condition DNSHARE
####################################

# Table with RespondID, DNSharing reasons and entertainment
trmt_tab_real_share_political<- trmt_all_real_clean_political_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = trmt_tab_real_share_political, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "POLITICAL TREATMENT CONDITION SHARE REAL", x="SHARE Real Political Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = trmt_tab_real_share_political, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "POLITICAL TREATMENT CONDITION Share REAL", x="SHARE Real Political Videos", y="")

#########################################
##   CONTROL SHARE REASONS  FOR REAL  #########
##########################################

 cntrl_all_real_clean_political_data <-study2_data_raw_pol |>
  # just using select is not working so needed to call directly from package 
  mutate (condition = if_else(is.na(CTRL_JUDGING_IMPACT), "Treatment", "Control" )) |>
  filter (condition == "Control")|>
  dplyr::select(ResponseId, matches("_R_"))|> 
  pivot_longer(-ResponseId) |>
  separate(name,
           into = c("video", "realness", "name"),
           extra = "merge") |>
  mutate(
    name = sub("REAS.*", "REASON", name),
    #video = as_numeric(sub("P","",video))
    ) |>
  pivot_wider() |>
  filter(!is.na(SHARE)) |>
  separate_rows(SHARE_REASON, sep = ",") |>
  separate_rows(DNSHARE_REASON, sep = ",") |>
  mutate(
    SHARE_OTHER = if_else(grepl("Other", SHARE_REASON), SHARE_OTHER, NA),
    DNSHARE_OTHER = if_else(grepl("Other", DNSHARE_REASON), DNSHARE_OTHER, NA)
  )

####################################
# Control Condition SHARE
####################################

# Table with RespondID, Sharing reasons and entertainment
cntrl_tab_real_share_political<- cntrl_all_real_clean_political_data |>
  dplyr::select(ResponseId, SHARE_REASON, video)


ggplot(data = cntrl_tab_real_share_political, aes(x = SHARE_REASON)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "POLITICAL CONTROL CONDITION SHARE REAL", x="SHARE Real Political Videos", y="")

##### The DN share reasons group by videos #######

ggplot(data = cntrl_tab_real_share_political, aes(x = SHARE_REASON, fill= video)) + 
    geom_bar(position = "dodge") + theme(axis.text.x = element_text(angle = 90, hjust =0 )) + coord_flip() +
  labs(title = "POLITICAL CONTROL CONDITION Share REAL", x="SHARE Real Political Videos", y="")

Study 2 Combined

Dilrukshi

2023-08-07