In this study, we intent to explore do people believe deepfakes and their sharing intentions of deepfake videos.
We pre-registerd our study in this -
Data sets can be found in our Github Repo -
## New names:
## Rows: 130 Columns: 261
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (261): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 131 Columns: 261
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (261): StartDate, EndDate, Status, Progress, Duration (in seconds), Fini...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `1P_R_IMPT` -> `1P_R_IMPT...27`
## • `1P_R_INTEREST` -> `1P_R_INTEREST...28`
## • `1P_R_FAMILIAR` -> `1P_R_FAMILIAR...29`
## • `1P_R_NOVEL` -> `1P_R_NOVEL...30`
## • `1P_R_SHARE` -> `1P_R_SHARE...31`
## • `1P_R_SHARE_REASONS` -> `1P_R_SHARE_REASONS...32`
## • `1P_R_SHARE_OTHER` -> `1P_R_SHARE_OTHER...33`
## • `1P_R_DNSHARE_REASONS` -> `1P_R_DNSHARE_REASONS...34`
## • `1P_R_DNSHARE_OTHER` -> `1P_R_DNSHARE_OTHER...35`
## • `2P_DF_IMPT` -> `2P_DF_IMPT...37`
## • `2P_DF_INTEREST` -> `2P_DF_INTEREST...38`
## • `2P_DF_FAMILIAR` -> `2P_DF_FAMILIAR...39`
## • `2P_DF_NOVEL` -> `2P_DF_NOVEL...40`
## • `2P_DF_SHARE` -> `2P_DF_SHARE...41`
## • `2P_DF_SHARE_REASONS` -> `2P_DF_SHARE_REASONS...42`
## • `2P_DF_SHARE_OTHER` -> `2P_DF_SHARE_OTHER...43`
## • `2P_DF_DNSHARE_REASON` -> `2P_DF_DNSHARE_REASON...44`
## • `2P_DF_DNSHARE_OTHER` -> `2P_DF_DNSHARE_OTHER...45`
## • `4P_DF_IMPT` -> `4P_DF_IMPT...47`
## • `4P_DF_INTEREST` -> `4P_DF_INTEREST...48`
## • `4P_DF_FAMILIAR` -> `4P_DF_FAMILIAR...49`
## • `4P_DF_NOVEL` -> `4P_DF_NOVEL...50`
## • `4P_DF_SHARE` -> `4P_DF_SHARE...51`
## • `4P_DF_SHARE_REASONS` -> `4P_DF_SHARE_REASONS...52`
## • `4P_DF_SHARE_OTHER` -> `4P_DF_SHARE_OTHER...53`
## • `4P_DF_DNSHARE_REASON` -> `4P_DF_DNSHARE_REASON...54`
## • `4P_DF_DNSHARE_OTHER` -> `4P_DF_DNSHARE_OTHER...55`
## • `5P_R_IMPT` -> `5P_R_IMPT...57`
## • `5P_R_INTEREST` -> `5P_R_INTEREST...58`
## • `5P_R_FAMILIAR` -> `5P_R_FAMILIAR...59`
## • `5P_R_NOVEL` -> `5P_R_NOVEL...60`
## • `5P_R_SHARE` -> `5P_R_SHARE...61`
## • `5P_R_SHARE_REASONS` -> `5P_R_SHARE_REASONS...62`
## • `5P_R_SHARE_OTHER` -> `5P_R_SHARE_OTHER...63`
## • `5P_R_DNSHARE_REASONS` -> `5P_R_DNSHARE_REASONS...64`
## • `5P_R_DNSHARE_OTHER` -> `5P_R_DNSHARE_OTHER...65`
## • `6P_DF_IMPT` -> `6P_DF_IMPT...67`
## • `6P_DF_INTEREST` -> `6P_DF_INTEREST...68`
## • `6P_DF_FAMILIAR` -> `6P_DF_FAMILIAR...69`
## • `6P_DF_NOVEL` -> `6P_DF_NOVEL...70`
## • `6P_DF_SHARE` -> `6P_DF_SHARE...71`
## • `6P_DF_SHARE_REASONS` -> `6P_DF_SHARE_REASONS...72`
## • `6P_DF_SHARE_OTHER` -> `6P_DF_SHARE_OTHER...73`
## • `6P_DF_DNSHARE_REASON` -> `6P_DF_DNSHARE_REASON...74`
## • `6P_DF_DNSHARE_OTHER` -> `6P_DF_DNSHARE_OTHER...75`
## • `7P_R_IMPT` -> `7P_R_IMPT...77`
## • `7P_R_INTEREST` -> `7P_R_INTEREST...78`
## • `7P_R_FAMILIAR` -> `7P_R_FAMILIAR...79`
## • `7P_R_NOVEL` -> `7P_R_NOVEL...80`
## • `7P_R_SHARE` -> `7P_R_SHARE...81`
## • `7P_R_SHARE_REASONS` -> `7P_R_SHARE_REASONS...82`
## • `7P_R_SHARE_OTHER` -> `7P_R_SHARE_OTHER...83`
## • `7P_R_DNSHARE_REASONS` -> `7P_R_DNSHARE_REASONS...84`
## • `7P_R_DNSHARE_OTHER` -> `7P_R_DNSHARE_OTHER...85`
## • `13P_DF_IMPT` -> `13P_DF_IMPT...87`
## • `13P_DF_INTEREST` -> `13P_DF_INTEREST...88`
## • `13P_DF_FAMILIAR` -> `13P_DF_FAMILIAR...89`
## • `13P_DF_NOVEL` -> `13P_DF_NOVEL...90`
## • `13P_DF_SHARE` -> `13P_DF_SHARE...91`
## • `13P_DF_SHARE_REASONS` -> `13P_DF_SHARE_REASONS...92`
## • `13P_DF_SHARE_OTHER` -> `13P_DF_SHARE_OTHER...93`
## • `13P_DF_DNSHARE_REASO` -> `13P_DF_DNSHARE_REASO...94`
## • `13P_DF_DNSHARE_OTHER` -> `13P_DF_DNSHARE_OTHER...95`
## • `15P_DF_IMPT` -> `15P_DF_IMPT...97`
## • `15P_DF_INTEREST` -> `15P_DF_INTEREST...98`
## • `15P_DF_FAMILIAR` -> `15P_DF_FAMILIAR...99`
## • `15P_DF_NOVEL` -> `15P_DF_NOVEL...100`
## • `15P_DF_SHARE` -> `15P_DF_SHARE...101`
## • `15P_DF_SHARE_REASONS` -> `15P_DF_SHARE_REASONS...102`
## • `15P_DF_SHARE_OTHER` -> `15P_DF_SHARE_OTHER...103`
## • `15P_DF_DNSHARE_REASO` -> `15P_DF_DNSHARE_REASO...104`
## • `15P_DF_DNSHARE_OTHER` -> `15P_DF_DNSHARE_OTHER...105`
## • `17P_R_IMPT` -> `17P_R_IMPT...107`
## • `17P_R_INTEREST` -> `17P_R_INTEREST...108`
## • `17P_R_FAMILIAR` -> `17P_R_FAMILIAR...109`
## • `17P_R_NOVEL` -> `17P_R_NOVEL...110`
## • `17P_R_SHARE` -> `17P_R_SHARE...111`
## • `17P_R_SHARE_REASONS` -> `17P_R_SHARE_REASONS...112`
## • `17P_R_SHARE_OTHER` -> `17P_R_SHARE_OTHER...113`
## • `17P_R_DNSHARE_REASON` -> `17P_R_DNSHARE_REASON...114`
## • `17P_R_DNSHARE_OTHER` -> `17P_R_DNSHARE_OTHER...115`
## • `19P_DF_IMPT` -> `19P_DF_IMPT...117`
## • `19P_DF_INTEREST` -> `19P_DF_INTEREST...118`
## • `19P_DF_FAMILIAR` -> `19P_DF_FAMILIAR...119`
## • `19P_DF_NOVEL` -> `19P_DF_NOVEL...120`
## • `19P_DF_SHARE` -> `19P_DF_SHARE...121`
## • `19P_DF_SHARE_REASONS` -> `19P_DF_SHARE_REASONS...122`
## • `19P_DF_SHARE_OTHER` -> `19P_DF_SHARE_OTHER...123`
## • `19P_DF_DNSHARE_REASO` -> `19P_DF_DNSHARE_REASO...124`
## • `19P_DF_DNSHARE_OTHER` -> `19P_DF_DNSHARE_OTHER...125`
## • `21P_R_IMPT` -> `21P_R_IMPT...127`
## • `21P_R_INTEREST` -> `21P_R_INTEREST...128`
## • `21P_R_FAMILIAR` -> `21P_R_FAMILIAR...129`
## • `21P_R_NOVEL` -> `21P_R_NOVEL...130`
## • `21P_R_SHARE` -> `21P_R_SHARE...131`
## • `21P_R_SHARE_REASONS` -> `21P_R_SHARE_REASONS...132`
## • `21P_R_SHARE_OTHER` -> `21P_R_SHARE_OTHER...133`
## • `21P_R_DNSHARE_REASON` -> `21P_R_DNSHARE_REASON...134`
## • `21P_R_DNSHARE_OTHER` -> `21P_R_DNSHARE_OTHER...135`
## • `23P_R_IMPT` -> `23P_R_IMPT...137`
## • `23P_R_INTEREST` -> `23P_R_INTEREST...138`
## • `23P_R_FAMILIAR` -> `23P_R_FAMILIAR...139`
## • `23P_R_NOVEL` -> `23P_R_NOVEL...140`
## • `23P_R_SHARE` -> `23P_R_SHARE...141`
## • `23P_R_SHARE_REASONS` -> `23P_R_SHARE_REASONS...142`
## • `23P_R_SHARE_OTHER` -> `23P_R_SHARE_OTHER...143`
## • `23P_R_DNSHARE_REASON` -> `23P_R_DNSHARE_REASON...144`
## • `23P_R_DNSHARE_OTHER` -> `23P_R_DNSHARE_OTHER...145`
## • `CTRL_SHARING_PERSP` -> `CTRL_SHARING_PERSP...147`
## • `CTRL_SHARING_PERSP` -> `CTRL_SHARING_PERSP...148`
## • `1P_R_IMPT` -> `1P_R_IMPT...150`
## • `1P_R_INTEREST` -> `1P_R_INTEREST...151`
## • `1P_R_FAMILIAR` -> `1P_R_FAMILIAR...152`
## • `1P_R_NOVEL` -> `1P_R_NOVEL...153`
## • `1P_R_SHARE` -> `1P_R_SHARE...154`
## • `1P_R_SHARE_REASONS` -> `1P_R_SHARE_REASONS...155`
## • `1P_R_SHARE_OTHER` -> `1P_R_SHARE_OTHER...156`
## • `1P_R_DNSHARE_REASONS` -> `1P_R_DNSHARE_REASONS...157`
## • `1P_R_DNSHARE_OTHER` -> `1P_R_DNSHARE_OTHER...158`
## • `2P_DF_IMPT` -> `2P_DF_IMPT...159`
## • `2P_DF_INTEREST` -> `2P_DF_INTEREST...160`
## • `2P_DF_FAMILIAR` -> `2P_DF_FAMILIAR...161`
## • `2P_DF_NOVEL` -> `2P_DF_NOVEL...162`
## • `2P_DF_SHARE` -> `2P_DF_SHARE...163`
## • `2P_DF_SHARE_REASONS` -> `2P_DF_SHARE_REASONS...164`
## • `2P_DF_SHARE_OTHER` -> `2P_DF_SHARE_OTHER...165`
## • `2P_DF_DNSHARE_REASON` -> `2P_DF_DNSHARE_REASON...166`
## • `2P_DF_DNSHARE_OTHER` -> `2P_DF_DNSHARE_OTHER...167`
## • `4P_DF_IMPT` -> `4P_DF_IMPT...168`
## • `4P_DF_INTEREST` -> `4P_DF_INTEREST...169`
## • `4P_DF_FAMILIAR` -> `4P_DF_FAMILIAR...170`
## • `4P_DF_NOVEL` -> `4P_DF_NOVEL...171`
## • `4P_DF_SHARE` -> `4P_DF_SHARE...172`
## • `4P_DF_SHARE_REASONS` -> `4P_DF_SHARE_REASONS...173`
## • `4P_DF_SHARE_OTHER` -> `4P_DF_SHARE_OTHER...174`
## • `4P_DF_DNSHARE_REASON` -> `4P_DF_DNSHARE_REASON...175`
## • `4P_DF_DNSHARE_OTHER` -> `4P_DF_DNSHARE_OTHER...176`
## • `5P_R_IMPT` -> `5P_R_IMPT...177`
## • `5P_R_INTEREST` -> `5P_R_INTEREST...178`
## • `5P_R_FAMILIAR` -> `5P_R_FAMILIAR...179`
## • `5P_R_NOVEL` -> `5P_R_NOVEL...180`
## • `5P_R_SHARE` -> `5P_R_SHARE...181`
## • `5P_R_SHARE_REASONS` -> `5P_R_SHARE_REASONS...182`
## • `5P_R_SHARE_OTHER` -> `5P_R_SHARE_OTHER...183`
## • `5P_R_DNSHARE_REASONS` -> `5P_R_DNSHARE_REASONS...184`
## • `5P_R_DNSHARE_OTHER` -> `5P_R_DNSHARE_OTHER...185`
## • `6P_DF_IMPT` -> `6P_DF_IMPT...186`
## • `6P_DF_INTEREST` -> `6P_DF_INTEREST...187`
## • `6P_DF_FAMILIAR` -> `6P_DF_FAMILIAR...188`
## • `6P_DF_NOVEL` -> `6P_DF_NOVEL...189`
## • `6P_DF_SHARE` -> `6P_DF_SHARE...190`
## • `6P_DF_SHARE_REASONS` -> `6P_DF_SHARE_REASONS...191`
## • `6P_DF_SHARE_OTHER` -> `6P_DF_SHARE_OTHER...192`
## • `6P_DF_DNSHARE_REASON` -> `6P_DF_DNSHARE_REASON...193`
## • `6P_DF_DNSHARE_OTHER` -> `6P_DF_DNSHARE_OTHER...194`
## • `7P_R_IMPT` -> `7P_R_IMPT...195`
## • `7P_R_INTEREST` -> `7P_R_INTEREST...196`
## • `7P_R_FAMILIAR` -> `7P_R_FAMILIAR...197`
## • `7P_R_NOVEL` -> `7P_R_NOVEL...198`
## • `7P_R_SHARE` -> `7P_R_SHARE...199`
## • `7P_R_SHARE_REASONS` -> `7P_R_SHARE_REASONS...200`
## • `7P_R_SHARE_OTHER` -> `7P_R_SHARE_OTHER...201`
## • `7P_R_DNSHARE_REASONS` -> `7P_R_DNSHARE_REASONS...202`
## • `7P_R_DNSHARE_OTHER` -> `7P_R_DNSHARE_OTHER...203`
## • `13P_DF_IMPT` -> `13P_DF_IMPT...204`
## • `13P_DF_INTEREST` -> `13P_DF_INTEREST...205`
## • `13P_DF_FAMILIAR` -> `13P_DF_FAMILIAR...206`
## • `13P_DF_NOVEL` -> `13P_DF_NOVEL...207`
## • `13P_DF_SHARE` -> `13P_DF_SHARE...208`
## • `13P_DF_SHARE_REASONS` -> `13P_DF_SHARE_REASONS...209`
## • `13P_DF_SHARE_OTHER` -> `13P_DF_SHARE_OTHER...210`
## • `13P_DF_DNSHARE_REASO` -> `13P_DF_DNSHARE_REASO...211`
## • `13P_DF_DNSHARE_OTHER` -> `13P_DF_DNSHARE_OTHER...212`
## • `15P_DF_IMPT` -> `15P_DF_IMPT...213`
## • `15P_DF_INTEREST` -> `15P_DF_INTEREST...214`
## • `15P_DF_FAMILIAR` -> `15P_DF_FAMILIAR...215`
## • `15P_DF_NOVEL` -> `15P_DF_NOVEL...216`
## • `15P_DF_SHARE` -> `15P_DF_SHARE...217`
## • `15P_DF_SHARE_REASONS` -> `15P_DF_SHARE_REASONS...218`
## • `15P_DF_SHARE_OTHER` -> `15P_DF_SHARE_OTHER...219`
## • `15P_DF_DNSHARE_REASO` -> `15P_DF_DNSHARE_REASO...220`
## • `15P_DF_DNSHARE_OTHER` -> `15P_DF_DNSHARE_OTHER...221`
## • `17P_R_IMPT` -> `17P_R_IMPT...222`
## • `17P_R_INTEREST` -> `17P_R_INTEREST...223`
## • `17P_R_FAMILIAR` -> `17P_R_FAMILIAR...224`
## • `17P_R_NOVEL` -> `17P_R_NOVEL...225`
## • `17P_R_SHARE` -> `17P_R_SHARE...226`
## • `17P_R_SHARE_REASONS` -> `17P_R_SHARE_REASONS...227`
## • `17P_R_SHARE_OTHER` -> `17P_R_SHARE_OTHER...228`
## • `17P_R_DNSHARE_REASON` -> `17P_R_DNSHARE_REASON...229`
## • `17P_R_DNSHARE_OTHER` -> `17P_R_DNSHARE_OTHER...230`
## • `19P_DF_IMPT` -> `19P_DF_IMPT...231`
## • `19P_DF_INTEREST` -> `19P_DF_INTEREST...232`
## • `19P_DF_FAMILIAR` -> `19P_DF_FAMILIAR...233`
## • `19P_DF_NOVEL` -> `19P_DF_NOVEL...234`
## • `19P_DF_SHARE` -> `19P_DF_SHARE...235`
## • `19P_DF_SHARE_REASONS` -> `19P_DF_SHARE_REASONS...236`
## • `19P_DF_SHARE_OTHER` -> `19P_DF_SHARE_OTHER...237`
## • `19P_DF_DNSHARE_REASO` -> `19P_DF_DNSHARE_REASO...238`
## • `19P_DF_DNSHARE_OTHER` -> `19P_DF_DNSHARE_OTHER...239`
## • `21P_R_IMPT` -> `21P_R_IMPT...240`
## • `21P_R_INTEREST` -> `21P_R_INTEREST...241`
## • `21P_R_FAMILIAR` -> `21P_R_FAMILIAR...242`
## • `21P_R_NOVEL` -> `21P_R_NOVEL...243`
## • `21P_R_SHARE` -> `21P_R_SHARE...244`
## • `21P_R_SHARE_REASONS` -> `21P_R_SHARE_REASONS...245`
## • `21P_R_SHARE_OTHER` -> `21P_R_SHARE_OTHER...246`
## • `21P_R_DNSHARE_REASON` -> `21P_R_DNSHARE_REASON...247`
## • `21P_R_DNSHARE_OTHER` -> `21P_R_DNSHARE_OTHER...248`
## • `23P_R_IMPT` -> `23P_R_IMPT...249`
## • `23P_R_INTEREST` -> `23P_R_INTEREST...250`
## • `23P_R_FAMILIAR` -> `23P_R_FAMILIAR...251`
## • `23P_R_NOVEL` -> `23P_R_NOVEL...252`
## • `23P_R_SHARE` -> `23P_R_SHARE...253`
## • `23P_R_SHARE_REASONS` -> `23P_R_SHARE_REASONS...254`
## • `23P_R_SHARE_OTHER` -> `23P_R_SHARE_OTHER...255`
## • `23P_R_DNSHARE_REASON` -> `23P_R_DNSHARE_REASON...256`
## • `23P_R_DNSHARE_OTHER` -> `23P_R_DNSHARE_OTHER...257`
The influence of believing and sharing political videos :
#Likert scale has
likely_values <- c(
"very unlikely",
"moderately unlikely",
"slightly unlikely",
"slightly likely",
"moderately likely",
"very likely"
)
consume_values <- c(
"Less than 1 hour per day",
"1-2 hours per day",
"2-3 hours per day",
"3-4 hours per day",
"5+ hours per day"
)
interest_levels <- c ( "not at all interested in this",
"not interested",
"neither not interested nor interested",
"interested",
"very much interested"
)
likley_shory_values <- c("Very unlikely",
"Unlikely",
"Neither likely nor unlikely",
"Likely",
"Very likely")
knowledgable_values <- c(
"very unknowledgeable",
"somewhat unknowledgeable",
"neither",
"somewhat knowledgeable",
"very knowledgeable"
)
easy_levels <- c("Very difficult",
"Difficult",
"Neither difficult nor easy",
"Easy",
"Very easy")
boolen_q <- c( "Yes", "No")
importance_levels <- c ("very unimportant" ,
"unimportant",
"neither important nor unimportant",
"important" ,
"very important"
)
novel_levels <-c ("not at all novel",
"not novel",
"neither novel nor not novel",
"novel",
"very novel")
familiar_levels <-c ("not at all familiar",
"not familiar",
"neither familiar nor unfamiliar",
"familiar",
"very familiar")
believe_levels <- c("very unlikely",
"unlikely",
"neither likely nor unlikely",
"likely",
"very likely" )
####################
#After the survey, the postsurvey questions
judging_impact_values <- c(
"extremely unlikely",
"moderately unlikely",
"slightly unlikely",
"slightly likely",
"moderately likely",
"extremely likely"
)
sharing_accuracy_level <- c ("not at all important",
"moderately important" ,
"slightly important",
"neither important nor unimportant",
"very important" ,
"extremely important"
)
# Clean Political data frame separating video into deepfake/real, share likely values and,
clean_data_pol <- data_raw_pol |> filter(Finished == "True") |>
select(ResponseId, matches("_DF_"), matches("_R_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question", "question_detail")) |>
group_by(ResponseId) |>
mutate(condition = if_else(any(grepl("BELIEVE", question)), "Treatment", "Control")) |>
ungroup() |>
filter(question == "SHARE", question_detail != "REASONS", question_detail != "OTHER") |>
select(-question_detail) |>
pivot_wider(names_from = question, values_from = value) |>
unite("video", video, fake) |>
mutate(
SHARE = str_to_lower(SHARE),
share = ordered(SHARE,levels = likely_values),
share_numerical = as.numeric(share)
) |>
separate(video, c("video", "type")) |>
mutate(type = if_else(type == "R", "Real", "Deepfake")) |>
select(-SHARE)
## Warning: Expected 4 pieces. Additional pieces discarded in 1621 rows [6, 12, 18, 24, 30,
## 36, 42, 48, 54, 60, 66, 72, 79, 86, 93, 100, 107, 114, 121, 122, ...].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 756 rows [73, 80, 87, 94,
## 101, 108, 115, 123, 130, 137, 144, 152, 232, 239, 246, 253, 260, 267, 274, 281,
## ...].
head(clean_data_pol)
## # A tibble: 6 × 6
## ResponseId video type condition share share_numerical
## <chr> <chr> <chr> <chr> <ord> <dbl>
## 1 R_ezxZ4Fh2teO3yCZ 2P Deepfake Control very unlikely 1
## 2 R_ezxZ4Fh2teO3yCZ 4P Deepfake Control slightly unlikely 3
## 3 R_ezxZ4Fh2teO3yCZ 6P Deepfake Control very unlikely 1
## 4 R_ezxZ4Fh2teO3yCZ 13P Deepfake Control slightly unlikely 3
## 5 R_ezxZ4Fh2teO3yCZ 15P Deepfake Control very unlikely 1
## 6 R_ezxZ4Fh2teO3yCZ 19P Deepfake Control very unlikely 1
clean_data_pol |>
group_by(condition,type) |>
summarise(share = mean(share_numerical))
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 3
## # Groups: condition [2]
## condition type share
## <chr> <chr> <dbl>
## 1 Control Deepfake 1.54
## 2 Control Real 1.81
## 3 Treatment Deepfake 1.70
## 4 Treatment Real 1.71
clean_data_pol |> ggplot(aes(condition,share_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
clean_data_ent <- data_raw_ent |> filter(Finished == "True") |>
select(ResponseId, matches("_DF_"), matches("_R_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question", "question_detail")) |>
group_by(ResponseId) |>
mutate(condition = if_else(any(grepl("BELIEVE", question)), "Treatment", "Control")) |>
ungroup() |>
filter(question == "SHARE", question_detail != "REASONS", question_detail != "OTHER") |>
select(-question_detail) |>
pivot_wider(names_from = question, values_from = value) |>
unite("video", video, fake) |>
mutate(
SHARE = str_to_lower(SHARE),
share = ordered(SHARE,levels = likely_values),
share_numerical = as.numeric(share)
) |>
separate(video, c("video", "type")) |>
mutate(type = if_else(type == "R", "Real", "Deepfake")) |>
select(-SHARE)
## Warning: Expected 4 pieces. Additional pieces discarded in 1610 rows [7, 14, 21, 28, 35,
## 42, 49, 56, 63, 70, 77, 84, 90, 96, 102, 108, 114, 120, 126, 132, ...].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 780 rows [1, 8, 15, 22,
## 29, 36, 43, 50, 57, 64, 71, 78, 230, 237, 244, 251, 258, 265, 272, 279, ...].
head(clean_data_ent)
## # A tibble: 6 × 6
## ResponseId video type condition share share_numerical
## <chr> <chr> <chr> <chr> <ord> <dbl>
## 1 R_3Jh3kj1y3kaZbyl 8E Deepfake Treatment very unlikely 1
## 2 R_3Jh3kj1y3kaZbyl 10E Deepfake Treatment moderately unlikely 2
## 3 R_3Jh3kj1y3kaZbyl 12E Deepfake Treatment very unlikely 1
## 4 R_3Jh3kj1y3kaZbyl 14E Deepfake Treatment very unlikely 1
## 5 R_3Jh3kj1y3kaZbyl 16E Deepfake Treatment very unlikely 1
## 6 R_3Jh3kj1y3kaZbyl 24E Deepfake Treatment moderately unlikely 2
clean_data_ent |>
group_by(condition,type) |>
summarise(share = mean(share_numerical))
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 3
## # Groups: condition [2]
## condition type share
## <chr> <chr> <dbl>
## 1 Control Deepfake 1.80
## 2 Control Real 2.32
## 3 Treatment Deepfake 1.57
## 4 Treatment Real 1.90
clean_data_ent |> ggplot(aes(condition,share_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
library(stringr)
All_combined_pol_ent <- rbind( clean_data_ent,clean_data_pol)
All_videos_combined<- All_combined_pol_ent |>
mutate (Videotype = ifelse(str_detect (All_combined_pol_ent$video, "P"), "Political", "Entertainment"))
tail (All_videos_combined)
## # A tibble: 6 × 7
## ResponseId video type condition share share_numerical Videotype
## <chr> <chr> <chr> <chr> <ord> <dbl> <chr>
## 1 R_3lukBqCKhPAkNjn 1P Real Control very likely 6 Political
## 2 R_3lukBqCKhPAkNjn 5P Real Control slightly un… 3 Political
## 3 R_3lukBqCKhPAkNjn 7P Real Control slightly un… 3 Political
## 4 R_3lukBqCKhPAkNjn 17P Real Control very likely 6 Political
## 5 R_3lukBqCKhPAkNjn 21P Real Control slightly li… 4 Political
## 6 R_3lukBqCKhPAkNjn 23P Real Control very unlike… 1 Political
All_videos_combined |>
group_by(condition,type, Videotype ) |>
summarise(share = mean(share_numerical))
## `summarise()` has grouped output by 'condition', 'type'. You can override using
## the `.groups` argument.
## # A tibble: 8 × 4
## # Groups: condition, type [4]
## condition type Videotype share
## <chr> <chr> <chr> <dbl>
## 1 Control Deepfake Entertainment 1.80
## 2 Control Deepfake Political 1.54
## 3 Control Real Entertainment 2.37
## 4 Control Real Political 1.84
## 5 Treatment Deepfake Entertainment 1.57
## 6 Treatment Deepfake Political 1.70
## 7 Treatment Real Entertainment 1.93
## 8 Treatment Real Political 1.72
All_videos_combined |> ggplot(aes(condition,share_numerical)) +
# facet_wrap(vars(video)) +
stat_summary(
fun.data = mean_cl_boot,
geom = "pointrange",
shape = 21,
fill = "white"
)
#-------1-One way ANOVA -----------------
pol_oneway <- aov(share_numerical ~ condition, data = clean_data_pol)
summary(pol_oneway)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 0.4 0.424 0.326 0.568
## Residuals 1534 1993.2 1.299
#----------2-Two way ANOVA without interactions----------
#ANOVA Two way without interactions
pol_twoway_no_interactions <- aov (share_numerical ~ type + condition,
data= clean_data_pol)
summary (pol_twoway_no_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 8.0 8.021 6.194 0.0129 *
## condition 1 0.4 0.424 0.327 0.5673
## Residuals 1533 1985.2 1.295
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------3-Two way ANOVA with Interactions----------------
#ANOVA 2 way with interactions
pol_twoway_with_interactions <- aov (share_numerical ~type * condition,
data=clean_data_pol)
summary ( pol_twoway_with_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 8.0 8.021 6.209 0.0128 *
## condition 1 0.4 0.424 0.328 0.5668
## type:condition 1 5.9 5.910 4.574 0.0326 *
## Residuals 1532 1979.3 1.292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-------1-One way ANOVA -----------------
ent_oneway <- aov(share_numerical ~ condition, data = clean_data_ent)
summary(ent_oneway)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 40.3 40.29 21.2 4.46e-06 ***
## Residuals 1546 2937.5 1.90
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#----------2-Two way ANOVA without interactions----------
#ANOVA Two way without interactions
ent_twoway_no_interactions <- aov (share_numerical ~ type + condition,
data= clean_data_ent)
summary (ent_twoway_no_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 70.8 70.78 38.14 8.39e-10 ***
## condition 1 40.3 40.29 21.71 3.44e-06 ***
## Residuals 1545 2866.8 1.86
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------3-Two way ANOVA with Interactions----------------
#ANOVA 2 way with interactions
ent_twoway_with_interactions <- aov (share_numerical ~type * condition,
data=clean_data_ent)
summary ( ent_twoway_with_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 70.8 70.78 38.163 8.31e-10 ***
## condition 1 40.3 40.29 21.726 3.42e-06 ***
## type:condition 1 3.3 3.31 1.784 0.182
## Residuals 1544 2863.5 1.85
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-------1-One way ANOVA -----------------
combined_oneway <- aov(share_numerical ~ condition, data = All_videos_combined)
summary(combined_oneway)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 1 16 15.765 9.74 0.00182 **
## Residuals 3082 4989 1.619
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#----------2-Two way ANOVA without interactions----------
#ANOVA Two way without interactions
combined_twoway_no_interactions <- aov (share_numerical ~ type + condition,
data= All_videos_combined)
summary (combined_twoway_no_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 63 63.35 39.627 3.51e-10 ***
## condition 1 16 15.77 9.862 0.0017 **
## Residuals 3081 4925 1.60
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------3-Two way ANOVA with Interactions----------------
#ANOVA 2 way with interactions
combined_twoway_with_interactions <- aov (share_numerical ~type * condition,
data=All_videos_combined)
summary ( combined_twoway_with_interactions)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 63 63.35 39.685 3.41e-10 ***
## condition 1 16 15.77 9.876 0.00169 **
## type:condition 1 9 8.75 5.482 0.01927 *
## Residuals 3080 4916 1.60
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Combined being political and entertainment effect
combined_twoway_without_interactions_videotype <-
aov (share_numerical ~type + Videotype+ condition,
data=All_videos_combined)
summary ( combined_twoway_without_interactions_videotype )
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 63 63.35 39.90 3.06e-10 ***
## Videotype 1 34 34.45 21.70 3.33e-06 ***
## condition 1 16 16.27 10.25 0.00138 **
## Residuals 3080 4890 1.59
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One way
library (lsr)
# Eta sqr one way
Eta_oneway <-etaSquared(pol_oneway)
print (Eta_oneway)
## eta.sq eta.sq.part
## condition 0.0002126591 0.0002126591
#Eta sqr for twoway without interactions
Eta_twoway_nointeraction<-etaSquared(pol_twoway_no_interactions)
print(Eta_twoway_nointeraction)
## eta.sq eta.sq.part
## type 0.0040235686 0.0040244245
## condition 0.0002126591 0.0002135182
Eta_twoway_with_interaction<-etaSquared(pol_twoway_with_interactions)
print(Eta_twoway_with_interaction)
## eta.sq eta.sq.part
## type 0.0040235686 0.0040363927
## condition 0.0002126591 0.0002141556
## type:condition 0.0029644487 0.0029770602
# One way
library (lsr)
# Eta sqr one way
Eta_oneway_ent <-etaSquared(ent_oneway)
print (Eta_oneway_ent)
## eta.sq eta.sq.part
## condition 0.01353063 0.01353063
#Eta sqr for twoway without interactions
Eta_twoway_nointeraction_ent<-etaSquared(ent_twoway_no_interactions)
print(Eta_twoway_nointeraction_ent)
## eta.sq eta.sq.part
## type 0.02376754 0.02409354
## condition 0.01353063 0.01386005
#Eta sqr for twoway with interactions
Eta_twoway_with_interaction_ent<-etaSquared(ent_twoway_with_interactions)
print(Eta_twoway_with_interaction_ent)
## eta.sq eta.sq.part
## type 0.023767537 0.024120706
## condition 0.013530632 0.013875845
## type:condition 0.001111134 0.001154183
# One way
library (lsr)
# Eta sqr one way
Eta_oneway_combined <-etaSquared(combined_oneway)
print (Eta_oneway_combined)
## eta.sq eta.sq.part
## condition 0.003150313 0.003150313
#Eta sqr for twoway without interactions
Eta_twoway_nointeraction_combined<-etaSquared(combined_twoway_no_interactions)
print(Eta_twoway_nointeraction_combined)
## eta.sq eta.sq.part
## type 0.012658497 0.012698501
## condition 0.003150313 0.003190702
#Eta sqr for twoway with interactions
Eta_twoway_with_interaction_combined<-etaSquared(combined_twoway_with_interactions)
print(Eta_twoway_with_interaction_combined)
## eta.sq eta.sq.part
## type 0.012658497 0.012720816
## condition 0.003150313 0.003196364
## type:condition 0.001748696 0.001776785
#=================================================================================== #Political data
pol_individual_df<-data_raw_pol |> filter(Finished == "True") |>
mutate(Duration = as.numeric(`Duration (in seconds)`),
AGE = as.numeric(AGE),
BROWSE_INTERNET = ordered(BROWSE_INTERNET, levels = consume_values),
browse_internet = as.numeric(BROWSE_INTERNET, levels = consume_values),
USE_SNS = ordered(USE_SNS, levels = consume_values),
use_sns = as.numeric(USE_SNS, levels = consume_values),
WATCHING_BEHAVIOR = ordered(WATCHING_BEHAVIOR, levels =likley_shory_values),
watching_behavior = as.numeric (WATCHING_BEHAVIOR, levels =likley_shory_values),
SHARING_BEHAVIOR = ordered(SHARING_BEHAVIOR, levels = likley_shory_values),
sharing_behavior = as.numeric(SHARING_BEHAVIOR, levels = likley_shory_values),
KNOW_DEEPFAKE = KNOW_DEEPFAKE == "Yes",
EXP_CREATE_DF = EXP_CREATE_DF == "Yes",
KNOW_CREATE_DF = str_to_lower(KNOW_CREATE_DF),
KNOW_CREATE_DF = ordered(KNOW_CREATE_DF, levels = knowledgable_values),
know_create_df = as.numeric(KNOW_CREATE_DF, levels = knowledgable_values),
EASE_CREATE_DF = ordered(EASE_CREATE_DF, levels = easy_levels),
ease_create_df = as.numeric(EASE_CREATE_DF, levels = easy_levels)) |>
select(
ResponseId,
Duration,
AGE,
BROWSE_INTERNET,
browse_internet,
USE_SNS,
use_sns,
SNS_PLATFORM_USE,
WATCHING_BEHAVIOR,
watching_behavior,
SHARING_BEHAVIOR,
sharing_behavior,
KNOW_DEEPFAKE,
KNOW_CREATE_DF,
know_create_df,
EXP_CREATE_DF,
EASE_CREATE_DF,
ease_create_df
) |> mutate(
SNS_PLATFORM_USE = strsplit(SNS_PLATFORM_USE,split = ","),
value = TRUE
) |>
unnest() |>
mutate(SNS_PLATFORM_USE = paste0("Plat_",SNS_PLATFORM_USE)) |>
pivot_wider(names_from = SNS_PLATFORM_USE, values_fill = FALSE)
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c(SNS_PLATFORM_USE)`.
# Clean Political data frame separating video into deepfake/real, share likely values and,
pol_behavior_df <- data_raw_pol |> filter(Finished == "True") |>
select(ResponseId, matches("_DF_"), matches("_R_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question", "question_detail")) |>
group_by(ResponseId) |>
mutate(condition = if_else(any(grepl("BELIEVE", question)), "Treatment", "Control")) |>
ungroup() |>
filter( question_detail != "REASONS", question_detail != "OTHER") |>
select(-question_detail) |>
pivot_wider(names_from = question, values_from = value) |>
unite("video", video, fake) |>
mutate(
SHARE = str_to_lower(SHARE),
share = ordered(SHARE,levels = likely_values),
share_numerical = as.numeric(share),
IMPT= str_to_lower(IMPT),
impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
INTEREST = str_to_lower(INTEREST),
INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
FAMILIAR = str_to_lower (FAMILIAR),
familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
NOVEL = str_to_lower(NOVEL),
novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)) ) |>
separate(video, c("video", "fake")) |>
#mutate(type = if_else(type == "R", "Real", "Deepfake")) |>
select(-SHARE, -DNSHARE)
## Warning: Expected 4 pieces. Additional pieces discarded in 1621 rows [6, 12, 18, 24, 30,
## 36, 42, 48, 54, 60, 66, 72, 79, 86, 93, 100, 107, 114, 121, 122, ...].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 756 rows [73, 80, 87, 94,
## 101, 108, 115, 123, 130, 137, 144, 152, 232, 239, 246, 253, 260, 267, 274, 281,
## ...].
library(readxl)
library(dplyr)
library(ggplot2)
# Filter the data for 'DF' and 'R' groups
df_data <-pol_behavior_df %>% filter(fake == 'DF')
r_data <- pol_behavior_df %>% filter(fake != 'DF')
# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
r_stats <- r_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
# Combine the data
plot_data <- bind_rows(
mutate(df_stats, group = "DF"),
mutate(r_stats, group = "R")
)
# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd, group = group),
position = position_dodge(0.7), width = 0.25
) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of Political DF and R between Control and Treatment condition") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_x_discrete(labels = c("Control", "Treatment")) +
guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))
# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)
library(dplyr)
# Filter the data for the DF group
df_data <- filter(pol_behavior_df, fake == 'DF')
# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] 0.1573057
##
## $percentage_difference
## [1] 10.22487
library(dplyr)
# Filter the data for the DF group
df_data <- filter(pol_behavior_df, fake == 'R')
# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] -0.09084249
##
## $percentage_difference
## [1] -5.032468
# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_behavior_df, fake == 'R' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_behavior_df, fake == 'R' & condition == 'Treatment')$share_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_share and r_treatment_share
## t = 1.1148, df = 765.27, p-value = 0.2653
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06912972 0.25081471
## sample estimates:
## mean of x mean of y
## 1.805128 1.714286
# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(pol_behavior_df, fake == 'DF' & condition == 'Control')$share_numerical
r_treatment_share <- filter(pol_behavior_df, fake == 'DF' & condition == 'Treatment')$share_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_share and r_treatment_share
## t = -1.9015, df = 736.25, p-value = 0.05763
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.319714741 0.005103426
## sample estimates:
## mean of x mean of y
## 1.538462 1.695767
library(dplyr)
library(broom)
# Split the data into treatment and control groups
treatment_data <- filter(pol_behavior_df, condition == 'Treatment')
control_data <- filter(pol_behavior_df, condition == 'Control')
# Run linear regression models for each group
treatment_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = treatment_data)
control_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = control_data)
# Summarize the models
treatment_summary <- tidy(treatment_model)
control_summary <- tidy(control_model)
# Output the summaries
treatment_summary
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.192 0.139 1.38 1.67e- 1
## 2 impt_numerical 0.146 0.0318 4.60 5.03e- 6
## 3 interest_numerical NA NA NA NA
## 4 novel_numerical 0.476 0.0366 13.0 4.28e-35
## 5 familiar_numerical -0.0113 0.0269 -0.418 6.76e- 1
control_summary
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.380 0.139 2.74 6.24e- 3
## 2 impt_numerical 0.123 0.0301 4.08 4.90e- 5
## 3 interest_numerical NA NA NA NA
## 4 novel_numerical 0.364 0.0335 10.9 1.05e-25
## 5 familiar_numerical 0.00598 0.0270 0.221 8.25e- 1
#ploting to a graph
library(dplyr)
library(ggplot2)
# Split the data into treatment and control groups
treatment_data <- filter(pol_behavior_df, condition == 'Treatment')
control_data <- filter(pol_behavior_df, condition == 'Control')
# Function to plot linear models
plot_lm <- function(df, condition) {
variables <- c('impt_numerical', 'interest_numerical', 'novel_numerical', 'familiar_numerical')
# Create a list to store plots
plot_list <- list()
for (var in variables) {
# Fit the linear model
model <- lm(share_numerical ~ ., data = df[, c('share_numerical', var)])
# Extract the R-squared value
r_squared <- summary(model)$r.squared
# Create the plot
p <- ggplot(df, aes_string(x = var, y = 'share_numerical')) +
geom_point(alpha = 0.5) +
geom_smooth(method = 'lm', formula = y ~ x, color = 'red') +
labs(title = paste(var, 'Effect in', condition, 'Condition'),
subtitle = paste('R-squared =', round(r_squared, 2)),
x = var,
y = 'Share Numerical')
# Add the plot to the list
plot_list[[var]] <- p
}
return(plot_list)
}
# Plot for treatment group
treatment_plots <- plot_lm(treatment_data, 'Treatment')
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot for control group
control_plots <- plot_lm(control_data, 'Control')
# Display the plots
treatment_plots$impt_numerical
treatment_plots$interest_numerical
treatment_plots$novel_numerical
treatment_plots$familiar_numerical
control_plots$impt_numerical
control_plots$interest_numerical
control_plots$novel_numerical
control_plots$familiar_numerical
# Clean Political data frame separating video into deepfake/real, share likely values and,
ent_behavior_df <- data_raw_ent |> filter(Finished == "True") |>
select(ResponseId, matches("_DF_"), matches("_R_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question", "question_detail")) |>
group_by(ResponseId) |>
mutate(condition = if_else(any(grepl("BELIEVE", question)), "Treatment", "Control")) |>
ungroup() |>
filter( question_detail != "REASONS", question_detail != "OTHER") |>
select(-question_detail) |>
pivot_wider(names_from = question, values_from = value) |>
unite("video", video, fake) |>
mutate(
SHARE = str_to_lower(SHARE),
share = ordered(SHARE,levels = likely_values),
share_numerical = as.numeric(share),
IMPT= str_to_lower(IMPT),
impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
INTEREST = str_to_lower(INTEREST),
INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
FAMILIAR = str_to_lower (FAMILIAR),
familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
NOVEL = str_to_lower(NOVEL),
novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)) ) |>
separate(video, c("video", "fake")) |>
#mutate(type = if_else(type == "R", "Real", "Deepfake")) |>
select(-SHARE, -DNSHARE)
## Warning: Expected 4 pieces. Additional pieces discarded in 1610 rows [7, 14, 21, 28, 35,
## 42, 49, 56, 63, 70, 77, 84, 90, 96, 102, 108, 114, 120, 126, 132, ...].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 780 rows [1, 8, 15, 22,
## 29, 36, 43, 50, 57, 64, 71, 78, 230, 237, 244, 251, 258, 265, 272, 279, ...].
library(readxl)
library(dplyr)
library(ggplot2)
# Filter the data for 'DF' and 'R' groups
df_data <-ent_behavior_df %>% filter(fake == 'DF')
r_data <- ent_behavior_df %>% filter(fake != 'DF')
# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
r_stats <- r_data %>%
group_by(condition) %>%
summarise(mean = mean(share_numerical), sd = sd(share_numerical), .groups = 'drop')
# Combine the data
plot_data <- bind_rows(
mutate(df_stats, group = "DF"),
mutate(r_stats, group = "R")
)
# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd, group = group),
position = position_dodge(0.7), width = 0.25
) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean Shareing intention", title = "Mean Sharing behavior of Entertainment DF and R between Control and Treatment condition") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_x_discrete(labels = c("Control", "Treatment")) +
guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))
# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)
library(dplyr)
# Filter the data for the DF group
df_data <- filter(ent_behavior_df, fake == 'DF')
# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] -0.2302083
##
## $percentage_difference
## [1] -12.81159
library(dplyr)
# Filter the data for the DF group
df_data <- filter(ent_behavior_df, fake == 'R')
# Group the data by condition and calculate the mean of 'share_numerical' for the DF group
df_grouped <- df_data %>%
group_by(condition) %>%
summarise(mean_share_numerical = mean(share_numerical), .groups = 'drop')
# Extract the means for Treatment and Control
mean_treatment <- df_grouped %>% filter(condition == "Treatment") %>% pull(mean_share_numerical)
mean_control <- df_grouped %>% filter(condition == "Control") %>% pull(mean_share_numerical)
# Calculate the difference and the percentage difference
df_difference <- mean_treatment - mean_control
df_percentage_difference <- (df_difference / mean_control) * 100
# Output the difference and the percentage difference
list(difference = df_difference, percentage_difference = df_percentage_difference)
## $difference
## [1] -0.4151442
##
## $percentage_difference
## [1] -17.91184
# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(ent_behavior_df, fake == 'R' & condition == 'Control')$share_numerical
r_treatment_share <- filter(ent_behavior_df, fake == 'R' & condition == 'Treatment')$share_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_share and r_treatment_share
## t = 3.9305, df = 759.96, p-value = 9.251e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2077996 0.6224888
## sample estimates:
## mean of x mean of y
## 2.317708 1.902564
# Separate the belief levels for 'DF' in each condition
r_control_share <- filter(ent_behavior_df, fake == 'DF' & condition == 'Control')$share_numerical
r_treatment_share <- filter(ent_behavior_df, fake == 'DF' & condition == 'Treatment')$share_numerical
# Perform the t-test
t_test_result_r <- t.test(r_control_share, r_treatment_share)
# Output the t-test result
print(t_test_result_r)
##
## Welch Two Sample t-test
##
## data: r_control_share and r_treatment_share
## t = 2.5646, df = 739.48, p-value = 0.01052
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05398812 0.40642855
## sample estimates:
## mean of x mean of y
## 1.796875 1.566667
library(dplyr)
library(broom)
# Split the data into treatment and control groups
treatment_data <- filter(ent_behavior_df, condition == 'Treatment')
control_data <- filter(ent_behavior_df, condition == 'Control')
# Run linear regression models for each group
treatment_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = treatment_data)
control_model <- lm(share_numerical ~ impt_numerical + interest_numerical + novel_numerical + familiar_numerical, data = control_data)
# Summarize the models
treatment_summary <- tidy(treatment_model)
control_summary <- tidy(control_model)
# Output the summaries
treatment_summary
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.316 0.116 -2.72 6.72e- 3
## 2 impt_numerical 0.259 0.0355 7.30 7.36e-13
## 3 interest_numerical NA NA NA NA
## 4 novel_numerical 0.481 0.0360 13.4 8.16e-37
## 5 familiar_numerical 0.0729 0.0259 2.81 5.09e- 3
control_summary
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.298 0.125 -2.39 1.72e- 2
## 2 impt_numerical 0.344 0.0402 8.55 6.69e-17
## 3 interest_numerical NA NA NA NA
## 4 novel_numerical 0.546 0.0437 12.5 8.65e-33
## 5 familiar_numerical 0.0255 0.0288 0.886 3.76e- 1
#ploting to a graph
library(dplyr)
library(ggplot2)
# Split the data into treatment and control groups
treatment_data <- filter(ent_behavior_df, condition == 'Treatment')
control_data <- filter(ent_behavior_df, condition == 'Control')
# Function to plot linear models
plot_lm <- function(df, condition) {
variables <- c('impt_numerical', 'interest_numerical', 'novel_numerical', 'familiar_numerical')
# Create a list to store plots
plot_list <- list()
for (var in variables) {
# Fit the linear model
model <- lm(share_numerical ~ ., data = df[, c('share_numerical', var)])
# Extract the R-squared value
r_squared <- summary(model)$r.squared
# Create the plot
p <- ggplot(df, aes_string(x = var, y = 'share_numerical')) +
geom_point(alpha = 0.5) +
geom_smooth(method = 'lm', formula = y ~ x, color = 'red') +
labs(title = paste(var, 'Effect in', condition, 'Condition'),
subtitle = paste('R-squared =', round(r_squared, 2)),
x = var,
y = 'Share Numerical')
# Add the plot to the list
plot_list[[var]] <- p
}
return(plot_list)
}
# Plot for treatment group
treatment_plots <- plot_lm(treatment_data, 'Treatment')
# Plot for control group
control_plots <- plot_lm(control_data, 'Control')
# Display the plots
treatment_plots$impt_numerical
treatment_plots$interest_numerical
treatment_plots$novel_numerical
treatment_plots$familiar_numerical
control_plots$impt_numerical
control_plots$interest_numerical
control_plots$novel_numerical
control_plots$familiar_numerical
pol_beliveble_vs_sharability_df <-data_raw_pol |> filter(Finished == "True") |>
select(ResponseId, matches ("_R_"), matches ("_DF_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question", "question_detail"), "_", extra= "merge") |>
filter (question != "SHARE", question != "DNSHARE") |>
separate(question, c("question", "other")) |>
select(-question_detail, -other)|>
pivot_wider(names_from = question, values_from = value) |>
mutate (condition = if_else(is.na(BELIEVE), "Control", "Treatment" )) |>
select (ResponseId, fake, video, condition,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
filter (condition=="Treatment") |>
mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
BELIEVE= str_to_lower(BELIEVE),
believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
IMPT= str_to_lower(IMPT),
impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
INTEREST = str_to_lower(INTEREST),
INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
FAMILIAR = str_to_lower (FAMILIAR),
familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
NOVEL = str_to_lower(NOVEL),
novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 8436 rows [1, 2, 3, 4, 5,
## 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 756 rows [61, 67, 73, 79,
## 85, 91, 97, 103, 109, 115, 121, 127, 193, 199, 205, 211, 217, 223, 229, 235,
## ...].
#Belive behavior in the treatment condition
library(readxl)
library(dplyr)
library(ggplot2)
# Filter the data for 'DF' and 'R' groups
df_data <-pol_beliveble_vs_sharability_df %>% filter(fake == 'DF')
r_data <- pol_beliveble_vs_sharability_df %>% filter(fake != 'DF')
# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
group_by(condition) %>%
summarise(mean = mean(believe_numerical), sd = sd(believe_numerical), .groups = 'drop')
r_stats <- r_data %>%
group_by(condition) %>%
summarise(mean = mean(believe_numerical), sd = sd(believe_numerical), .groups = 'drop')
# Combine the data
plot_data <- bind_rows(
mutate(df_stats, group = "DF"),
mutate(r_stats, group = "R")
)
# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd, group = group),
position = position_dodge(0.7), width = 0.25
) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean believe behavior ", title = "Mean believe behavior of Political DF and R in the Treatment condition") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_x_discrete(labels = c( "Treatment")) +
guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))
# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)
#Entertainment belivevability
ent_beliveble_vs_sharability_df <-data_raw_ent |> filter(Finished == "True") |>
select(ResponseId, matches ("_R_"), matches ("_DF_")) |>
pivot_longer(-ResponseId, values_drop_na = TRUE) |>
separate(name, c("video", "fake", "question", "question_detail"), "_", extra= "merge") |>
filter (question != "SHARE", question != "DNSHARE") |>
separate(question, c("question", "other")) |>
select(-question_detail, -other)|>
pivot_wider(names_from = question, values_from = value) |>
mutate (condition = if_else(is.na(BELIEVE), "Control", "Treatment" )) |>
select (ResponseId, fake, video, condition,SHARE, BELIEVE, IMPT, NOVEL, INTEREST, FAMILIAR)|>
filter (condition=="Treatment") |>
mutate(SHARE = str_to_lower(SHARE),share_numerical = as.numeric(ordered(SHARE, levels = likely_values)),
BELIEVE= str_to_lower(BELIEVE),
believe_numerical = as.numeric(ordered(BELIEVE, levels = believe_levels)),
IMPT= str_to_lower(IMPT),
impt_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
INTEREST = str_to_lower(INTEREST),
INTEREST = str_replace (INTEREST, "uninterested" , "not interested" ),
interest_numerical = as.numeric(ordered(IMPT, levels = importance_levels)),
FAMILIAR = str_to_lower (FAMILIAR),
familiar_numerical = as.numeric(ordered(FAMILIAR, levels = familiar_levels)),
NOVEL = str_to_lower(NOVEL),
novel_numerical = as.numeric(ordered(NOVEL, levels = novel_levels)))
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 8520 rows [1, 2, 3, 4, 5,
## 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 23, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 780 rows [1, 7, 13, 19,
## 25, 31, 37, 43, 49, 55, 61, 67, 193, 199, 205, 211, 217, 223, 229, 235, ...].
library(readxl)
library(dplyr)
library(ggplot2)
# Filter the data for 'DF' and 'R' groups
df_data <-ent_beliveble_vs_sharability_df %>% filter(fake == 'DF')
r_data <- ent_beliveble_vs_sharability_df %>% filter(fake != 'DF')
# Calculate mean and standard deviation for the 'share_numerical' values for each group within each condition
df_stats <- df_data %>%
group_by(condition) %>%
summarise(mean = mean(believe_numerical), sd = sd(believe_numerical), .groups = 'drop')
r_stats <- r_data %>%
group_by(condition) %>%
summarise(mean = mean(believe_numerical), sd = sd(believe_numerical), .groups = 'drop')
# Combine the data
plot_data <- bind_rows(
mutate(df_stats, group = "DF"),
mutate(r_stats, group = "R")
)
# Create the plot
ggplot(plot_data, aes(x = condition, y = mean, fill = group)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd, group = group),
position = position_dodge(0.7), width = 0.25
) +
scale_fill_manual(values = c("DF" = "red", "R" = "green")) +
labs(x = "Condition", y = "Mean believein behavior ", title = "Mean belive behavior of Entertainment DF and R in the Treatment condition") +
theme_minimal() +
theme(legend.title = element_blank()) +
scale_x_discrete(labels = c( "Treatment")) +
guides(fill = guide_legend(title = "Group", override.aes = list(colour = c("red", "green"))))
# Show the plot
ggsave("bar_plot_with_error_bars.png", width = 10, height = 8, dpi = 300)
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.