# Load the two raw CSV exports: the Qualtrics survey responses and the
# demographics file.
qualtrics_data <- read.csv(
  "C:/Users/emmac/OneDrive/Desktop/HITM/Main survey/Raw data and cleaning documents/Qualtrics Data.csv"
)
demographic_data <- read.csv(
  "C:/Users/emmac/OneDrive/Desktop/HITM/Main survey/Raw data and cleaning documents/demographics data.csv"
)

# Full join on `prolific_id`, so rows that appear in only one of the two
# files are kept (with NA in the other file's columns).
# NOTE(review): dplyr reports a many-to-many match for this join, meaning
# some `prolific_id` values occur more than once in both inputs; matching
# rows are therefore multiplied in `raw_data`. Confirm this is expected.
raw_data <- qualtrics_data |>
  full_join(demographic_data, by = "prolific_id")
Warning in full_join(x = qualtrics_data, y = demographic_data, by = "prolific_id"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 826 of `x` matches multiple rows in `y`.
ℹ Row 1340 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# Prolific IDs excluded by hand: low-quality answers and duplicates.
ids_to_remove <- c(
  "65623a563d9d8a788f63db01",
  "66dcf078a9631d32a4d7e4e9",
  "5d8a64630ffb9c001746ca2c",
  "66a252056819ec081dcb1a2e",
  "659ffc7d7d21a61dbc761173",
  "6637b21240220c2517fffa1a",
  "664f42008eff493e94fd9f00",
  "6140c38ec9e5109e65060a70"
)

raw_data <- raw_data |>
  # Keep finished surveys. Rows with a missing `finished` value pass this
  # step as well, but are removed by na.omit() at the end of the pipeline.
  filter(is.na(finished) | finished == TRUE) |>
  # Drop the manually excluded participants.
  filter(!(prolific_id %in% ids_to_remove)) |>
  # Drop every row that has an NA in any column.
  na.omit()

# Sanity check: list any rows whose `prolific_id` repeats an earlier row.
# An empty result means no duplicates remain.
duplicated_ids <- raw_data |>
  filter(duplicated(prolific_id))
print(duplicated_ids)
# Collapse the 12 per-condition answer columns of each question (Q1-Q4)
# into a single column by concatenating them. This yields the one
# non-empty answer when the other 11 columns are empty strings
# (presumably each respondent saw exactly one condition -- confirm).
#
# `condition` is the index (1-12) of the first non-empty Q1_* column.
data <- raw_data |>
  mutate(
    opinion_bill = str_c(
      Q1_1, Q1_2, Q1_3, Q1_4, Q1_5, Q1_6,
      Q1_7, Q1_8, Q1_9, Q1_10, Q1_11, Q1_12
    ),
    support_election = str_c(
      Q2_1, Q2_2, Q2_3, Q2_4, Q2_5, Q2_6,
      Q2_7, Q2_8, Q2_9, Q2_10, Q2_11, Q2_12
    ),
    threaten_country = str_c(
      Q3_1, Q3_2, Q3_3, Q3_4, Q3_5, Q3_6,
      Q3_7, Q3_8, Q3_9, Q3_10, Q3_11, Q3_12
    ),
    reasoning = str_c(
      Q4_1, Q4_2, Q4_3, Q4_4, Q4_5, Q4_6,
      Q4_7, Q4_8, Q4_9, Q4_10, Q4_11, Q4_12
    ),
    # First matching clause wins, mirroring a left-to-right if/else chain.
    condition = case_when(
      Q1_1 != "" ~ 1,
      Q1_2 != "" ~ 2,
      Q1_3 != "" ~ 3,
      Q1_4 != "" ~ 4,
      Q1_5 != "" ~ 5,
      Q1_6 != "" ~ 6,
      Q1_7 != "" ~ 7,
      Q1_8 != "" ~ 8,
      Q1_9 != "" ~ 9,
      Q1_10 != "" ~ 10,
      Q1_11 != "" ~ 11,
      Q1_12 != "" ~ 12,
      .default = NA_real_
    )
  ) |>
  # Move the derived columns next to the demographics.
  relocate(opinion_bill:reasoning, .after = sex) |>
  relocate(condition, .after = sex) |>
  # Keep only the analysis columns, in this order.
  select(
    prolific_id, survey_party_id, prolific_party_id, age, ethnicity, sex,
    condition, opinion_bill, support_election, threaten_country, reasoning
  ) |>
  # Drop rows with an NA in any of the retained columns.
  na.omit()

head(data)
prolific_id survey_party_id prolific_party_id age
1 644123f5b400058bcb5b2e7a Very conservative Republican 62
2 6696a09612a7c019e4d67a7c Very conservative Republican 55
3 5efc92ff0352d10480e02cdc Liberal Democrat 26
4 6701e7c8b53a034b66f93fd8 Moderate Republican 24
5 66294441f6545cc65fd7e23f Liberal Democrat 49
6 65e22b2cb3efd22e847d0bf0 Somewhat conservative Republican 65
ethnicity sex condition opinion_bill support_election
1 White Male 9 Strongly oppose Strongly oppose
2 White Female 6 Strongly oppose Strongly oppose
3 White Female 3 Oppose Somewhat support
4 White Female 10 Support Somewhat support
5 White Female 8 Somewhat oppose Neither support nor oppose
6 White Female 11 Strongly oppose Neither support nor oppose
threaten_country
1 Strongly disagree
2 Strongly disagree
3 Neither agree nor disagree
4 Agree
5 Somewhat disagree
6 Strongly disagree
reasoning
1 Adding or deletion of seats to change the balance of power is a poor way to try to obtain what you want without going to the voting booth.
2 This is a way of adding judges to accommodate the political views of one party or the other. This would be extremely manipulative.
3 At the moment, I don't think I know enough about the ramifications of that bill to vote for or against it. I'm uncertain if one individual like the governor should hold that type of power and be able to change the seating composition whenever they'd like. I think having a more direct democratic approach would be better.
4 I think it is important to protect our leaders. However, as long as people have a peaceful protest, that will catch the attention of our leaders..
5 Sources won’t talk if they might be exposed, and most journalists have “secret” information in order to find the truth.
6 The first amendment to the constitution protects free speech. Having a journalist have to name sources is going to cause many people to stay silent.
Recode Variables
# Recode respondent-level variables into analysis-ready columns.
recoded_data <- data |>
  mutate(
    # Party ID as provided by Prolific, stored as a factor.
    prolific_partyid = as.factor(prolific_party_id),

    # Ideology as reported in the survey, mapped to 1 (very conservative)
    # through 7 (very liberal). "Haven't thought much about this" shares
    # the midpoint (4) with "Moderate".
    survey_partyid = recode(
      survey_party_id,
      "Very conservative" = 1,
      "Conservative" = 2,
      "Somewhat conservative" = 3,
      "Moderate" = 4,
      "Haven't thought much about this" = 4,
      "Somewhat liberal" = 5,
      "Liberal" = 6,
      "Very liberal" = 7
    ),

    # Respondent sex as a factor with "Male" as the first level, then
    # relabelled for display.
    respondent_gender = sex |>
      as.factor() |>
      fct_relevel("Male") |>
      fct_recode(
        "Male Respondent" = "Male",
        "Female Respondent" = "Female"
      ),

    # Ethnicity as a numeric code; any label not listed becomes NA.
    ethnicity = recode(
      ethnicity,
      "White" = 0,
      "Black" = 1,
      "Asian" = 2,
      "Mixed" = 3,
      "Other" = 4,
      .default = NA_real_
    )
  )
Recode Dependent Variables (DVs)
# Convert the three Likert-type outcome columns from text labels to 1-7
# numeric scores.
recoded_data <- recoded_data |>
  mutate(
    # 1 = strongly oppose ... 7 = strongly support. The extra entries
    # ("6", "7", "Strongly Support") cover alternative spellings of the
    # top two response options.
    opinion_bill = recode(
      opinion_bill,
      "Strongly oppose" = 1,
      "Oppose" = 2,
      "Somewhat oppose" = 3,
      "Neither support nor oppose" = 4,
      "Somewhat support" = 5,
      "Support" = 6,
      "6" = 6,
      "Strongly Support" = 7,
      "Strongly support" = 7,
      "7" = 7
    ),

    # 1 = strongly oppose ... 7 = strongly support.
    support_election = recode(
      support_election,
      "Strongly oppose" = 1,
      "Oppose" = 2,
      "Somewhat oppose" = 3,
      "Neither support nor oppose" = 4,
      "Somewhat support" = 5,
      "Support" = 6,
      "Strongly support" = 7
    ),

    # Recognition of antidemocratic behavior (reverse-coded).
    # Item: "To what extent does this protect our country?"
    #   1 = agree that it protects our country
    #       (low recognition of antidemocratic behavior)
    #   7 = disagree that it protects our country
    #       (high recognition of antidemocratic behavior)
    threaten_country = recode(
      threaten_country,
      "Strongly disagree" = 7,
      "Disagree" = 6,
      "Somewhat disagree" = 5,
      "Neither agree nor disagree" = 4,
      "Somewhat agree" = 3,
      "Agree" = 2,
      "Strongly agree" = 1
    )
  )
prolific_id survey_party_id prolific_party_id age
1 644123f5b400058bcb5b2e7a Very conservative Republican 62
2 6696a09612a7c019e4d67a7c Very conservative Republican 55
3 5efc92ff0352d10480e02cdc Liberal Democrat 26
4 6701e7c8b53a034b66f93fd8 Moderate Republican 24
5 66294441f6545cc65fd7e23f Liberal Democrat 49
6 65e22b2cb3efd22e847d0bf0 Somewhat conservative Republican 65
ethnicity sex condition opinion_bill support_election threaten_country
1 0 Male 9 1 1 7
2 0 Female 6 1 1 7
3 0 Female 3 2 5 4
4 0 Female 10 6 5 2
5 0 Female 8 3 4 5
6 0 Female 11 1 4 7
reasoning
1 Adding or deletion of seats to change the balance of power is a poor way to try to obtain what you want without going to the voting booth.
2 This is a way of adding judges to accommodate the political views of one party or the other. This would be extremely manipulative.
3 At the moment, I don't think I know enough about the ramifications of that bill to vote for or against it. I'm uncertain if one individual like the governor should hold that type of power and be able to change the seating composition whenever they'd like. I think having a more direct democratic approach would be better.
4 I think it is important to protect our leaders. However, as long as people have a peaceful protest, that will catch the attention of our leaders..
5 Sources won’t talk if they might be exposed, and most journalists have “secret” information in order to find the truth.
6 The first amendment to the constitution protects free speech. Having a journalist have to name sources is going to cause many people to stay silent.
prolific_partyid survey_partyid respondent_party_for_inparty_outparty
1 Republican 1 1
2 Republican 1 1
3 Democrat 6 -1
4 Republican 4 0
5 Democrat 6 -1
6 Republican 3 1
respondent_gender politician_gender politician_party
1 Male Respondent Male Politician Republican
2 Female Respondent Female Politician Democrat
3 Female Respondent Male Politician Democrat
4 Female Respondent Female Politician Republican
5 Female Respondent Male Politician Republican
6 Female Respondent Female Politician Republican
politician_party_for_recoding inparty_outparty
1 1 Inparty
2 -1 Outparty
3 -1 Inparty
4 1 Outparty
5 1 Outparty
6 1 Inparty
Save Data Frame After Removing the Fastest 10% of Respondents (by Survey Duration)
# Load the per-respondent survey durations.
duration_data <- read.csv(
  "C:/Users/emmac/OneDrive/Desktop/HITM/Main survey/Raw data and cleaning documents/survey duration.csv"
)

# Attach durations to the cleaned data. A left join keeps every row of
# `final_data`; respondents with no match in `duration_data` get NA in the
# columns that come from it.
final_data_with_durations <- final_data |>
  left_join(duration_data, by = "prolific_id")

head(final_data_with_durations)
# Remove the fastest respondents: every row whose survey duration is at or
# below the 10th percentile of all observed durations.
durations <- final_data_with_durations$duration
threshold <- quantile(durations, 0.10, na.rm = TRUE)

# Index with which() rather than the raw logical vector. With a logical
# index, a row whose duration is NA produces an NA index, and `[` then
# returns a row filled entirely with NA in its place, silently corrupting
# the saved data. which() keeps only the positions where the comparison is
# TRUE, so rows with an unknown duration are dropped instead.
keep_rows <- which(durations > threshold)
final_data_with_durations <- final_data_with_durations[keep_rows, ]

saveRDS(final_data_with_durations, "final_data_with_durations.rds")
Summary Statistics for Conditions
### Sex crosstabs: percentage of female and male respondents per condition.
#
# Each share is computed directly from the respondent's label. The previous
# version took the first 12 rows of count(respondent_gender, condition) and
# named that share `Female`; count() orders rows by factor level, and when
# "Male Respondent" is the first level of `respondent_gender` those rows
# are the male respondents, so the two columns were swapped. Computing
# each share by label does not depend on level order, and it stays correct
# if a third category is present (the two columns then sum to under 100).
data_proportions_gender <- recoded_data |>
  summarise(
    Female = mean(respondent_gender == "Female Respondent") * 100,
    Male = mean(respondent_gender == "Male Respondent") * 100,
    .by = condition
  ) |>
  # One row per condition, in ascending condition order.
  arrange(condition) |>
  print()