HITM Survey Data

Importing data

# Read the Qualtrics survey export and the demographics export.
qualtrics_data <- read.csv(
  "C:/Users/emmac/OneDrive/Desktop/HITM/Main survey/Raw data and cleaning documents/Qualtrics Data.csv"
)

demographic_data <- read.csv(
  "C:/Users/emmac/OneDrive/Desktop/HITM/Main survey/Raw data and cleaning documents/demographics data.csv"
)

# Full join on the participant ID so that rows from either file are kept,
# including rows that have no match in the other file.
raw_data <- full_join(
  x = qualtrics_data,
  y = demographic_data,
  by = "prolific_id"
)
Warning in full_join(x = qualtrics_data, y = demographic_data, by = "prolific_id"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 826 of `x` matches multiple rows in `y`.
ℹ Row 1340 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
# Place the demographic columns, in this order, directly after survey_party_id.
raw_data <- relocate(
  raw_data,
  prolific_party_id, age, ethnicity, sex,
  .after = survey_party_id
)

Cleaning data

# Prolific IDs excluded by hand: low-quality answers and duplicates.
ids_to_remove <- c(
  "65623a563d9d8a788f63db01",
  "66dcf078a9631d32a4d7e4e9",
  "5d8a64630ffb9c001746ca2c",
  "66a252056819ec081dcb1a2e",
  "659ffc7d7d21a61dbc761173",
  "6637b21240220c2517fffa1a",
  "664f42008eff493e94fd9f00",
  "6140c38ec9e5109e65060a70"
)

# Keep finished surveys (rows with a missing `finished` flag pass this step)
# and drop the hand-excluded IDs.
raw_data <- raw_data |>
  filter(
    is.na(finished) | finished == TRUE,
    !(prolific_id %in% ids_to_remove)
  )

# Remove every observation that has an NA in any column.
raw_data <- na.omit(raw_data)

# Sanity check: rows whose prolific_id repeats an earlier row (expect none).
duplicated_ids <- raw_data |>
  filter(duplicated(prolific_id))

print(duplicated_ids)
 [1] survey_progress   finished          prolific_id       survey_party_id  
 [5] prolific_party_id age               ethnicity         sex              
 [9] Q1_1              Q2_1              Q3_1              Q4_1             
[13] Q1_2              Q2_2              Q3_2              Q4_2             
[17] Q1_3              Q2_3              Q3_3              Q4_3             
[21] Q1_4              Q2_4              Q3_4              Q4_4             
[25] Q1_5              Q2_5              Q3_5              Q4_5             
[29] Q1_6              Q2_6              Q3_6              Q4_6             
[33] Q1_7              Q2_7              Q3_7              Q4_7             
[37] Q1_8              Q2_8              Q3_8              Q4_8             
[41] Q1_9              Q2_9              Q3_9              Q4_9             
[45] Q1_10             Q2_10             Q3_10             Q4_10            
[49] Q1_11             Q2_11             Q3_11             Q4_11            
[53] Q1_12             Q2_12             Q3_12             Q4_12            
<0 rows> (or 0-length row.names)

Assign to condition numbers

# Each question is stored in 12 columns (suffix _1 ... _12). Paste the 12
# columns of each question together so every question ends up in one column.
data <- raw_data |>
  mutate(
    opinion_bill = str_c(
      Q1_1, Q1_2, Q1_3, Q1_4, Q1_5, Q1_6,
      Q1_7, Q1_8, Q1_9, Q1_10, Q1_11, Q1_12
    ),
    support_election = str_c(
      Q2_1, Q2_2, Q2_3, Q2_4, Q2_5, Q2_6,
      Q2_7, Q2_8, Q2_9, Q2_10, Q2_11, Q2_12
    ),
    threaten_country = str_c(
      Q3_1, Q3_2, Q3_3, Q3_4, Q3_5, Q3_6,
      Q3_7, Q3_8, Q3_9, Q3_10, Q3_11, Q3_12
    ),
    reasoning = str_c(
      Q4_1, Q4_2, Q4_3, Q4_4, Q4_5, Q4_6,
      Q4_7, Q4_8, Q4_9, Q4_10, Q4_11, Q4_12
    )
  ) |>
  relocate(opinion_bill:reasoning, .after = sex)

# Assign the condition number: the index of the first Q1_<k> column that holds
# a non-empty answer. Rows with no non-empty Q1 column get NA.
data <- data |>
  mutate(
    condition = case_when(
      Q1_1 != "" ~ 1,
      Q1_2 != "" ~ 2,
      Q1_3 != "" ~ 3,
      Q1_4 != "" ~ 4,
      Q1_5 != "" ~ 5,
      Q1_6 != "" ~ 6,
      Q1_7 != "" ~ 7,
      Q1_8 != "" ~ 8,
      Q1_9 != "" ~ 9,
      Q1_10 != "" ~ 10,
      Q1_11 != "" ~ 11,
      Q1_12 != "" ~ 12,
      TRUE ~ NA_real_
    )
  )

# Keep only the analysis columns; select() also fixes the column order, so
# condition lands right after sex without a separate relocate().
data <- data |>
  select(
    prolific_id, survey_party_id, prolific_party_id, age, ethnicity, sex,
    condition, opinion_bill, support_election, threaten_country, reasoning
  )

# Drop any row that still has an NA (including an unassigned condition).
data <- na.omit(data)

head(data)
               prolific_id       survey_party_id prolific_party_id age
1 644123f5b400058bcb5b2e7a     Very conservative        Republican  62
2 6696a09612a7c019e4d67a7c     Very conservative        Republican  55
3 5efc92ff0352d10480e02cdc               Liberal          Democrat  26
4 6701e7c8b53a034b66f93fd8              Moderate        Republican  24
5 66294441f6545cc65fd7e23f               Liberal          Democrat  49
6 65e22b2cb3efd22e847d0bf0 Somewhat conservative        Republican  65
  ethnicity    sex condition    opinion_bill           support_election
1     White   Male         9 Strongly oppose            Strongly oppose
2     White Female         6 Strongly oppose            Strongly oppose
3     White Female         3          Oppose           Somewhat support
4     White Female        10         Support           Somewhat support
5     White Female         8 Somewhat oppose Neither support nor oppose
6     White Female        11 Strongly oppose Neither support nor oppose
            threaten_country
1          Strongly disagree
2          Strongly disagree
3 Neither agree nor disagree
4                      Agree
5          Somewhat disagree
6          Strongly disagree
                                                                                                                                                                                                                                                                                                                          reasoning
1                                                                                                                                                                                        Adding or deletion of seats to change the balance of power is a poor way to try to obtain what you want without going to the voting booth.
2                                                                                                                                                                                                This is a way of adding judges to accommodate the political views of one party or the other. This would be extremely manipulative.
3 At the moment, I don't think I know enough about the ramifications of that bill to vote for or against it. I'm uncertain if one individual like the governor should hold that type of power and be able to change the seating composition whenever they'd like. I think having a more direct democratic approach would be better.
4                                                                                                                                                                                I think it is important to protect our leaders. However, as long as people have a peaceful protest, that will catch the attention of our leaders..
5                                                                                                                                                                                                           Sources won’t talk if they might be exposed, and most journalists have “secret” information in order to find the truth.
6                                                                                                                                                                              The first amendment to the constitution protects free speech. Having a journalist have to name sources is going to cause many people to stay silent.

Recode Variables

recoded_data <- data |>
  mutate(
    # respondent party id as provided by Prolific, stored as a factor
    prolific_partyid = as.factor(prolific_party_id),

    # respondent ideology as provided by the survey: 1 = very conservative,
    # 7 = very liberal; "Haven't thought much about this" shares the midpoint
    survey_partyid = recode(
      survey_party_id,
      "Very conservative" = 1,
      "Conservative" = 2,
      "Somewhat conservative" = 3,
      "Moderate" = 4,
      "Haven't thought much about this" = 4,
      "Somewhat liberal" = 5,
      "Liberal" = 6,
      "Very liberal" = 7
    ),

    # respondent sex as a factor with "Male" as the first level, then relabelled
    respondent_gender = sex |>
      as.factor() |>
      fct_relevel("Male") |>
      fct_recode("Male Respondent" = "Male", "Female Respondent" = "Female"),

    # ethnicity as a numeric code; any other value becomes NA
    ethnicity = recode(
      ethnicity,
      "White" = 0,
      "Black" = 1,
      "Asian" = 2,
      "Mixed" = 3,
      "Other" = 4,
      .default = NA_real_
    )
  )

Recode DVs

recoded_data <- recoded_data |>
  mutate(
    # 1 = strongly oppose ... 7 = strongly support; the extra labels
    # ("6", "7", "Strongly Support") map onto the same codes
    opinion_bill = recode(
      opinion_bill,
      "Strongly oppose" = 1,
      "Oppose" = 2,
      "Somewhat oppose" = 3,
      "Neither support nor oppose" = 4,
      "Somewhat support" = 5,
      "Support" = 6,
      "6" = 6,
      "Strongly Support" = 7,
      "Strongly support" = 7,
      "7" = 7
    ),

    # 1 = strongly oppose ... 7 = strongly support
    support_election = recode(
      support_election,
      "Strongly oppose" = 1,
      "Oppose" = 2,
      "Somewhat oppose" = 3,
      "Neither support nor oppose" = 4,
      "Somewhat support" = 5,
      "Support" = 6,
      "Strongly support" = 7
    ),

    # recode recognition of antidemocratic behavior (reverse-coded)
    # To what extent does this protect our country?
    # 1 = agree that they protect our country/low recognition of antidemocratic behavior
    # 7 = disagree that they protect our country/high recognition of antidemocratic behavior
    threaten_country = recode(
      threaten_country,
      "Strongly disagree" = 7,
      "Disagree" = 6,
      "Somewhat disagree" = 5,
      "Neither agree nor disagree" = 4,
      "Somewhat agree" = 3,
      "Agree" = 2,
      "Strongly agree" = 1
    )
  )

Create Variables for politician gender and party

# Politician gender by condition: 1-3 and 7-9 are male, 4-6 and 10-12 are
# female. Any other condition value yields NA.
recoded_data <- recoded_data |>
  mutate(
    politician_gender = case_when(
      condition %in% c(1:3, 7:9) ~ "Male Politician",
      condition %in% c(4:6, 10:12) ~ "Female Politician"
    ),
    # factor with "Male Politician" as the first level
    politician_gender = fct_relevel(
      as.factor(politician_gender),
      "Male Politician"
    )
  )

# Politician party by condition: 1-6 are Democrat, 7-12 are Republican.
recoded_data <- recoded_data |>
  mutate(
    politician_party = case_when(
      condition %in% 1:6 ~ "Democrat",
      condition %in% 7:12 ~ "Republican"
    ),
    politician_party = as.factor(politician_party)
  )

Create in/outparty variable

recoded_data <- recoded_data |>
  # Respondent side: 1 = any conservative answer, 0 = moderate or
  # "Haven't thought much about this", -1 = any liberal answer.
  mutate(
    respondent_party_for_inparty_outparty = case_when(
      survey_party_id %in% c(
        "Very conservative", "Conservative", "Somewhat conservative"
      ) ~ 1,
      survey_party_id %in% c(
        "Moderate", "Haven't thought much about this"
      ) ~ 0,
      survey_party_id %in% c(
        "Somewhat liberal", "Liberal", "Very liberal"
      ) ~ -1
    ),
    .after = survey_partyid
  ) |>
  mutate(
    # Politician side on the same scale: conditions 1-6 = -1, 7-12 = 1.
    politician_party_for_recoding = case_when(
      condition %in% 1:6 ~ -1,
      condition %in% 7:12 ~ 1
    ),
    # Inparty (0) only when the respondent has a side and it equals the
    # politician's side; moderates and everyone else are outparty (1).
    inparty_outparty = case_when(
      respondent_party_for_inparty_outparty != 0 &
        respondent_party_for_inparty_outparty == politician_party_for_recoding ~ 0,
      TRUE ~ 1
    ),
    inparty_outparty = factor(
      inparty_outparty,
      levels = c(1, 0),
      labels = c("Outparty", "Inparty")
    )
  )


head(recoded_data)
               prolific_id       survey_party_id prolific_party_id age
1 644123f5b400058bcb5b2e7a     Very conservative        Republican  62
2 6696a09612a7c019e4d67a7c     Very conservative        Republican  55
3 5efc92ff0352d10480e02cdc               Liberal          Democrat  26
4 6701e7c8b53a034b66f93fd8              Moderate        Republican  24
5 66294441f6545cc65fd7e23f               Liberal          Democrat  49
6 65e22b2cb3efd22e847d0bf0 Somewhat conservative        Republican  65
  ethnicity    sex condition opinion_bill support_election threaten_country
1         0   Male         9            1                1                7
2         0 Female         6            1                1                7
3         0 Female         3            2                5                4
4         0 Female        10            6                5                2
5         0 Female         8            3                4                5
6         0 Female        11            1                4                7
                                                                                                                                                                                                                                                                                                                          reasoning
1                                                                                                                                                                                        Adding or deletion of seats to change the balance of power is a poor way to try to obtain what you want without going to the voting booth.
2                                                                                                                                                                                                This is a way of adding judges to accommodate the political views of one party or the other. This would be extremely manipulative.
3 At the moment, I don't think I know enough about the ramifications of that bill to vote for or against it. I'm uncertain if one individual like the governor should hold that type of power and be able to change the seating composition whenever they'd like. I think having a more direct democratic approach would be better.
4                                                                                                                                                                                I think it is important to protect our leaders. However, as long as people have a peaceful protest, that will catch the attention of our leaders..
5                                                                                                                                                                                                           Sources won’t talk if they might be exposed, and most journalists have “secret” information in order to find the truth.
6                                                                                                                                                                              The first amendment to the constitution protects free speech. Having a journalist have to name sources is going to cause many people to stay silent.
  prolific_partyid survey_partyid respondent_party_for_inparty_outparty
1       Republican              1                                     1
2       Republican              1                                     1
3         Democrat              6                                    -1
4       Republican              4                                     0
5         Democrat              6                                    -1
6       Republican              3                                     1
  respondent_gender politician_gender politician_party
1   Male Respondent   Male Politician       Republican
2 Female Respondent Female Politician         Democrat
3 Female Respondent   Male Politician         Democrat
4 Female Respondent Female Politician       Republican
5 Female Respondent   Male Politician       Republican
6 Female Respondent Female Politician       Republican
  politician_party_for_recoding inparty_outparty
1                             1          Inparty
2                            -1         Outparty
3                            -1          Inparty
4                             1         Outparty
5                             1         Outparty
6                             1          Inparty

Save Dataframe

# Keep only the variables used in the analysis, in this column order.
final_data <- recoded_data |>
  select(
    prolific_id, age, ethnicity, respondent_gender,
    condition, politician_gender, politician_party,
    prolific_partyid, survey_partyid, inparty_outparty,
    opinion_bill, support_election, threaten_country
  )

head(final_data)
               prolific_id age ethnicity respondent_gender condition
1 644123f5b400058bcb5b2e7a  62         0   Male Respondent         9
2 6696a09612a7c019e4d67a7c  55         0 Female Respondent         6
3 5efc92ff0352d10480e02cdc  26         0 Female Respondent         3
4 6701e7c8b53a034b66f93fd8  24         0 Female Respondent        10
5 66294441f6545cc65fd7e23f  49         0 Female Respondent         8
6 65e22b2cb3efd22e847d0bf0  65         0 Female Respondent        11
  politician_gender politician_party prolific_partyid survey_partyid
1   Male Politician       Republican       Republican              1
2 Female Politician         Democrat       Republican              1
3   Male Politician         Democrat         Democrat              6
4 Female Politician       Republican       Republican              4
5   Male Politician       Republican         Democrat              6
6 Female Politician       Republican       Republican              3
  inparty_outparty opinion_bill support_election threaten_country
1          Inparty            1                1                7
2         Outparty            1                1                7
3          Inparty            2                5                4
4         Outparty            6                5                2
5         Outparty            3                4                5
6          Inparty            1                4                7
# Write the cleaned analysis data to the working directory.
saveRDS(object = final_data, file = "final_data.rds")

Save Dataframe with bottom 10% of survey respondents cut

# Read the survey duration export.
duration_data <- read.csv(
  "C:/Users/emmac/OneDrive/Desktop/HITM/Main survey/Raw data and cleaning documents/survey duration.csv"
)

# Attach durations to the cleaned data by participant ID; respondents with no
# match in the duration file get an NA duration.
final_data_with_durations <- final_data |>
  left_join(duration_data, by = "prolific_id")

head(final_data_with_durations)
               prolific_id age ethnicity respondent_gender condition
1 644123f5b400058bcb5b2e7a  62         0   Male Respondent         9
2 6696a09612a7c019e4d67a7c  55         0 Female Respondent         6
3 5efc92ff0352d10480e02cdc  26         0 Female Respondent         3
4 6701e7c8b53a034b66f93fd8  24         0 Female Respondent        10
5 66294441f6545cc65fd7e23f  49         0 Female Respondent         8
6 65e22b2cb3efd22e847d0bf0  65         0 Female Respondent        11
  politician_gender politician_party prolific_partyid survey_partyid
1   Male Politician       Republican       Republican              1
2 Female Politician         Democrat       Republican              1
3   Male Politician         Democrat         Democrat              6
4 Female Politician       Republican       Republican              4
5   Male Politician       Republican         Democrat              6
6 Female Politician       Republican       Republican              3
  inparty_outparty opinion_bill support_election threaten_country duration
1          Inparty            1                1                7       89
2         Outparty            1                1                7      120
3          Inparty            2                5                4      132
4         Outparty            6                5                2      115
5         Outparty            3                4                5      114
6          Inparty            1                4                7      156
# Cut-off: the 10th percentile of survey duration, ignoring missing durations.
threshold <- quantile(final_data_with_durations$duration, 0.10, na.rm = TRUE)

# Keep only respondents whose duration is above the cut-off (i.e. drop those at
# or below the 10th percentile). filter() discards rows where the comparison is
# NA, so respondents without a recorded duration are removed; base `[` with an
# NA logical index would instead return rows filled entirely with NA.
final_data_with_durations <- final_data_with_durations |>
  filter(duration > threshold)

saveRDS(final_data_with_durations, "final_data_with_durations.rds")

Summary Statistics for Conditions

### sex crosstabs

# Percentage of respondents of each gender within every condition.
# The shares are spread into one column per gender level and then picked by
# name, so each output column is guaranteed to hold the gender it is labelled
# with. (Taking the first 12 rows of the long count table would pick the rows
# of whichever gender level sorts first, which is "Male Respondent".)
data_proportions_gender <- recoded_data |>
  count(respondent_gender, condition) |>
  group_by(condition) |>
  mutate(percent = n / sum(n) * 100) |>
  ungroup() |>
  select(-n) |>
  pivot_wider(
    names_from = respondent_gender,
    values_from = percent,
    values_fill = 0
  ) |>
  select(
    condition,
    Female = `Female Respondent`,
    Male = `Male Respondent`
  ) |>
  print()
# A tibble: 12 × 3
   condition Female  Male
       <dbl>  <dbl> <dbl>
 1         1   41.6  58.4
 2         2   46.6  53.4
 3         3   56.8  43.2
 4         4   54.4  45.6
 5         5   48.1  51.9
 6         6   45.5  54.5
 7         7   53.2  46.8
 8         8   51.0  49.0
 9         9   42.3  57.7
10        10   49.4  50.6
11        11   50.9  49.1
12        12   54.5  45.5
# political affiliation crosstabs

# Percentage of each Prolific party affiliation within every condition,
# one row per condition.
data_proportions_party <- recoded_data |>
  count(prolific_partyid, condition) |>
  group_by(condition) |>
  mutate(percent = n / sum(n) * 100) |>
  ungroup() |>
  select(-n) |>
  pivot_wider(
    names_from = prolific_partyid,
    values_from = percent,
    values_fill = 0
  ) |>
  select(condition, Democrat, Republican, Independent) |>
  print()
# A tibble: 12 × 4
   condition Democrat Republican Independent
       <dbl>    <dbl>      <dbl>       <dbl>
 1         1     31.2       26.0        42.9
 2         2     30.1       27.6        42.3
 3         3     25.2       39.4        35.5
 4         4     27.2       26.6        46.2
 5         5     31.6       25.9        42.4
 6         6     25.6       24.4        50  
 7         7     24.1       25.9        50  
 8         8     34.8       31.0        34.2
 9         9     31.5       28.9        39.6
10        10     26.2       29.4        44.4
11        11     32.7       24.5        42.8
12        12     33.3       26.9        39.7
###### ethnicity crosstabs

# Percentage of each ethnicity code within every condition, one row per
# condition. Codes: 0 = white, 1 = black, 2 = asian, 3 = mixed, 4 = other.
data_proportions_ethnicity <- recoded_data |>
  count(ethnicity, condition) |>
  group_by(condition) |>
  mutate(percent = n / sum(n) * 100) |>
  ungroup() |>
  select(-n) |>
  pivot_wider(
    names_from = ethnicity,
    values_from = percent,
    values_fill = 0
  ) |>
  select(
    condition,
    white = `0`,
    black = `1`,
    asian = `2`,
    mixed = `3`,
    other = `4`
  ) |>
  print()
# A tibble: 12 × 6
   condition white black asian mixed other
       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1         1  63.0 11.7   9.09  9.09  7.14
 2         2  58.9 14.7   5.52 16.6   4.29
 3         3  58.7 12.9   5.81 12.3  10.3 
 4         4  69.0  8.86  4.43 10.1   7.59
 5         5  62.0 14.6   7.59  8.23  7.59
 6         6  64.7  8.97  7.05 12.8   6.41
 7         7  60.1 11.4   7.59 12.7   8.23
 8         8  55.5 12.9   9.03 12.3  10.3 
 9         9  62.4 13.4   7.38  9.40  7.38
10        10  64.4 11.9   5    10.6   8.12
11        11  69.2 11.3   4.40  9.43  5.66
12        12  60.3 13.5   5.77  9.62 10.9 
 ###### age crosstabs


# Bin age into left-closed brackets: [18,25), [25,35), [35,45), [45,55) and
# [55,100]. include.lowest = TRUE closes the last bracket on the right so an
# age of exactly 100 is kept. Ages outside 18-100 become NA.
recoded_data$age_recoded <- cut(
  recoded_data$age,
  breaks = c(18, 25, 35, 45, 55, 100),
  include.lowest = TRUE,
  right = FALSE
)

# Percentage of each age bracket within every condition, one row per condition.
data_proportions_age <- recoded_data |>
  count(age_recoded, condition) |>
  group_by(condition) |>
  mutate(proportion = n / sum(n)) |>
  ungroup() |>
  pivot_wider(
    names_from = age_recoded,
    values_from = c(n, proportion),
    values_fill = 0
  ) |>
  mutate(
    `18-24` = `proportion_[18,25)` * 100,
    `25-34` = `proportion_[25,35)` * 100,
    `35-44` = `proportion_[35,45)` * 100,
    `45-54` = `proportion_[45,55)` * 100,
    `55-100` = `proportion_[55,100]` * 100
  ) |>
  select(condition, `18-24`, `25-34`, `35-44`, `45-54`, `55-100`) |>
  print()
# A tibble: 12 × 6
   condition `18-24` `25-34` `35-44` `45-54` `55-100`
       <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
 1         1    12.3    21.4    18.2    12.3     35.7
 2         2    10.4    20.9    20.2    15.3     33.1
 3         3    11.6    21.3    18.7    16.8     31.6
 4         4    10.1    16.5    16.5    12.7     44.3
 5         5    12.7    19.0    12.0    21.5     34.8
 6         6    15.4    16.7    17.3    16.7     34.0
 7         7    12.0    19.0    19.0    13.3     36.7
 8         8    14.8    20      14.2    14.2     36.8
 9         9    15.4    16.8    18.8    14.8     34.2
10        10    13.8    14.4    22.5    16.9     32.5
11        11    11.9    17.6    13.8    14.5     42.1
12        12    14.1    21.2    19.2    17.3     28.2

Reweighting data

Population proportions, from Qualtrics:

Democrats: 29%, Republicans: 28%, Independents: 42.6%.

Female: 50.25%, male: 49.75%.

Age 18-24: 12.5%, age 25-34: 17.5%, age 35-44: 16.75%, age 45-54: 15.37%, age 55-100: 27.88%.

Asian: 6.25%, Black: 11.75%, mixed: 10.88%, other: 7.75%, white: 63.38%.