Questions about and limitations with the data

–how many people have answered both surveys?
–jails with fewer than 5 responses are suppressed, so the nationwide tibbles must be used to get nationwide totals
–survey responses are not representative
–data is not broken down at the individual level but aggregated to the level of each jail
–the number of respondents to these questions in any given jail is pretty small

Setup

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# folder that holds every survey csv used in this project
data_folder <- "../data/"

# full paths of all csv files found in that folder
csv_files <- list.files(path = data_folder, pattern = "\\.csv$", full.names = TRUE)

# name for each imported table: the file name without its .csv extension
frame_names <- str_remove(basename(csv_files), "\\.csv$")

# read every csv and keep it as a plain data frame in a named list
data_frames <- csv_files %>%
  set_names(frame_names) %>%
  map(function(path) as.data.frame(read_csv(path)))
## Rows: 3 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): eligible_to_vote
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 379 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, eligible_to_vote
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 445 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, gender
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): people_with_convictions_should_vote
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 22 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, people_with_convictions_should_vote
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sentence_length
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 26 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, sentence_length
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 5 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): skills_for_politics
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 418 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, skills_for_politics
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 457 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, us_ready_elect_woman_president
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): vote_impact_government_level
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 385 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, vote_impact_government_level
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 5 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): who_vote_for
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): who_vote_for_supplement
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 423 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, who_vote_for_supplement
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 373 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): state, answer_set, facility_name, who_vote_for
## dbl (3): count, n_respondents, pct_of_respondents
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# expose every imported data frame as its own variable in the global environment,
# using the list names (file names without .csv) as the variable names
list2env(x = data_frames, envir = .GlobalEnv)
## <environment: R_GlobalEnv>
# list which data frames were imported
data_frames %>%
  names() %>%
  print()
##  [1] "eligible_to_vote_nationwide"            
##  [2] "eligible_to_vote"                       
##  [3] "gender"                                 
##  [4] "people_with_convictions_nationwide"     
##  [5] "people_with_convictions"                
##  [6] "sentence_length_nationwide"             
##  [7] "sentence_length"                        
##  [8] "skills_for_politics_nationwide"         
##  [9] "skills_for_politics"                    
## [10] "us_ready_elect_woman_president"         
## [11] "vote_impact_government_level_nationwide"
## [12] "vote_impact_government_level"           
## [13] "who_vote_for_nationwide"                
## [14] "who_vote_for_supplement_nationwide"     
## [15] "who_vote_for_supplement"                
## [16] "who_vote_for"

Voting Eligibility

# peek at the facility-level eligibility answers: how many respondents are
# not sure whether they are eligible to vote?
eligible_to_vote %>%
  head()
##   state  answer_set                          facility_name eligible_to_vote
## 1    IL all-surveys                        Cook County, IL              Yes
## 2    CA all-surveys                 Santa Clara County, CA              Yes
## 3    AZ all-surveys   Maricopa County Sheriff's Office, AZ               No
## 4    AZ all-surveys   Maricopa County Sheriff's Office, AZ              Yes
## 5    CA all-surveys Sacramento County Sheriff's Office, CA              Yes
## 6    AZ all-surveys   Maricopa County Sheriff's Office, AZ         Not sure
##   count n_respondents pct_of_respondents
## 1   356           418          0.8516746
## 2   238           347          0.6858790
## 3   211           505          0.4178218
## 4   164           505          0.3247525
## 5   130           233          0.5579399
## 6   129           505          0.2554455
# nationwide totals for the same eligibility question
eligible_to_vote_nationwide %>%
  head()
##   eligible_to_vote count n_respondents pct_of_respondents
## 1              Yes  4165          8030          0.5186800
## 2         Not sure  1676          8030          0.2087173
## 3               No  1585          8030          0.1973848
# 1,676 incarcerated people (20.9% of survey respondents) are not sure about their eligibility to vote

# facility-level breakdown of the "Not sure" answers, highest share first
not_sure_eligibility <- eligible_to_vote %>%
  filter(eligible_to_vote == "Not sure") %>%  # keep only the "Not sure" rows
  select(state, facility_name, count, n_respondents, pct_of_respondents) %>%  # drop columns not needed here
  arrange(desc(pct_of_respondents))  # largest share of unsure respondents on top

not_sure_eligibility %>%
  head()
##   state                             facility_name count n_respondents
## 1    NM          Eddy County Detention Center, NM    10            19
## 2    MA Berkshire County House of Corrections, MA    10            20
## 3    SC                      Clarendon County, SC     6            13
## 4    NC     Durham County NC - Detention Facility     9            20
## 5    PA                     Washington County, PA    19            43
## 6    NJ                          Ocean County, NJ     7            16
##   pct_of_respondents
## 1          0.5263158
## 2          0.5000000
## 3          0.4615385
## 4          0.4500000
## 5          0.4418605
## 6          0.4375000
# same ranking, limited to jails with more than 50 respondents
filter(not_sure_eligibility, n_respondents > 50)
##    state                               facility_name count n_respondents
## 1     TX                 Potter County Detention, TX    20            52
## 2     WA                           Yakima County, WA    29            82
## 3     TX                        Guadalupe County, TX    18            52
## 4     IN               Hamilton County Main Jail, IN    22            65
## 5     CA               Orange County, CA - Theo Lacy    31            93
## 6     OH          Mahoning County Justice Center, OH    25            75
## 7     WA                     Spokane County Jail, WA    24            72
## 8     TX                      Travis County Jail, TX    53           162
## 9     GA               Athens Clarke County Jail, GA    17            52
## 10    PA                        Lackawana County, PA    27            84
## 11    IL                        Winnebago County, IL    34           108
## 12    MI                           Macomb County, MI    17            54
## 13    CA      Sacramento County Sheriff's Office, CA    70           233
## 14    NC     Mecklenburg County Sheriffs Office,  NC    20            67
## 15    PA                     Westmoreland County, PA    17            57
## 16    CA    Orange County, CA - Central Jail Complex    20            68
## 17    TX                           Collin County, TX    15            51
## 18    VA    Virginia Beach Correctional Facility, VA    23            80
## 19    AZ                        Pima County Jail, AZ    42           149
## 20    CA                       Stanislaus County, CA    38           142
## 21    CA                 San Joaquin County Jail, CA    20            78
## 22    AZ        Maricopa County Sheriff's Office, AZ   129           505
## 23    NY                          Suffolk County, NY    13            51
## 24    CA         Ventura County Sheriff's Office, CA    18            72
## 25    TX                         McLennan County, TX    14            57
## 26    CO                             Weld County, CO    21            88
## 27    NC                       Cumberland County, NC    17            72
## 28    CA                           Fresno County, CA    38           166
## 29    PA                     Dauphin County Jail, PA    19            84
## 30    NY      Monroe County Sheriff's Department, NY    14            62
## 31    NY     Albany County Correctional Facility, NY    16            71
## 32    OH          Cuyahoga County Justice Center, OH    34           151
## 33    PA                           Lehigh County, PA    13            58
## 34    VA          Chesapeake Correctional Center, VA    14            64
## 35    GA         Gwinnett County Sheriffs Office, GA    17            78
## 36    MA              Essex County Middleton HOC, MA    13            60
## 37    CA                    Santa Barbara County, CA    11            51
## 38    CO                             Mesa County, CO    12            59
## 39    PA Montgomery County Correctional Facility, PA    11            56
## 40    CA                      Santa Clara County, CA    64           347
## 41    MO                   St. Louis County Jail, MO    12            74
## 42    MA   Middlesex County House of Corrections, MA     8            53
## 43    MI                             Kent County, MI    10            72
## 44    MA                   Worcester County Jail, MA     7            51
## 45    MA                     Suffolk County Jail, MA     8            60
## 46    CO                           Pueblo County, CO     8            63
## 47    CO                      Denver County Jail, CO    11           103
## 48    IL                             Cook County, IL    32           418
##    pct_of_respondents
## 1          0.38461538
## 2          0.35365854
## 3          0.34615385
## 4          0.33846154
## 5          0.33333333
## 6          0.33333333
## 7          0.33333333
## 8          0.32716049
## 9          0.32692308
## 10         0.32142857
## 11         0.31481481
## 12         0.31481481
## 13         0.30042918
## 14         0.29850746
## 15         0.29824561
## 16         0.29411765
## 17         0.29411765
## 18         0.28750000
## 19         0.28187919
## 20         0.26760563
## 21         0.25641026
## 22         0.25544554
## 23         0.25490196
## 24         0.25000000
## 25         0.24561404
## 26         0.23863636
## 27         0.23611111
## 28         0.22891566
## 29         0.22619048
## 30         0.22580645
## 31         0.22535211
## 32         0.22516556
## 33         0.22413793
## 34         0.21875000
## 35         0.21794872
## 36         0.21666667
## 37         0.21568627
## 38         0.20338983
## 39         0.19642857
## 40         0.18443804
## 41         0.16216216
## 42         0.15094340
## 43         0.13888889
## 44         0.13725490
## 45         0.13333333
## 46         0.12698413
## 47         0.10679612
## 48         0.07655502
# These facilities in Texas, Washington and Indiana could be interesting to contact

# nationwide view: when do respondents think people with convictions should be able to vote?
people_with_convictions_nationwide %>%
  head()
##                                           people_with_convictions_should_vote
## 1                                            While incarcerated for any crime
## 2                                                 After they have left prison
## 3 While incarcerated, but only if they were convicted of non-violent offenses
##   count n_respondents pct_of_respondents
## 1   213           375         0.56800000
## 2    95           375         0.25333333
## 3    22           375         0.05866667
# 57% of respondents said they should be able to vote while incarcerated 
# note - question is worded "after they left prison" but these respondents are in jail, not prison
# looks like there was no answer option like "they should never be allowed to vote"

# Facility-level view: share of respondents at each jail who said people with
# convictions should be able to vote while incarcerated for any crime.
# Filter and sort the full table first and only then take the top rows.
# Calling head() first keeps just the first six rows of the unfiltered table,
# so any matching facility beyond those rows is silently dropped and patterns
# in the result (e.g. which states dominate) can be an artifact of truncation.
people_with_convictions %>%
  filter(people_with_convictions_should_vote == "While incarcerated for any crime") %>%
  arrange(desc(pct_of_respondents)) %>%
  head()
##   state  answer_set                      facility_name
## 1    TX all-surveys               MTC: Giles Dalby, TX
## 2    WV all-surveys WV DCR North Central Regional Jail
## 3    WV all-surveys  WV DCR Southwestern Regional Jail
## 4    WV all-surveys WV DCR Tygart Valley Regional Jail
##   people_with_convictions_should_vote count n_respondents pct_of_respondents
## 1    While incarcerated for any crime    57            89          0.6404494
## 2    While incarcerated for any crime    47            74          0.6351351
## 3    While incarcerated for any crime    22            36          0.6111111
## 4    While incarcerated for any crime    19            40          0.4750000
#interesting that three of the four jails listed here are in West Virginia

# nationwide answers by level of government (vote_impact_government_level)
vote_impact_government_level_nationwide %>%
  head()
##   vote_impact_government_level count n_respondents pct_of_respondents
## 1                        State  2100          7082         0.29652640
## 2                  City / Town  2025          7082         0.28593618
## 3                      Federal  1660          7082         0.23439706
## 4                       County   605          7082         0.08542785
# about 30% think biggest impact will be at the state level

Voting in 2024 Election

# compare support for the major candidates between the first and second survey
# first survey: 56% for Trump, 9.7% for Biden
who_vote_for_nationwide %>%
  head()
##            who_vote_for count n_respondents pct_of_respondents
## 1       Donald J. Trump  4708          8385         0.56147883
## 2             Joe Biden   810          8385         0.09660107
## 3         Wouldn't vote   700          8385         0.08348241
## 4 Third-Party Candidate   425          8385         0.05068575
## 5            Don't know   383          8385         0.04567680
# second survey: 52% for Trump, 24.6% for Harris
who_vote_for_supplement_nationwide %>%
  head()
##          who_vote_for_supplement count n_respondents pct_of_respondents
## 1                Donald J. Trump  4381          8396       0.5217960934
## 2                  Kamala Harris  2068          8396       0.2463077656
## 3                     Don't know   342          8396       0.0407336827
## 4                  Wouldn't vote   265          8396       0.0315626489
## 5          Third-party candidate    43          8396       0.0051214864
## 6 Donald J. Trump, Kamala Harris     8          8396       0.0009528347
# interestingly, a LOT more support for Harris than for Biden

# facility-level comparison, restricted to jails with more than 50 respondents

# first survey: share of respondents at each large jail who chose Joe Biden,
# highest share first
biden_vote <- who_vote_for %>%
  filter(who_vote_for == "Joe Biden" & n_respondents > 50) %>%
  arrange(desc(pct_of_respondents)) %>%
  select(facility_name, pct_for_biden = pct_of_respondents)

# second survey: share of respondents at each large jail who chose Kamala Harris,
# highest share first
harris_vote <- who_vote_for_supplement %>%
  filter(who_vote_for_supplement == "Kamala Harris" & n_respondents > 50) %>%
  arrange(desc(pct_of_respondents)) %>%
  select(facility_name, pct_for_harris = pct_of_respondents)

# match the two surveys on facility_name, keeping every jail from biden_vote;
# jails with no row in harris_vote get NA for the Harris columns.
# pct_point_difference is Harris minus Biden on the 0-1 proportion scale
# (0.25 = 25 percentage points), sorted with the biggest gain first
biden_vs_harris <- biden_vote %>%
  left_join(harris_vote, by = "facility_name") %>%
  mutate(pct_point_difference = pct_for_harris - pct_for_biden) %>%
  arrange(desc(pct_point_difference))

# show the comparison table
print(biden_vs_harris)
##                                  facility_name pct_for_biden pct_for_harris
## 1                         MTC: Giles Dalby, TX    0.10112360      0.3548387
## 2                              Weld County, CO    0.13043478      0.3493976
## 3                              Cook County, IL    0.13776722      0.3415385
## 4          Gwinnett County Sheriffs Office, GA    0.09210526      0.2807018
## 5  Montgomery County Correctional Facility, PA    0.16363636      0.3508772
## 6                       Travis County Jail, TX    0.10240964      0.2888889
## 7                              Kent County, MI    0.15492958      0.3392857
## 8                        Stanislaus County, CA    0.09589041      0.2777778
## 9           Cuyahoga County Justice Center, OH    0.09150327      0.2720000
## 10                        Pima County Jail, AZ    0.16326531      0.3333333
## 11                           Pueblo County, CO    0.14062500      0.3018868
## 12                             Mesa County, CO    0.13114754      0.2909091
## 13                           Fresno County, CA    0.19760479      0.3538462
## 14                        Lackawana County, PA    0.15116279      0.2985075
## 15               Orange County, CA - Theo Lacy    0.19565217      0.3421053
## 16        Maricopa County Sheriff's Office, AZ    0.16338583      0.3050000
## 17                     Dauphin County Jail, PA    0.07142857      0.2105263
## 18      Sacramento County Sheriff's Office, CA    0.09871245      0.2280702
## 19               Hamilton County Main Jail, IN    0.12500000      0.2539683
## 20                        Winnebago County, IL    0.10185185      0.2293578
## 21    Orange County, CA - Central Jail Complex    0.11594203      0.2238806
## 22                      Denver County Jail, CO    0.14423077      0.2465753
## 23         Ventura County Sheriff's Office, CA    0.22222222      0.3018868
## 24          WV DCR North Central Regional Jail    0.09333333      0.1515152
## 25                      Santa Clara County, CA    0.23295455             NA
## 26                    Santa Barbara County, CA    0.22641509             NA
## 27                         McLennan County, TX    0.21052632             NA
## 28                     Suffolk County Jail, MA    0.17741935             NA
## 29     Mecklenburg County Sheriffs Office,  NC    0.16923077             NA
## 30                 San Joaquin County Jail, CA    0.16455696             NA
## 31          Chesapeake Correctional Center, VA    0.15625000             NA
## 32                       Cumberland County, NC    0.13888889             NA
## 33     Albany County Correctional Facility, NY    0.13888889             NA
## 34                   St. Louis County Jail, MO    0.13513514             NA
## 35               Athens Clarke County Jail, GA    0.13461538             NA
## 36   Middlesex County House of Corrections, MA    0.13461538             NA
## 37              Essex County Middleton HOC, MA    0.13333333             NA
## 38                   Worcester County Jail, MA    0.11764706             NA
## 39                           Yakima County, WA    0.11627907             NA
## 40          Mahoning County Justice Center, OH    0.11538462             NA
## 41                          Suffolk County, NY    0.11538462             NA
## 42                           Collin County, TX    0.11538462             NA
## 43                 Potter County Detention, TX    0.11538462             NA
## 44      Monroe County Sheriff's Department, NY    0.09375000             NA
##    pct_point_difference
## 1            0.25371511
## 2            0.21896281
## 3            0.20377124
## 4            0.18859649
## 5            0.18724083
## 6            0.18647925
## 7            0.18435614
## 8            0.18188737
## 9            0.18049673
## 10           0.17006803
## 11           0.16126179
## 12           0.15976155
## 13           0.15624136
## 14           0.14734467
## 15           0.14645309
## 16           0.14161417
## 17           0.13909774
## 18           0.12935773
## 19           0.12896825
## 20           0.12750595
## 21           0.10793857
## 22           0.10234457
## 23           0.07966457
## 24           0.05818182
## 25                   NA
## 26                   NA
## 27                   NA
## 28                   NA
## 29                   NA
## 30                   NA
## 31                   NA
## 32                   NA
## 33                   NA
## 34                   NA
## 35                   NA
## 36                   NA
## 37                   NA
## 38                   NA
## 39                   NA
## 40                   NA
## 41                   NA
## 42                   NA
## 43                   NA
## 44                   NA
# biggest swing was in MTC Giles Dalby in Texas -- 25 percentage point difference

# future analysis - how do the gender and race of respondents affect their likelihood of voting for Trump/Biden?

Testing a Gender Effect

# are jails with more female respondents more likely to support kamala harris?

# new dataset just showing jails with female respondents
jails_with_women <- gender %>%
  filter(gender == "Woman") %>%
  select(facility_name, pct_of_respondents) %>%
  arrange(desc(pct_of_respondents)) %>%
  rename(pct_women = pct_of_respondents)

# per-facility share of respondents answering "Yes" to the
# ready-to-elect-a-woman-president question, kept as facility_name +
# pct_ready_for_woman_pres and sorted from largest share to smallest
ready_for_woman_pres <- us_ready_elect_woman_president %>%
  filter(us_ready_elect_woman_president == "Yes") %>%
  select(facility_name, pct_ready_for_woman_pres = pct_of_respondents) %>%
  arrange(desc(pct_ready_for_woman_pres))

# preview the first rows (facilities with the largest share of women)
jails_with_women %>% head()
##                                facility_name pct_women
## 1                         Pulaski County, KY 0.7000000
## 2 Botetourt Craig Public Safety Facility, VA 0.5333333
## 3      Pickens County Detention Facility, SC 0.5172414
## 4                         Edovo Go - General 0.4666667
## 5                         Wichita County, TX 0.4545455
## 6                  Venango County Prison, PA 0.4375000
# preview the first rows (facilities with the largest share of "Yes" answers)
ready_for_woman_pres %>% head()
##                              facility_name pct_ready_for_woman_pres
## 1               Livingston County Jail, NY                0.8750000
## 2           Whatcom County, WA - Main Jail                0.8125000
## 3                  Catoosa County Jail, GA                0.7777778
## 4                   Dakota County Jail, MN                0.7777778
## 5 Scotts Bluff County Detention Center, NE                0.7692308
## 6   Bucks County Correctional Facility, PA                0.7500000
# attach the "Yes" share to every facility in jails_with_women, matching on
# facility_name; facilities with no match keep NA in pct_ready_for_woman_pres
women_jails <- jails_with_women %>%
  left_join(ready_for_woman_pres, by = "facility_name")

# show every row of the joined table
print(x = women_jails)
##                                     facility_name  pct_women
## 1                              Pulaski County, KY 0.70000000
## 2      Botetourt Craig Public Safety Facility, VA 0.53333333
## 3           Pickens County Detention Facility, SC 0.51724138
## 4                              Edovo Go - General 0.46666667
## 5                              Wichita County, TX 0.45454545
## 6                       Venango County Prison, PA 0.43750000
## 7     Barnstable County Correctional Facility, MA 0.41379310
## 8             Forsyth County Detention Center, GA 0.38095238
## 9                             McLennan County, TX 0.37500000
## 10                           Fairfield County, OH 0.37500000
## 11                           Muskingum County, OH 0.35714286
## 12                           Western Regional, VA 0.35185185
## 13                         Medina County Jail, OH 0.35000000
## 14                          Livingston County, MI 0.34375000
## 15           Citrus County Detention Facility, FL 0.33333333
## 16                        Anne Arundel County, MD 0.33333333
## 17                        Yavapai County Jail, AZ 0.32432432
## 18           Columbia County Detention Center, GA 0.31250000
## 19                                Iron County, UT 0.30000000
## 20                      Washtenaw County Jail, MI 0.28000000
## 21                                Hunt County, TX 0.28000000
## 22                  Athens Clarke County Jail, GA 0.27941176
## 23                         Etowah County Jail, AL 0.27777778
## 24                              Pueblo County, CO 0.27659574
## 25               Eddy County Detention Center, NM 0.27586207
## 26                            Onondaga County, NY 0.27272727
## 27                         Warren County Jail, TN 0.26923077
## 28                  Hamilton County Main Jail, IN 0.26530612
## 29       Chaves County Adult Detention Center, NM 0.26470588
## 30            Henrico County Sheriff's Office, VA 0.26086957
## 31                            Sarasota County, FL 0.25757576
## 32                          Brown County Jail, WI 0.25675676
## 33                   Douglas Detention Center, CO 0.25581395
## 34                             Larimer County, CO 0.25531915
## 35             Chesapeake Correctional Center, VA 0.25333333
## 36             Greene County, OH: Adult Detention 0.25000000
## 37             Mahoning County Justice Center, OH 0.24705882
## 38                             Portage County, OH 0.24137931
## 39            Shawnee County Detention Center, KS 0.23684211
## 40                              Nassau County, FL 0.23333333
## 41                       Sullivan County Jail, TN 0.22727273
## 42                             Lincoln County, OR 0.22500000
## 43                      St. Louis County Jail, MO 0.22340426
## 44                              Macomb County, MI 0.21818182
## 45                                Kent County, MI 0.21000000
## 46                              Madera County, CA 0.20879121
## 47                               Dodge County, WI 0.20588235
## 48                                Polk County, IA 0.20454545
## 49    Montgomery County Correctional Facility, PA 0.20430108
## 50                     Pamunkey Regional Jail, VA 0.20338983
## 51                           Santa Rosa County FL 0.20000000
## 52              Curry County Detention Center, NM 0.20000000
## 53                      Sheboygan County Jail, WI 0.20000000
## 54                          Pinal County Jail, AZ 0.19607843
## 55                              Sonoma County, CA 0.19512195
## 56                   WV DCR Central Regional Jail 0.19512195
## 57                    Potter County Detention, TX 0.19480519
## 58                            Hamilton County, TN 0.19148936
## 59             WV DCR North Central Regional Jail 0.19130435
## 60               Luna County Detention Center, NM 0.18918919
## 61                King County Adult Detention, WA 0.18750000
## 62                 Rappahannock Regional Jail, VA 0.18666667
## 63                         Onslow County Jail, NC 0.18181818
## 64                        Westmoreland County, PA 0.17977528
## 65                       Pinellas County Jail, FL 0.17777778
## 66                         Craven County Jail, NC 0.17647059
## 67                            Franklin County, PA 0.17647059
## 68                Lea County Sheriff's Office, NM 0.17142857
## 69              WV DCR Southwestern Regional Jail 0.17021277
## 70         Washington County Detention Center, MD 0.16666667
## 71                                 Lee County, FL 0.16393443
## 72                       Escambia County Jail, FL 0.16363636
## 73                              Summit County, OH 0.16326531
## 74                 Whatcom County, WA - Main Jail 0.16326531
## 75                       Santa Barbara County, CA 0.16216216
## 76                                Mesa County, CO 0.16129032
## 77           Maricopa County Sheriff's Office, AZ 0.16118048
## 78                             Johnson County, TX 0.16000000
## 79                        Elkhart County Jail, IN 0.15789474
## 80     Richland County Glenn Detention Center, SC 0.15789474
## 81                    San Joaquin County Jail, CA 0.15625000
## 82                   Pasco County Corrections, FL 0.15540541
## 83                                Cobb County, GA 0.15517241
## 84             Onondaga County-Justice Center, NY 0.15384615
## 85                           Lancaster County, NE 0.14864865
## 86                              Collin County, TX 0.14705882
## 87                           Guadalupe County, TX 0.14473684
## 88         Sacramento County Sheriff's Office, CA 0.14429530
## 89       Orange County, CA - Central Jail Complex 0.14285714
## 90       Niagara County Correctional Facility, NY 0.14285714
## 91                                Weld County, CO 0.14179104
## 92         Bristol County House of Correction, MA 0.14035088
## 93                             Suffolk County, NY 0.13888889
## 94            Ventura County Sheriff's Office, CA 0.13861386
## 95                             Lebanon County, PA 0.13846154
## 96                    Lancaster County Prison, PA 0.13793103
## 97                        Suffolk County Jail, MA 0.13698630
## 98                              Yakima County, WA 0.13513514
## 99             WV DCR Tygart Valley Regional Jail 0.13333333
## 100                Cape Girardeau County Jail, MO 0.13043478
## 101       Roanoke City Adult Detention Center, VA 0.12500000
## 102                              Butte County, CA 0.12280702
## 103                               Kane County, IL 0.12121212
## 104                     Salt Lake City County, UT 0.12121212
## 105 Okaloosa County Department of Corrections, FL 0.12068966
## 106                        Santa Clara County, CA 0.11977716
## 107                          Bibb County Jail, GA 0.11764706
## 108                          Lackawana County, PA 0.11607143
## 109                          Leon County Jail, FL 0.11340206
## 110       Mecklenburg County Sheriffs Office,  NC 0.11111111
## 111      Virginia Beach Correctional Facility, VA 0.11009174
## 112          Davidson County Detention Center, TN 0.10924370
## 113            Cuyahoga County Justice Center, OH 0.10593220
## 114                          Pima County Jail, AZ 0.10476190
## 115       Jefferson County Detention Facility, CO 0.10389610
## 116                         Washington County, PA 0.09836066
## 117                       Spokane County Jail, WA 0.09638554
## 118      Hillsborough County Sheriff's Office, FL 0.09340659
## 119                         Stanislaus County, CA 0.09195402
## 120        Monroe County Sheriff's Department, NY 0.09195402
## 121            DC DOC: Central Detention Facility 0.08641975
## 122                       Dauphin County Jail, PA 0.08403361
## 123                               Cook County, IL 0.07557118
## 124                        Denver County Jail, CO 0.07333333
## 125           Gwinnett County Sheriffs Office, GA 0.07142857
## 126                         Cumberland County, NC 0.06976744
## 127                        Travis County Jail, TX 0.06341463
##     pct_ready_for_woman_pres
## 1                         NA
## 2                         NA
## 3                  0.4642857
## 4                         NA
## 5                         NA
## 6                         NA
## 7                         NA
## 8                         NA
## 9                  0.4791667
## 10                        NA
## 11                        NA
## 12                 0.5185185
## 13                 0.6363636
## 14                 0.6000000
## 15                 0.2666667
## 16                        NA
## 17                 0.4444444
## 18                 0.5882353
## 19                 0.6666667
## 20                 0.5000000
## 21                        NA
## 22                 0.4687500
## 23                 0.4137931
## 24                 0.6078431
## 25                 0.3809524
## 26                 0.6000000
## 27                 0.4615385
## 28                 0.5714286
## 29                        NA
## 30                 0.4583333
## 31                 0.6060606
## 32                 0.5428571
## 33                 0.5333333
## 34                 0.5483871
## 35                 0.5937500
## 36                        NA
## 37                 0.5862069
## 38                 0.4736842
## 39                 0.5757576
## 40                 0.4545455
## 41                 0.4886364
## 42                 0.5384615
## 43                 0.5277778
## 44                        NA
## 45                 0.5000000
## 46                 0.5312500
## 47                 0.4444444
## 48                 0.5384615
## 49                 0.5789474
## 50                 0.2857143
## 51                 0.2812500
## 52                 0.6000000
## 53                 0.3666667
## 54                 0.6363636
## 55                 0.6451613
## 56                 0.2258065
## 57                 0.5319149
## 58                 0.5434783
## 59                 0.4393939
## 60                 0.4000000
## 61                 0.6315789
## 62                 0.4210526
## 63                 0.5757576
## 64                 0.3870968
## 65                 0.4772727
## 66                 0.6000000
## 67                 0.6250000
## 68                 0.5769231
## 69                 0.3750000
## 70                 0.3666667
## 71                 0.5294118
## 72                 0.3214286
## 73                 0.5384615
## 74                 0.8125000
## 75                 0.6097561
## 76                 0.5272727
## 77                 0.5419463
## 78                 0.4545455
## 79                 0.4406780
## 80                 0.5000000
## 81                 0.5555556
## 82                 0.4081633
## 83                 0.5526316
## 84                 0.4090909
## 85                 0.5000000
## 86                 0.5000000
## 87                 0.4047619
## 88                 0.4957265
## 89                 0.5294118
## 90                 0.5217391
## 91                 0.5487805
## 92                 0.5384615
## 93                 0.6000000
## 94                 0.5094340
## 95                 0.3529412
## 96                 0.5087719
## 97                 0.5454545
## 98                 0.3953488
## 99                 0.2857143
## 100                0.4500000
## 101                0.5641026
## 102                0.4736842
## 103                0.5681818
## 104                0.4687500
## 105                0.4067797
## 106                0.5937500
## 107                0.4705882
## 108                0.4242424
## 109                0.4947368
## 110                0.4130435
## 111                0.4444444
## 112                0.5000000
## 113                0.4435484
## 114                0.5272727
## 115                0.5714286
## 116                0.5135135
## 117                       NA
## 118                0.3989071
## 119                0.5294118
## 120                0.4634146
## 121                0.3783784
## 122                0.3421053
## 123                0.4751553
## 124                0.4507042
## 125                0.4814815
## 126                0.5000000
## 127                0.4772727
# Scatter plot of the share of women among each facility's respondents (x)
# against the share answering "Yes" to the ready-for-a-woman-president
# question (y), with a straight least-squares line and no confidence band.
# Both columns are proportions between 0 and 1 and describe survey
# respondents, not the whole jail population, so the labels say
# "share of respondents" rather than "percentage of women in jails".
# Facilities with NA on either axis are dropped by ggplot with a warning.
ggplot(women_jails, aes(x = pct_women, y = pct_ready_for_woman_pres)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Share of Women Respondents vs. Readiness for a Woman President",
    x = "Share of respondents who are women",
    y = "Share of respondents saying the US is ready for a woman president"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`geom_point()`).

# no clear relationship is visible in the plot; this was an eyeball check, not a formal significance test