Final Project

DACSS 603, Spring 2022

Erin Tracy
4/24/2022
# Setup ----
# Echo every chunk's code in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Use a single HTTPS CRAN mirror for any installs triggered below.
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Packages this report depends on. Installing them unconditionally on every
# knit re-downloads each one and prints installer logs into the report, so
# only the packages that are actually missing get installed.
required_pkgs <- c(
  "plyr", "readr", "tidyverse", "readxl", "dplyr",
  "here", "ggplot2", "distill", "knitr"
)
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# Attach the packages whose functions are called unqualified below:
# read_csv() (readr), here() (here), and ggplot()/geom_smooth() (ggplot2).
library(readr)
library(here)
library(ggplot2)
# Read the survey CSV (path resolved through here()) and print summary
# statistics for every column.
data <- read_csv(file = here("2020Social_Media.csv"))
summary(object = data)
     caseid     profile_gender   profile_age   profile_racethnicity
 Min.   :   1   Min.   :1.000   Min.   :18.0   Min.   :1.000       
 1st Qu.:1459   1st Qu.:1.000   1st Qu.:36.0   1st Qu.:1.000       
 Median :2916   Median :1.000   Median :51.0   Median :1.000       
 Mean   :2915   Mean   :1.495   Mean   :50.5   Mean   :1.636       
 3rd Qu.:4371   3rd Qu.:2.000   3rd Qu.:65.0   3rd Qu.:2.000       
 Max.   :5830   Max.   :2.000   Max.   :80.0   Max.   :4.000       
 profile_educ5   profile_marital profile_income  profile_region4
 Min.   :1.000   Min.   :1.000   Min.   : 1.00   Min.   :1.000  
 1st Qu.:3.000   1st Qu.:1.000   1st Qu.: 7.00   1st Qu.:2.000  
 Median :3.000   Median :1.000   Median :11.00   Median :3.000  
 Mean   :3.397   Mean   :2.549   Mean   :10.36   Mean   :2.671  
 3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:14.00   3rd Qu.:4.000  
 Max.   :5.000   Max.   :6.000   Max.   :18.00   Max.   :4.000  
 profile_region9 profile_metro    profile_relig     profile_born    
 Min.   :1.00    Min.   :0.0000   Min.   :-7.000   Min.   :-7.0000  
 1st Qu.:3.00    1st Qu.:1.0000   1st Qu.: 1.000   1st Qu.:-1.0000  
 Median :5.00    Median :1.0000   Median : 9.000   Median : 1.0000  
 Mean   :5.23    Mean   :0.8313   Mean   : 6.706   Mean   : 0.8106  
 3rd Qu.:8.00    3rd Qu.:1.0000   3rd Qu.:12.000   3rd Qu.: 2.0000  
 Max.   :9.00    Max.   :1.0000   Max.   :14.000   Max.   : 2.0000  
      ftpp       
 Min.   : -7.00  
 1st Qu.: 28.00  
 Median : 61.00  
 Mean   : 56.98  
 3rd Qu.: 88.00  
 Max.   :100.00  
# Keep only respondents with usable answers on the outcome and on the two
# religion items; negative values in these columns are treated as
# missing-data codes.
# ftpp uses `>= 0` rather than `> 0` so that a rating of exactly 0 is kept
# (assumes ftpp is a 0-100 rating scale -- confirm against the codebook).
# NOTE(review): summary(data) shows profile_born has a first quartile of -1,
# so `profile_born > 0` removes a large share of respondents; if -1 means
# "question not asked", confirm that dropping those respondents is intended.
new_df <- subset(
  data,
  ftpp >= 0 & profile_born > 0 & profile_relig > 0
)
head(new_df)
# A tibble: 6 x 13
  caseid profile_gender profile_age profile_racethnicity profile_educ5
   <dbl>          <dbl>       <dbl>                <dbl>         <dbl>
1    235              2          63                    1             3
2   1286              1          41                    1             3
3   4981              1          52                    2             5
4   1183              2          67                    1             3
5   3158              1          46                    2             4
6   5134              2          20                    1             2
# ... with 8 more variables: profile_marital <dbl>,
#   profile_income <dbl>, profile_region4 <dbl>,
#   profile_region9 <dbl>, profile_metro <dbl>, profile_relig <dbl>,
#   profile_born <dbl>, ftpp <dbl>
# Re-check the per-column summaries after filtering.
summary(object = new_df)
     caseid     profile_gender   profile_age   profile_racethnicity
 Min.   :   1   Min.   :1.000   Min.   :18.0   Min.   :1.00        
 1st Qu.:1513   1st Qu.:1.000   1st Qu.:37.0   1st Qu.:1.00        
 Median :2938   Median :2.000   Median :53.0   Median :1.00        
 Mean   :2929   Mean   :1.536   Mean   :51.8   Mean   :1.67        
 3rd Qu.:4368   3rd Qu.:2.000   3rd Qu.:66.0   3rd Qu.:2.00        
 Max.   :5829   Max.   :2.000   Max.   :80.0   Max.   :4.00        
 profile_educ5  profile_marital profile_income  profile_region4
 Min.   :1.00   Min.   :1.000   Min.   : 1.00   Min.   :1.000  
 1st Qu.:3.00   1st Qu.:1.000   1st Qu.: 7.00   1st Qu.:2.000  
 Median :3.00   Median :1.000   Median :11.00   Median :3.000  
 Mean   :3.37   Mean   :2.475   Mean   :10.21   Mean   :2.644  
 3rd Qu.:4.00   3rd Qu.:5.000   3rd Qu.:13.00   3rd Qu.:3.000  
 Max.   :5.00   Max.   :6.000   Max.   :18.00   Max.   :4.000  
 profile_region9 profile_metro    profile_relig     profile_born  
 Min.   :1.00    Min.   :0.0000   Min.   : 1.000   Min.   :1.000  
 1st Qu.:3.00    1st Qu.:1.0000   1st Qu.: 1.000   1st Qu.:1.000  
 Median :5.00    Median :1.0000   Median : 2.000   Median :2.000  
 Mean   :5.16    Mean   :0.8308   Mean   : 4.889   Mean   :1.686  
 3rd Qu.:7.00    3rd Qu.:1.0000   3rd Qu.:12.000   3rd Qu.:2.000  
 Max.   :9.00    Max.   :1.0000   Max.   :14.000   Max.   :2.000  
      ftpp       
 Min.   :  1.00  
 1st Qu.: 35.00  
 Median : 60.00  
 Mean   : 57.96  
 3rd Qu.: 85.00  
 Max.   :100.00  
# Mean Planned Parenthood rating (+/- 1 standard error) at each education
# level. profile_educ5 is coded 1-5, which gives too few distinct x values
# for the default loess/GAM smoothers behind geom_smooth() to fit reliably,
# so the rating is summarised per level instead of smoothed.
ggplot(data = new_df, mapping = aes(x = profile_educ5, y = ftpp)) +
  stat_summary(fun = mean, geom = "line") +
  stat_summary(fun.data = mean_se, geom = "pointrange") +
  labs(x = "Education", y = "Rating of Planned Parenthood")

# Distribution of Planned Parenthood ratings within each demographic group.
# Every plot gets a descriptive title and a readable x-axis label.
boxplot(
  ftpp ~ profile_educ5,
  data = new_df,
  main = "Rating of Planned Parenthood by Education",
  xlab = "Education",
  ylab = "Rating of Planned Parenthood"
)

boxplot(
  ftpp ~ profile_gender,
  data = new_df,
  main = "Rating of Planned Parenthood by Gender",
  xlab = "Gender",
  ylab = "Rating of Planned Parenthood"
)

boxplot(
  ftpp ~ profile_relig,
  data = new_df,
  main = "Rating of Planned Parenthood by Religion",
  xlab = "Religion",
  ylab = "Rating of Planned Parenthood"
)

boxplot(
  ftpp ~ profile_racethnicity,
  data = new_df,
  main = "Rating of Planned Parenthood by Race/Ethnicity",
  xlab = "Race/Ethnicity",
  ylab = "Rating of Planned Parenthood"
)

boxplot(
  ftpp ~ profile_metro,
  data = new_df,
  main = "Rating of Planned Parenthood by Metro Status",
  xlab = "Metro",
  ylab = "Rating of Planned Parenthood"
)

boxplot(
  ftpp ~ profile_region9,
  data = new_df,
  main = "Rating of Planned Parenthood by Region",
  xlab = "Region (9 categories)",
  ylab = "Rating of Planned Parenthood"
)

# Smoothed trend of the Planned Parenthood rating across respondent age.
ggplot(new_df, aes(x = profile_age, y = ftpp)) +
  geom_smooth()

# The same smoothed trend, across the income scale.
ggplot(new_df, aes(x = profile_income, y = ftpp)) +
  geom_smooth()

# Rating distribution split by the born-again / evangelical indicator.
boxplot(
  ftpp ~ profile_born,
  data = new_df,
  main = "Born-again or Evangelical Christian",
  xlab = "Born",
  ylab = "Rating of Planned Parenthood"
)

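Next steps: use cor() to measure the correlation between ftpp and the numeric predictors.

Use table() to tabulate the categorical predictors.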