Lab project: Publish survey analysis

Load packages

library(tidyverse)

## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0

## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(leaflet)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(tidytext)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(DT) 
library(wordcloud2)
library(corrr)

Get the data

# Download the survey responses through the Google Sheets CSV export URL
# and store the parsed result in `survey`.
survey <- read_csv("https://docs.google.com/spreadsheets/d/138AJq8uaPl0XlF3Bjj3q0g_Pb3RbmBQLFazj4hhJalw/export?format=csv")

## Parsed with column specification:
## cols(
##   Timestamp = col_character(),
##   `What is your sex?` = col_character(),
##   `What is your age?` = col_character(),
##   `What is your highest education completed?` = col_character(),
##   `If you went to college, what was your major?` = col_character(),
##   `Do you have children?` = col_character(),
##   `What state are you from?` = col_character(),
##   `Are you religious?` = col_double(),
##   `I believe children should be vaccinated.` = col_double(),
##   `I trust the information I receive about shots.` = col_double(),
##   `I believe that there could be a link between the MMR vaccination and autism.` = col_double(),
##   `I worry about possible side effects of vaccinations.` = col_double(),
##   `I believe the media exaggerates reports about disease outbreak and vaccinations.` = col_double(),
##   `If I were to have a child today, I would want them to have all of the recommended vaccinations.` = col_double(),
##   `Healthy children should be required to be vaccinated to attend school because of potential risks to others.` = col_double(),
##   `If you wish to expand on any of your answers above, do so here:` = col_character()
## )

Check the variables

# List every column of the freshly imported data together with its type.
glimpse(survey)

## Observations: 305
## Variables: 16
## $ Timestamp                                                                                                     <chr> …
## $ `What is your sex?`                                                                                           <chr> …
## $ `What is your age?`                                                                                           <chr> …
## $ `What is your highest education completed?`                                                                   <chr> …
## $ `If you went to college, what was your major?`                                                                <chr> …
## $ `Do you have children?`                                                                                       <chr> …
## $ `What state are you from?`                                                                                    <chr> …
## $ `Are you religious?`                                                                                          <dbl> …
## $ `I believe children should be vaccinated.`                                                                    <dbl> …
## $ `I trust the information I receive about shots.`                                                              <dbl> …
## $ `I believe that there could be a link between the MMR vaccination and autism.`                                <dbl> …
## $ `I worry about possible side effects of vaccinations.`                                                        <dbl> …
## $ `I believe the media exaggerates reports about disease outbreak and vaccinations.`                            <dbl> …
## $ `If I were to have a child today, I would want them to have all of the recommended vaccinations.`             <dbl> …
## $ `Healthy children should be required to be vaccinated to attend school because of potential risks to others.` <dbl> …
## $ `If you wish to expand on any of your answers above, do so here:`                                             <chr> …

Rename the columns

# Swap the long question-text headers for short, code-friendly names.
# Timestamp and the free-text "expand on your answers" column are not listed
# here and therefore keep their original headers.
survey <- rename(
  survey,
  Sex               = `What is your sex?`,
  Age               = `What is your age?`,
  Education         = `What is your highest education completed?`,
  Major             = `If you went to college, what was your major?`,
  Children          = `Do you have children?`,
  From              = `What state are you from?`,
  Religiosity       = `Are you religious?`,
  Vaccinate         = `I believe children should be vaccinated.`,
  Shots_Information = `I trust the information I receive about shots.`,
  Autism            = `I believe that there could be a link between the MMR vaccination and autism.`,
  Side_Effects      = `I worry about possible side effects of vaccinations.`,
  Media             = `I believe the media exaggerates reports about disease outbreak and vaccinations.`,
  Have_all          = `If I were to have a child today, I would want them to have all of the recommended vaccinations.`,
  Healthy_Children  = `Healthy children should be required to be vaccinated to attend school because of potential risks to others.`
)

Check the renamed variables:

# Confirm that the short column names are now in place.
glimpse(survey)

## Observations: 305
## Variables: 16
## $ Timestamp                                                         <chr> …
## $ Sex                                                               <chr> …
## $ Age                                                               <chr> …
## $ Education                                                         <chr> …
## $ Major                                                             <chr> …
## $ Children                                                          <chr> …
## $ From                                                              <chr> …
## $ Religiosity                                                       <dbl> …
## $ Vaccinate                                                         <dbl> …
## $ Shots_Information                                                 <dbl> …
## $ Autism                                                            <dbl> …
## $ Side_Effects                                                      <dbl> …
## $ Media                                                             <dbl> …
## $ Have_all                                                          <dbl> …
## $ Healthy_Children                                                  <dbl> …
## $ `If you wish to expand on any of your answers above, do so here:` <chr> …

Analysis

Age of participants:

# Preview only: the converted tibble is printed and `survey` is left as is.
# Age was read as text; entries as.numeric() cannot parse become NA, which
# is what raises the coercion warning.
mutate(survey, Age = as.numeric(Age))

## Warning: NAs introduced by coercion

Save the converted Age back into survey (mutate() adds or replaces variables and preserves the existing ones):

# Overwrite Age with its numeric version and keep the result in `survey`.
# Entries as.numeric() cannot parse become NA (hence the coercion warning).
survey <- mutate(survey, Age = as.numeric(Age))

## Warning: NAs introduced by coercion

Check:

# Age should now be reported as <dbl> instead of <chr>.
glimpse(survey)

## Observations: 305
## Variables: 16
## $ Timestamp                                                         <chr> …
## $ Sex                                                               <chr> …
## $ Age                                                               <dbl> …
## $ Education                                                         <chr> …
## $ Major                                                             <chr> …
## $ Children                                                          <chr> …
## $ From                                                              <chr> …
## $ Religiosity                                                       <dbl> …
## $ Vaccinate                                                         <dbl> …
## $ Shots_Information                                                 <dbl> …
## $ Autism                                                            <dbl> …
## $ Side_Effects                                                      <dbl> …
## $ Media                                                             <dbl> …
## $ Have_all                                                          <dbl> …
## $ Healthy_Children                                                  <dbl> …
## $ `If you wish to expand on any of your answers above, do so here:` <chr> …

Have children or not:

# Show the data with Children converted to a factor.
# NOTE(review): the result is only printed, never assigned, so `survey`
# itself is unchanged and later chunks still see Children as it was read.
# Assign the result if the factor is meant to be kept -- confirm intent.
survey %>% 
  mutate(Children = as_factor(Children))

Compare the group means:

# Mean Have_all score for each Children group, using only the rows in which
# both Children and Have_all are present.
survey %>%
  filter(!is.na(Children), !is.na(Have_all)) %>%
  group_by(Children) %>%
  summarise(Have = mean(Have_all))

Graph:

# Bar chart of the mean Have_all score per Children group. The summary is
# recomputed here from the complete rows and piped straight into ggplot.
survey %>%
  filter(!is.na(Children), !is.na(Have_all)) %>%
  group_by(Children) %>%
  summarise(Have = mean(Have_all)) %>%
  ggplot(mapping = aes(Children, Have)) +
  geom_col()

Do the t-test:

# Two-sample t-test of Have_all between the Children groups (formula
# interface: response ~ grouping variable).
t.test(survey$Have_all ~ survey$Children)

## 
##  Welch Two Sample t-test
## 
## data:  survey$Have_all by survey$Children
## t = -3.211, df = 224.92, p-value = 0.001516
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7239792 -0.1733153
## sample estimates:
##  mean in group No mean in group Yes 
##          1.345679          1.794326

Correlate the data:

# Pairwise correlations between the numeric survey items; shave() blanks
# the redundant upper triangle so each pair is shown once.
survey[c(
  "Age", "Religiosity", "Vaccinate", "Autism",
  "Media", "Have_all", "Healthy_Children"
)] %>%
  correlate() %>%
  shave()

## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'

Graph

# Same correlation table as above, drawn with rplot() instead of printed.
survey[c(
  "Age", "Religiosity", "Vaccinate", "Autism",
  "Media", "Have_all", "Healthy_Children"
)] %>%
  correlate() %>%
  shave() %>%
  rplot()

## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'

Sex:

# Working copy for the sex analysis: lower-case the free-text Sex answers so
# variants that differ only in case collapse, and turn the Vaccinate score
# into a factor.
survey_sex <- survey %>%
  mutate(Sex = str_to_lower(Sex)) %>%
  mutate(Vaccinate = as_factor(Vaccinate))

# Interactive table of how often each distinct Sex answer occurs.
datatable(count(survey_sex, Sex))

Recode the free-text answers into consistent categories:

# Map spelling variants onto "male" / "female" and the non-serious answers
# onto "not enough". Values not listed here are left unchanged by recode().
survey_sex <- survey_sex %>%
  mutate(
    Sex = recode(
      Sex,
      "m" = "male",
      "republican ( male)" = "male",
      "f" = "female",
      "femail" = "female",
      "femal" = "female",
      "feme" = "female",
      "replace sex with gender! female" = "female",
      "girl" = "female",
      "gemale" = "female",
      "apache attack helicopter" = "not enough",
      "californian" = "not enough",
      "mucho" = "not enough",
      "snap-on tool box" = "not enough",
      "the rough, passionate kind" = "not enough"
    )
  )

Check:

# Frequency table of the Sex values after recoding.
datatable(count(survey_sex, Sex))

# Frequency table of the Vaccinate factor levels.
datatable(count(survey_sex, Vaccinate))

## Warning: Factor `Vaccinate` contains implicit NA, consider using
## `forcats::fct_explicit_na`

# Collapse the five Vaccinate levels into three groups and store them in a
# new column: 1-2 -> agree, 3 -> middle, 4-5 -> disagree.
survey_sex$Vaccinate_simple <- fct_collapse(
  survey_sex$Vaccinate,
  agree = c("1", "2"),
  disagree = c("4", "5"),
  middle = "3"
)

# Number of respondents in each collapsed Vaccinate group.
count(survey_sex, Vaccinate_simple)

## Warning: Factor `Vaccinate_simple` contains implicit NA, consider using
## `forcats::fct_explicit_na`

Graph:

# Share of each Sex category within every Vaccinate_simple group
# (bars stacked to 100 %), using rows where both values are present.
ggplot(
  drop_na(survey_sex, Vaccinate_simple, Sex),
  aes(x = Vaccinate_simple, fill = Sex)
) +
  geom_bar(position = "fill") +
  coord_flip() +
  scale_fill_viridis_d()

# Same data as raw counts, with one bar per Sex category side by side.
ggplot(
  drop_na(survey_sex, Vaccinate_simple, Sex),
  aes(x = Vaccinate_simple, fill = Sex)
) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  coord_flip() +
  scale_fill_viridis_d()

Major

# Working copy for the major analysis, with Major stored as a factor.
survey_major <- mutate(survey, Major = as_factor(Major))

# How many respondents reported each major.
count(survey_major, Major)

## Warning: Factor `Major` contains implicit NA, consider using
## `forcats::fct_explicit_na`

Create a word cloud of the majors:

# Word cloud of the most frequently reported majors.
# `sort = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved constant.
# top_n(100) is called without `wt`, so it ranks by the last column (`n`)
# and reports "Selecting by n".
survey_major %>%
  count(Major, sort = TRUE) %>%
  top_n(100) %>%
  wordcloud2(size = 3)

## Warning: Factor `Major` contains implicit NA, consider using
## `forcats::fct_explicit_na`

## Selecting by n