library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(leaflet)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tidytext)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
library(wordcloud2)
library(corrr)
# Read the survey responses from the Google Sheets CSV export URL.
survey <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/138AJq8uaPl0XlF3Bjj3q0g_Pb3RbmBQLFazj4hhJalw/export?format=csv"
)
## Parsed with column specification:
## cols(
## Timestamp = col_character(),
## `What is your sex?` = col_character(),
## `What is your age?` = col_character(),
## `What is your highest education completed?` = col_character(),
## `If you went to college, what was your major?` = col_character(),
## `Do you have children?` = col_character(),
## `What state are you from?` = col_character(),
## `Are you religious?` = col_double(),
## `I believe children should be vaccinated.` = col_double(),
## `I trust the information I receive about shots.` = col_double(),
## `I believe that there could be a link between the MMR vaccination and autism.` = col_double(),
## `I worry about possible side effects of vaccinations.` = col_double(),
## `I believe the media exaggerates reports about disease outbreak and vaccinations.` = col_double(),
## `If I were to have a child today, I would want them to have all of the recommended vaccinations.` = col_double(),
## `Healthy children should be required to be vaccinated to attend school because of potential risks to others.` = col_double(),
## `If you wish to expand on any of your answers above, do so here:` = col_character()
## )
# Inspect the column names and types as imported.
survey %>%
  glimpse()
## Observations: 305
## Variables: 16
## $ Timestamp <chr> …
## $ `What is your sex?` <chr> …
## $ `What is your age?` <chr> …
## $ `What is your highest education completed?` <chr> …
## $ `If you went to college, what was your major?` <chr> …
## $ `Do you have children?` <chr> …
## $ `What state are you from?` <chr> …
## $ `Are you religious?` <dbl> …
## $ `I believe children should be vaccinated.` <dbl> …
## $ `I trust the information I receive about shots.` <dbl> …
## $ `I believe that there could be a link between the MMR vaccination and autism.` <dbl> …
## $ `I worry about possible side effects of vaccinations.` <dbl> …
## $ `I believe the media exaggerates reports about disease outbreak and vaccinations.` <dbl> …
## $ `If I were to have a child today, I would want them to have all of the recommended vaccinations.` <dbl> …
## $ `Healthy children should be required to be vaccinated to attend school because of potential risks to others.` <dbl> …
## $ `If you wish to expand on any of your answers above, do so here:` <chr> …
# Replace the question-text column names with short identifiers.
# The final free-text "expand on your answers" column keeps its original name.
survey <- rename(
  survey,
  Sex = `What is your sex?`,
  Age = `What is your age?`,
  Education = `What is your highest education completed?`,
  Major = `If you went to college, what was your major?`,
  Children = `Do you have children?`,
  From = `What state are you from?`,
  Religiosity = `Are you religious?`,
  Vaccinate = `I believe children should be vaccinated.`,
  Shots_Information = `I trust the information I receive about shots.`,
  Autism = `I believe that there could be a link between the MMR vaccination and autism.`,
  Side_Effects = `I worry about possible side effects of vaccinations.`,
  Media = `I believe the media exaggerates reports about disease outbreak and vaccinations.`,
  Have_all = `If I were to have a child today, I would want them to have all of the recommended vaccinations.`,
  Healthy_Children = `Healthy children should be required to be vaccinated to attend school because of potential risks to others.`
)

# Confirm the new column names.
survey %>%
  glimpse()
## Observations: 305
## Variables: 16
## $ Timestamp <chr> …
## $ Sex <chr> …
## $ Age <chr> …
## $ Education <chr> …
## $ Major <chr> …
## $ Children <chr> …
## $ From <chr> …
## $ Religiosity <dbl> …
## $ Vaccinate <dbl> …
## $ Shots_Information <dbl> …
## $ Autism <dbl> …
## $ Side_Effects <dbl> …
## $ Media <dbl> …
## $ Have_all <dbl> …
## $ Healthy_Children <dbl> …
## $ `If you wish to expand on any of your answers above, do so here:` <chr> …
1.
Age of participants:
# Preview Age converted from character to numeric. The result is only
# printed here; `survey` itself is not modified by this call.
mutate(survey, Age = as.numeric(Age))
## Warning: NAs introduced by coercion
`mutate()` adds new variables and preserves existing ones. Assign the result back to `survey` to keep the numeric `Age` column:
# Store the numeric Age column back into `survey`.
survey <- mutate(survey, Age = as.numeric(Age))
## Warning: NAs introduced by coercion
Check:
# Verify that Age is now a double column.
survey %>%
  glimpse()
## Observations: 305
## Variables: 16
## $ Timestamp <chr> …
## $ Sex <chr> …
## $ Age <dbl> …
## $ Education <chr> …
## $ Major <chr> …
## $ Children <chr> …
## $ From <chr> …
## $ Religiosity <dbl> …
## $ Vaccinate <dbl> …
## $ Shots_Information <dbl> …
## $ Autism <dbl> …
## $ Side_Effects <dbl> …
## $ Media <dbl> …
## $ Have_all <dbl> …
## $ Healthy_Children <dbl> …
## $ `If you wish to expand on any of your answers above, do so here:` <chr> …
2.
Have children or not:
# Preview Children as a factor. The result is only printed; `survey` keeps
# its original character column because nothing is assigned.
mutate(survey, Children = as_factor(Children))
Compare the mean `Have_all` score of respondents with and without children:
# Mean Have_all score per Children group, using only rows where both
# values are present.
drop_na(survey, Children, Have_all) %>%
  group_by(Children) %>%
  summarise(Have = mean(Have_all))
Graph:
# Bar chart of the per-group mean Have_all score computed above.
drop_na(survey, Children, Have_all) %>%
  group_by(Children) %>%
  summarise(Have = mean(Have_all)) %>%
  ggplot(mapping = aes(x = Children, y = Have)) +
  geom_col()
Do the t-test:
# Welch two-sample t-test of Have_all between the two Children groups.
# The formula names the columns and `data =` supplies the tibble, instead of
# extracting vectors with `$` inside the formula; the statistics are the same
# and the printed data label becomes the plain column names.
t.test(Have_all ~ Children, data = survey)
##
## Welch Two Sample t-test
##
## data: Have_all by Children
## t = -3.211, df = 224.92, p-value = 0.001516
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7239792 -0.1733153
## sample estimates:
## mean in group No mean in group Yes
## 1.345679 1.794326
3.
Correlate the data:
# Pairwise correlations between Age and the numeric survey items.
# shave() drops the redundant half of the symmetric correlation table.
select(
  survey,
  Age, Religiosity, Vaccinate, Autism, Media, Have_all, Healthy_Children
) %>%
  correlate() %>%
  shave()
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
Graph:
# Same shaved correlation table as above, drawn with rplot().
select(
  survey,
  Age, Religiosity, Vaccinate, Autism, Media, Have_all, Healthy_Children
) %>%
  correlate() %>%
  shave() %>%
  rplot()
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
4.
Sex:
# Working copy for the Sex analysis: lower-case the free-text Sex answers
# and turn the numeric Vaccinate score into a factor.
survey_sex <- survey %>%
  mutate(Sex = str_to_lower(Sex)) %>%
  mutate(Vaccinate = as_factor(Vaccinate))
# Interactive table of how often each distinct Sex answer occurs.
datatable(count(survey_sex, Sex))
Recode the free-text answers into consistent categories:
# Map abbreviations and misspellings onto "male" / "female", and pool the
# other listed free-text answers under "not enough". Values that are not
# listed here are left as they are.
survey_sex <- survey_sex %>%
  mutate(
    Sex = recode(
      Sex,
      "m" = "male",
      "republican ( male)" = "male",
      "f" = "female",
      "femail" = "female",
      "femal" = "female",
      "feme" = "female",
      "replace sex with gender! female" = "female",
      "girl" = "female",
      "gemale" = "female",
      "apache attack helicopter" = "not enough",
      "californian" = "not enough",
      "mucho" = "not enough",
      "snap-on tool box" = "not enough",
      "the rough, passionate kind" = "not enough"
    )
  )
Check:
# Counts per Sex category after recoding.
datatable(count(survey_sex, Sex))

# Counts per Vaccinate level.
datatable(count(survey_sex, Vaccinate))
## Warning: Factor `Vaccinate` contains implicit NA, consider using
## `forcats::fct_explicit_na`
# Collapse the five Vaccinate levels into three: 1-2 -> agree, 3 -> middle,
# 4-5 -> disagree.
survey_sex <- mutate(
  survey_sex,
  Vaccinate_simple = fct_collapse(
    Vaccinate,
    agree = c("1", "2"),
    disagree = c("4", "5"),
    middle = "3"
  )
)

# Counts per collapsed level.
count(survey_sex, Vaccinate_simple)
## Warning: Factor `Vaccinate_simple` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Graph:
# Horizontal bars showing the share of each Sex category within every
# Vaccinate_simple level; rows missing either variable are excluded.
survey_sex %>%
  drop_na(Vaccinate_simple, Sex) %>%
  ggplot(mapping = aes(x = Vaccinate_simple, fill = Sex)) +
  geom_bar(position = "fill") +
  coord_flip() +
  scale_fill_viridis_d()
# Same data as side-by-side counts per Sex category, with a minimal theme.
survey_sex %>%
  drop_na(Vaccinate_simple, Sex) %>%
  ggplot(mapping = aes(x = Vaccinate_simple, fill = Sex)) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  coord_flip() +
  scale_fill_viridis_d()
5.
Major:
# Working copy for the Major analysis, with Major stored as a factor.
survey_major <- mutate(survey, Major = as_factor(Major))

# Number of respondents per reported major.
count(survey_major, Major)
## Warning: Factor `Major` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Create a word cloud of the major:
# Word cloud of the top 100 majors by number of respondents.
# `sort = TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. `wt = n` names the ranking column explicitly instead of letting
# top_n() pick it implicitly.
survey_major %>%
  count(Major, sort = TRUE) %>%
  top_n(100, wt = n) %>%
  wordcloud2(size = 3)
## Warning: Factor `Major` contains implicit NA, consider using
## `forcats::fct_explicit_na`