library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
COVID <- read_csv("COVID.csv")
## Parsed with column specification:
## cols(
##   Likelihood_Infected = col_double(),
##   Facemask_Wear = col_double(),
##   Household_Size = col_double()
## )
View(COVID)
head(COVID)
## # A tibble: 6 x 3
##   Likelihood_Infected Facemask_Wear Household_Size
##                 <dbl>         <dbl>          <dbl>
## 1                   4             1              2
## 2                   2             0              1
## 3                   2             1              3
## 4                   2             1              7
## 5                   0             1              3
## 6                   2             1              4

Skills Drill

new_COVID <- COVID %>%
  mutate(Likelihood_Infected = 
                ifelse(Likelihood_Infected==0,"Not likely at all", 
                ifelse(Likelihood_Infected==1,"Not too likely", 
                ifelse(Likelihood_Infected==2,"Somewhat likely", 
                ifelse(Likelihood_Infected==3,"Very likely", 
                ifelse(Likelihood_Infected==4,"I have already contracted the virus.",NA))))),
        Likelihood_Infected = factor(Likelihood_Infected,
                        levels=c("Not likely at all", "Not too likely","Somewhat likely", "Very likely", "I have already contracted the virus.")),
        Facemask_Wear = factor(ifelse(Facemask_Wear==0,"No",
                        ifelse(Facemask_Wear==1,"Yes", NA)),
                        levels=c("Yes","No")))

head(new_COVID)
## # A tibble: 6 x 3
##   Likelihood_Infected                  Facemask_Wear Household_Size
##   <fct>                                <fct>                  <dbl>
## 1 I have already contracted the virus. Yes                        2
## 2 Somewhat likely                      No                         1
## 3 Somewhat likely                      Yes                        3
## 4 Somewhat likely                      Yes                        7
## 5 Not likely at all                    Yes                        3
## 6 Somewhat likely                      Yes                        4

How do those reporting different likelihood of contracting COVID (Likelihood_Infected) differ in their reporting of wearing a facemask(Facemask_Wear).?

table(new_COVID$Likelihood_Infected, new_COVID$Facemask_Wear) %>%
  prop.table(2) %>%
  round(2)
##                                       
##                                         Yes   No
##   Not likely at all                    0.14 0.17
##   Not too likely                       0.37 0.45
##   Somewhat likely                      0.32 0.27
##   Very likely                          0.13 0.09
##   I have already contracted the virus. 0.03 0.03

Those who are not likely at all have a higher percentage for no, which shows they don’t feel wearing a mask is a priority. Those who are not took likely lean more on no probably because they don’t think they should be concerned. Those who are somewhat likely lean more on yes possibly due to cautional reasons. Those who are very likely say yes so they won’t infect other people. Those who already contracted the disease are pretty neutral. It shows that as people are likely to contract it they are likely to say yes on face masks.

Visualization

new_COVID %>%
  filter(!is.na(Likelihood_Infected),!is.na(Facemask_Wear)) %>%
  group_by(Likelihood_Infected, Facemask_Wear) %>%
  summarize(n=n()) %>%
  mutate(percent=n/sum(n)) %>%
ggplot () +
  geom_col(aes(x=Likelihood_Infected, y=percent, fill=Facemask_Wear)) + theme_minimal()

What is the average Household_Size of people in each category of likelihood of contracting COVID(Likelihood_Infected)?

new_COVID %>%
group_by(Likelihood_Infected) %>%
summarize(averageage = mean (Household_Size))
## # A tibble: 5 x 2
##   Likelihood_Infected                  averageage
##   <fct>                                     <dbl>
## 1 Not likely at all                         NA   
## 2 Not too likely                            NA   
## 3 Somewhat likely                            3.91
## 4 Very likely                                4.04
## 5 I have already contracted the virus.       4.36

The average household side increases for those who are likely to get infected.

Compare the Household_Size distribution for those in each category of likelihood of contracting COVID

table(new_COVID$Likelihood_Infected, new_COVID$Household_Size) %>%
  prop.table(2) %>%
  round(2)
##                                       
##                                           0    1    2    3    4    5    6    7
##   Not likely at all                    1.00 0.17 0.14 0.18 0.19 0.11 0.00 0.07
##   Not too likely                       0.00 0.42 0.45 0.36 0.36 0.42 0.38 0.50
##   Somewhat likely                      0.00 0.33 0.26 0.31 0.34 0.27 0.33 0.29
##   Very likely                          0.00 0.08 0.11 0.13 0.10 0.17 0.25 0.07
##   I have already contracted the virus. 0.00 0.00 0.05 0.02 0.02 0.03 0.04 0.07
##                                       
##                                           8    9   10   15
##   Not likely at all                    0.07 0.00 0.00 0.00
##   Not too likely                       0.20 0.60 0.00 0.00
##   Somewhat likely                      0.53 0.00 1.00 1.00
##   Very likely                          0.07 0.40 0.00 0.00
##   I have already contracted the virus. 0.13 0.00 0.00 0.00

The results show folks that are not too likely, somewhat likely, and very likely to get infected have a higher household size. We also see that folks that are not too likely have a smaller household size. This shows that folks who have a higher household size have a higher chance of being infected due to more exposure and people in the house.