library(tidyverse)
library(magrittr) # %<>%
library(DT) # datatable()
library(GGally) # ggpairs()
library(infer) # rep_sample_n()

options(digits = 3, scipen = 999)

1

(a)

load("G:/My Drive/homework/Cooper P/covid_example_population.RData")
names(covid_pop) %<>% str_to_title()

covid_pop %>%
  datatable(options = list(pageLength = 5))
covid_pop %>%
  mutate(across(.cols = everything(), as_factor)) %>%
  summary()
##          Symptoms           Contact           Infected   
##  asymptomatic:9398   not contact:8956   uninfected:9704  
##  symptomatic : 602   contact    :1044   infected  : 296
covid_pop %>%
  mutate(across(.cols = everything(), as_factor)) %>%
  ggpairs()

with(covid_pop, table(Symptoms, Infected)) # %>% addmargins()
##               Infected
## Symptoms       infected uninfected
##   asymptomatic      162       9236
##   symptomatic       134        468
with(covid_pop, ftable(Contact, Symptoms, Infected))
##                          Infected infected uninfected
## Contact     Symptoms                                 
## contact     asymptomatic               123        798
##             symptomatic                 87         36
## not contact asymptomatic                39       8438
##             symptomatic                 47        432

(b)

covid_pop_n500 <-
  covid_pop %>%
  slice_sample(n = 500, replace = FALSE)

p_hat_n500 <- 
  covid_pop_n500 %>%
  filter(Infected == "infected") %>%
  nrow() / 500

(d)

covid_pop_n500_symptomatic <-
  covid_pop %>%
  filter(Symptoms == "symptomatic") %>%
  slice_sample(n = 500, replace = FALSE)

p_hat_n500_symptomatic <-
    covid_pop_n500_symptomatic %>%
    filter(Infected == "infected") %>%
    nrow() / 500

(g)

covid_pop_n10000_contact_symptomatic <-
  covid_pop %>%
  filter(Contact == "contact" | Symptoms == "symptomatic")

covid_pop_n10000_contact_symptomatic %>%
  filter(Infected == "infected") %>%
  nrow()
## [1] 257
(p_hat_n10000_contact_symptomatic <- 
  covid_pop_n10000_contact_symptomatic %>%
  filter(Infected == "infected") %>% 
  nrow() / nrow(covid_pop_n10000_contact_symptomatic)
  )
## [1] 0.169

(h)

Sample

(i)

covid_pop_n10000_symptomatic <-
  covid_pop %>%
  filter(Symptoms == "symptomatic")

sum(covid_pop_n10000_contact_symptomatic$Infected == "infected") -
  sum(covid_pop_n10000_symptomatic$Infected == "infected")
## [1] 123
(p_hat_n10000_symptomatic <-
  covid_pop_n10000_symptomatic %>%
  filter(Infected == "infected") %>%
  nrow() / nrow(covid_pop_n10000_symptomatic)
)
## [1] 0.223

(j)

p_hat_n10000 <-
    covid_pop %>%
    filter(Infected == "infected") %>%
    nrow() / 10000

data.frame(p_hat_n500,
           p_hat_n500_symptomatic,
           p_hat_n10000_contact_symptomatic,
           p_hat_n10000_symptomatic,
           p_hat_n10000,
           row.names = "Proportion_Infected") %>%
  t()
##                                  Proportion_Infected
## p_hat_n500                                    0.0180
## p_hat_n500_symptomatic                        0.2240
## p_hat_n10000_contact_symptomatic              0.1687
## p_hat_n10000_symptomatic                      0.2226
## p_hat_n10000                                  0.0296

2

(a)

covid_pop_n500_rep1000 <-
  covid_pop %>%
  rep_sample_n(size = 500, reps = 1000, replace = FALSE)

covid_pop_n500_rep1000_distrubtion <-
  covid_pop_n500_rep1000 %>%
  group_by(replicate) %>%
  summarize(Proportion_Infected = sum(Infected == "infected") / 500)

(b)

std_error_n500_rep1000 <-
   sd(covid_pop_n500_rep1000_distrubtion$Proportion_Infected) / sqrt(500)

(d)

covid_pop_n1000_rep1000 <-
  covid_pop %>%
  rep_sample_n(size = 1000, reps = 1000, replace = TRUE)

covid_pop_n1000_rep1000_distrubtion  <-
  covid_pop_n1000_rep1000 %>%
  group_by(replicate) %>%
  summarize(Proportion_Infected = sum(Infected == "infected") / 1000)

std_error_n1000_rep1000 <-
   sd(covid_pop_n1000_rep1000_distrubtion$Proportion_Infected) / sqrt(1000)

(e)

covid_pop_n2500_rep1000 <-
  covid_pop %>%
  rep_sample_n(size = 2500, reps = 1000, replace = TRUE)

covid_pop_n2500_rep1000_distrubtion  <-
  covid_pop_n2500_rep1000 %>%
  group_by(replicate) %>%
  summarize(Proportion_Infected = sum(Infected == "infected") / 2500)

std_error_n2500_rep1000 <-
   sd(covid_pop_n2500_rep1000_distrubtion$Proportion_Infected) / sqrt(2500)

(g)

(i)

covid_pop_symptomatic_n500_rep1000 <-
  covid_pop %>%
  filter(Symptoms == "symptomatic") %>%
  rep_sample_n(size = 500, reps = 1000, replace = TRUE)

covid_pop_symptomatic_n500_rep1000_distrubtion <-
  covid_pop_symptomatic_n500_rep1000 %>%
  group_by(replicate) %>%
  summarize(Proportion_Infected = sum(Infected == "infected") / 500)

std_error_symptomatic_n500_rep1000 <-
  sd(covid_pop_symptomatic_n500_rep1000_distrubtion$Proportion_Infected) / sqrt(500)

(ii)

tibble("n500" = covid_pop_n500_rep1000_distrubtion$Proportion_Infected,
       "n1000" = covid_pop_n1000_rep1000_distrubtion$Proportion_Infected,
       "n2500" = covid_pop_n2500_rep1000_distrubtion$Proportion_Infected,
       "n500symptomatic" =
         covid_pop_symptomatic_n500_rep1000_distrubtion$Proportion_Infected
       ) %>%
  slice_sample(n = 10)
## # A tibble: 10 x 4
##     n500 n1000  n2500 n500symptomatic
##    <dbl> <dbl>  <dbl>           <dbl>
##  1 0.026 0.039 0.0332           0.216
##  2 0.024 0.021 0.0276           0.208
##  3 0.024 0.037 0.0272           0.224
##  4 0.026 0.026 0.026            0.226
##  5 0.026 0.036 0.0228           0.228
##  6 0.026 0.032 0.0304           0.246
##  7 0.03  0.033 0.0256           0.222
##  8 0.038 0.031 0.0272           0.236
##  9 0.022 0.036 0.0248           0.194
## 10 0.026 0.02  0.032            0.23
bind_rows("n0500" = covid_pop_n500_rep1000_distrubtion,
          "n1000" = covid_pop_n1000_rep1000_distrubtion,
          "n2500" = covid_pop_n2500_rep1000_distrubtion,
          "n500symptomatic" = covid_pop_symptomatic_n500_rep1000_distrubtion,
          .id = "SampleSize"
          ) -> combined.distributions

combined.distributions %>%
  ggplot(aes(Proportion_Infected, fill = SampleSize)) +
  geom_histogram() + # How to adjust bins ?
  geom_vline(xintercept = p_hat_n10000, color = "red") +
  facet_grid(cols = vars(SampleSize))

combined.distributions %>%
  ggplot(aes(Proportion_Infected, fill = SampleSize)) +
  geom_histogram() +
  geom_vline(xintercept = p_hat_n10000, color = "red", size = 0.66) +
  facet_grid(cols = vars(SampleSize), scales = "free_x") +
  labs(subtitle = "Free X Scales")

combined.distributions %>%
  ggplot(aes(y = Proportion_Infected, fill = SampleSize)) +
  geom_boxplot() +
  geom_hline(yintercept = p_hat_n10000, color = "red") +
  # facet_grid(cols = vars(SampleSize)) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

combined.distributions %>%
  group_by(SampleSize) %>%
  summarise(Min = min(Proportion_Infected),
            # Q1 = quantile(Proportion_Infected, 0.25),
            Median = median(Proportion_Infected),
            Mean = mean(Proportion_Infected),
            # Q3 = quantile(Proportion_Infected, 0.75),
            Max = max(Proportion_Infected)
            ) %>%
  mutate("Std_ErrorEE-7" = c(std_error_n500_rep1000,
                       std_error_n1000_rep1000,
                       std_error_n2500_rep1000,
                       std_error_symptomatic_n500_rep1000) *
           10^7
         ) 
## # A tibble: 4 x 6
##   SampleSize         Min Median   Mean   Max `Std_ErrorEE-7`
##   <chr>            <dbl>  <dbl>  <dbl> <dbl>           <dbl>
## 1 n0500           0.01   0.028  0.0292 0.062           3368.
## 2 n1000           0.014  0.029  0.0296 0.048           1618.
## 3 n2500           0.0204 0.0296 0.0296 0.04             662.
## 4 n500symptomatic 0.168  0.222  0.222  0.29            8127.

3

Quadrupled the tests and doubled the positive cases. 75% of positive cases are for asymptomatic & no-contact citizens.

1000 * 0.01
## [1] 10
4000 * 0.005
## [1] 20

5

(a)

https://cals.arizona.edu/classes/rnr321/Ch4.pdf#page=3

(covid_pop %>%
  group_by(Symptoms) %>%
  summarise(Infections = sum(Infected == "infected"),
            N = NROW(Symptoms), 
            Proportion_Infected = Infections / N) ->
  covid_pop_stratefied)
## # A tibble: 2 x 4
##   Symptoms     Infections     N Proportion_Infected
##   <chr>             <int> <int>               <dbl>
## 1 asymptomatic        162  9398              0.0172
## 2 symptomatic         134   602              0.223
(p_hat_n10000_stratefied <-
  (500*covid_pop_stratefied$Proportion_Infected[1] +
  500*covid_pop_stratefied$Proportion_Infected[2]) /
  (500 + 500))
## [1] 0.12

(b)

(p_hat_n500_rep1000_asymptomatic <-
  covid_pop %>%
  filter(Symptoms == "asymptomatic") %>%
  rep_sample_n(size = 500, reps = 1000, replace = TRUE) %>%
  group_by(replicate) %>%
  summarise(Proportion_Infected = sum(Infected == "infected") / 500) %>%
  pull(Proportion_Infected) %>%
  mean())
## [1] 0.0173
(p_hat_n500_rep1000_symptomatic <-
  covid_pop %>%
  filter(Symptoms == "symptomatic") %>%
  rep_sample_n(size = 500, reps = 1000, replace = TRUE) %>%
  group_by(replicate) %>%
  summarise(Proportion_Infected = sum(Infected == "infected") / 500) %>%
  pull(Proportion_Infected) %>%
  mean())
## [1] 0.223
(p_hat_n500_rep1000_stratefied <-
  (500 * p_hat_n500_rep1000_asymptomatic + 500 * p_hat_n500_rep1000_symptomatic) /
  (500 + 500))
## [1] 0.12