Math1324 Assignment2

How does the pandemic affect family incidents in Victoria?

Zhihang Jian (s3958653)

Last updated: 16 October, 2022

Introduction

Problem Statement

Source: Department of Health

Data

Data Cont.

# Load the family-incidents workbook. read_xlsx() is provided by the readxl
# package (not readr), so readxl must be attached before this chunk runs.
family <- read_xlsx(
  "/Users/noalgreen/Desktop/familyincidents.xlsx"
)

# Preview the first three rows of the imported sheet.
head(family, 3)

Data Cont. Subsetting

# Keep only the 1st, 3rd and 6th columns of the imported sheet (all rows).
familynew <- family[, c(1, 3, 6)]

# Reshape to long format for further analysis: the 2nd and 3rd retained
# columns are stacked, with the former column name stored in `year` and the
# cell value stored in `case_counting`.
familynew2 <- pivot_longer(
  familynew,
  cols = c(2, 3),
  names_to = "year",
  values_to = "case_counting"
)

# Preview the first five rows of the long table.
head(familynew2, 5)

Descriptive Statistics and Visualisation

# Box plot of incident counts for each year, one filled box per year.
BoxPlot <- ggplot(data = familynew2, aes(x = year, y = case_counting)) +
  geom_boxplot(aes(fill = year))

# Add titles and overlay each year's mean as a red point.
# `fun` replaces the `fun.y` argument, which was deprecated in ggplot2 3.3.0
# and emits a warning when used.
BoxPlot +
  labs(
    title = "Median family incidents counting",
    x = "Year",
    y = "Case numbers"
  ) +
  stat_summary(fun = mean, colour = "red", geom = "point")

Descriptive Statistics Cont.

# Descriptive statistics of case_counting for each year. Missing values are
# excluded from every statistic; `n` is the number of rows in the group
# (including any missing ones) and `Missing` counts the NA values.
table1 <- familynew2 %>%
  group_by(year) %>%
  summarise(
    Min = min(case_counting, na.rm = TRUE),
    Q1 = quantile(case_counting, probs = 0.25, na.rm = TRUE),
    Median = median(case_counting, na.rm = TRUE),
    Q3 = quantile(case_counting, probs = 0.75, na.rm = TRUE),
    Max = max(case_counting, na.rm = TRUE),
    Mean = mean(case_counting, na.rm = TRUE),
    SD = sd(case_counting, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(case_counting))
  )

# Render the summary as a formatted table.
knitr::kable(table1)
year Min Q1 Median Q3 Max Mean SD n Missing
2018 14 190.5 733 1453.5 4382 961.8228 930.4594 79 0
2021 15 297.0 915 1663.5 5487 1179.8481 1148.0870 79 0

Hypothesis Testing

# Simple linear regression with the 2018 counts as the response and the 2021
# counts as the predictor. The formula is kept in this form so that the
# printed model summary (Call line and coefficient label) is unchanged.
model1 <- lm(familynew$`2018` ~ familynew$`2021`, data = familynew)

# Scatter plot with the fitted line. The axes follow the model: predictor
# (2021) on x and response (2018) on y, so that abline() draws the fitted
# intercept and slope in the right coordinate system.
# Base graphics draw as a side effect and return NULL, so the calls are made
# as separate statements instead of being added together and stored.
plot(
  familynew$`2018` ~ familynew$`2021`,
  data = familynew,
  xlab = "2021",
  ylab = "2018"
)
abline(model1, col = "red")
# Compute the regression summary for model1 and print it.
# NOTE(review): the result is stored under the name `summary`, which shadows
# base::summary as a variable; the name is kept in case later code uses it.
summary <- summary(model1)
summary
## 
## Call:
## lm(formula = familynew$`2018` ~ familynew$`2021`, data = familynew)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -572.26  -42.27  -12.50   65.40  566.14 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      21.51743   27.52234   0.782    0.437    
## familynew$`2021`  0.79697    0.01677  47.524   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 170 on 77 degrees of freedom
## Multiple R-squared:  0.967,  Adjusted R-squared:  0.9666 
## F-statistic:  2259 on 1 and 77 DF,  p-value: < 2.2e-16

Hypothesis Testing Cont.

\[H_0: \mu_1 = \mu_2 \]

\[H_A: \mu_1 \ne \mu_2\]

\[S = \sum^n_{i = 1}d^2_i\]

# Two-sided Welch two-sample t-test (var.equal = FALSE) comparing the mean of
# case_counting between the two levels of `year` in the long table.
# NOTE(review): familynew2 was built by pivoting the 2018 and 2021 columns of
# the same rows, so each row contributes one value to each group. The two
# samples therefore look paired rather than independent; a paired t-test on
# the two original columns may be more appropriate. Confirm before
# interpreting the p-value.
t.test(
  case_counting ~ year,
  data = familynew2,
  var.equal = FALSE,
  alternative = "two.sided"
  )
## 
##  Welch Two Sample t-test
## 
## data:  case_counting by year
## t = -1.3113, df = 149.58, p-value = 0.1918
## alternative hypothesis: true difference in means between group 2018 and group 2021 is not equal to 0
## 95 percent confidence interval:
##  -546.5550  110.5044
## sample estimates:
## mean in group 2018 mean in group 2021 
##           961.8228          1179.8481

Discussion

References