Zhihang Jian (s3958653)
Last updated: 16 October, 2022
caption Source: Department of Health
# Load the family-incident workbook. read_xlsx() is provided by the readxl
# package (not readr), so it is called with an explicit namespace.
# NOTE(review): this absolute path only resolves on the author's machine;
# consider a project-relative path.
family <- readxl::read_xlsx("/Users/noalgreen/Desktop/familyincidents.xlsx")
family %>% head(3)

# Keep columns 1, 3 and 6, selected by position. Columns 3 and 6 are the
# ones referenced later as `2018` and `2021`.
familynew <- family %>% subset(select = c(1, 3, 6))
# Convert the data to long format for further analysis: columns 2 and 3 are
# stacked into a `year` key column and a `case_counting` value column.
familynew2 <- familynew %>%
  pivot_longer(
    cols = c(2, 3),
    names_to = "year",
    values_to = "case_counting"
  )
familynew2 %>% head(5)

# Box plot of case counts per year, with each box filled by year.
BoxPlot <- ggplot(data = familynew2, aes(x = year, y = case_counting)) +
  geom_boxplot(aes(fill = year))
# Draw the box plot with axis labels and mark each year's mean with a red
# point. `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
BoxPlot +
  labs(
    title = "Median family incidents counting",
    x = "Year",
    y = "Case numbers"
  ) +
  stat_summary(fun = mean, colour = "red", geom = "point")

# Descriptive statistics of the case counts for each year; `Missing` counts
# the NA values that the other statistics skip via na.rm = TRUE.
table1 <- familynew2 %>%
  group_by(year) %>%
  summarise(
    Min = min(case_counting, na.rm = TRUE),
    Q1 = quantile(case_counting, probs = .25, na.rm = TRUE),
    Median = median(case_counting, na.rm = TRUE),
    Q3 = quantile(case_counting, probs = .75, na.rm = TRUE),
    Max = max(case_counting, na.rm = TRUE),
    Mean = mean(case_counting, na.rm = TRUE),
    SD = sd(case_counting, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(case_counting))
  )
knitr::kable(table1)

| year | Min | Q1 | Median | Q3 | Max | Mean | SD | n | Missing |
|---|---|---|---|---|---|---|---|---|---|
| 2018 | 14 | 190.5 | 733 | 1453.5 | 4382 | 961.8228 | 930.4594 | 79 | 0 |
| 2021 | 15 | 297.0 | 915 | 1663.5 | 5487 | 1179.8481 | 1148.0870 | 79 | 0 |
# Simple linear regression of the 2018 counts (response) on the 2021 counts
# (predictor).
model1 <- lm(familynew$`2018` ~ familynew$`2021`, data = familynew)

# Model summary. NOTE(review): the name `summary` shadows the base function's
# name; it is kept so any later reference to this object still works.
summary <- summary(model1)

# Scatter plot with the fitted line. The axes follow the model (2021 on x,
# 2018 on y) so that abline(model1) draws the actual regression line.
# Base graphics draw as a side effect and return NULL, so the calls are made
# separately rather than chained with `+` (which evaluated to integer(0)).
plot(`2018` ~ `2021`, data = familynew, xlab = "2021", ylab = "2018")
abline(model1, col = "red")
##
## Call:
## lm(formula = familynew$`2018` ~ familynew$`2021`, data = familynew)
##
## Residuals:
## Min 1Q Median 3Q Max
## -572.26 -42.27 -12.50 65.40 566.14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.51743 27.52234 0.782 0.437
## familynew$`2021` 0.79697 0.01677 47.524 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 170 on 77 degrees of freedom
## Multiple R-squared: 0.967, Adjusted R-squared: 0.9666
## F-statistic: 2259 on 1 and 77 DF, p-value: < 2.2e-16
\[H_0: \mu_1 = \mu_2 \]
\[H_A: \mu_1 \ne \mu_2\]
\[S = \sum^n_{i = 1}d^2_i\]
##
## Welch Two Sample t-test
##
## data: case_counting by year
## t = -1.3113, df = 149.58, p-value = 0.1918
## alternative hypothesis: true difference in means between group 2018 and group 2021 is not equal to 0
## 95 percent confidence interval:
## -546.5550 110.5044
## sample estimates:
## mean in group 2018 mean in group 2021
## 961.8228 1179.8481