MATH1324 Introduction to Statistics Assignment 3

Road Traffic Accidents Across Leeds

Mounika Gudapati(s3748316), Chaitanyagopi Amirineni(s3734134), Harpreet kaur maan(s3732990)

Last updated: 28 October, 2018

Introduction

Problem Statement

Data

# Load the Leeds 2016 road traffic accident records from the CSV file.
Accidents_leeds <- read_csv(file = "Copy of Leeds_RTC_2016.csv")

Data Cont.

# Centre and scale the vehicle counts (z-scores); scale() returns a
# one-column matrix.
numeric_scale <- scale(Accidents_leeds[["Number of Vehicles"]])
# Preview the first six standardised values.
head(numeric_scale, n = 6L)
##            [,1]
## [1,] 0.07626119
## [2,] 0.07626119
## [3,] 0.07626119
## [4,] 0.07626119
## [5,] 0.07626119
## [6,] 0.07626119

Descriptive Statistics and Visualisation

# Contingency table of counts: casualty severity (rows) by number of
# vehicles involved (columns).
Accidents_T1 <- table(
  Accidents_leeds[["Casualty Severity"]],
  Accidents_leeds[["Number of Vehicles"]]
)
knitr::kable(Accidents_T1)
1 2 3 4 5 6 10
Fatal 5 3 1 0 0 0 0
Serious 128 169 17 5 3 0 0
Slight 494 1418 202 87 12 3 2

Descriptive Statistics and Visualisation Cont.

# Row proportions (margin = 1): within each casualty severity, the share of
# casualties at each vehicle count.
Accidents_T2 <- prop.table(
  table(
    Accidents_leeds[["Casualty Severity"]],
    Accidents_leeds[["Number of Vehicles"]]
  ),
  margin = 1
)
knitr::kable(Accidents_T2)
1 2 3 4 5 6 10
Fatal 0.5555556 0.3333333 0.1111111 0.0000000 0.0000000 0.0000000 0.0000000
Serious 0.3975155 0.5248447 0.0527950 0.0155280 0.0093168 0.0000000 0.0000000
Slight 0.2227232 0.6393147 0.0910730 0.0392245 0.0054103 0.0013526 0.0009017

Descriptive Statistics and Visualisation Cont.

# Grouped bar chart of the row proportions in Accidents_T2: one group of bars
# per vehicle count, one bar per casualty severity within each group.
#
# A single colour vector drives both the bars and the legend. The legend
# previously used palette indices (fill = 1:6), which are not guaranteed to
# match the named bar colours (the default palette changed in R 4.0), so the
# legend swatches could disagree with the bars.
severity_cols <- c("black", "red", "green")

barplot(
  Accidents_T2,
  main = "number of vehicles collided vs Severity",
  ylim = c(0, 0.8),
  ylab = "proportion of Severity",
  xlab = "Number of vehicles collided",
  col = severity_cols,
  beside = TRUE
)

# Legend entries follow the row order of Accidents_T2, the same order in
# which barplot() assigns `col` within each group.
legend(
  "topright",
  legend = rownames(Accidents_T2),
  fill = severity_cols,
  ncol = 3,
  cex = 0.75
)

Descriptive Statistics Cont.

# Make sure the vehicle count is stored as a numeric column before
# summarising it.
Accidents_leeds[["Number of Vehicles"]] <-
  as.numeric(Accidents_leeds[["Number of Vehicles"]])

# Five-number summary, mean, standard deviation, group size and number of
# missing values of the vehicle count, computed per casualty severity.
table1 <- Accidents_leeds %>%
  group_by(`Casualty Severity`) %>%
  summarise(
    Min = min(`Number of Vehicles`, na.rm = TRUE),
    Q1 = quantile(`Number of Vehicles`, probs = 0.25, na.rm = TRUE),
    Median = median(`Number of Vehicles`, na.rm = TRUE),
    Q3 = quantile(`Number of Vehicles`, probs = 0.75, na.rm = TRUE),
    Max = max(`Number of Vehicles`, na.rm = TRUE),
    Mean = mean(`Number of Vehicles`, na.rm = TRUE),
    SD = sd(`Number of Vehicles`, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(`Number of Vehicles`))
  )
knitr::kable(table1)
Casualty Severity Min Q1 Median Q3 Max Mean SD n Missing
Fatal 1 1 1 2 3 1.555556 0.7264832 9 0
Serious 1 1 2 2 5 1.714286 0.7185011 322 0
Slight 1 2 2 2 10 1.975654 0.7735727 2218 0

Hypothesis Testing

Hypothesis Testing Cont.

# Pearson chi-squared test of association between the number of vehicles
# involved (rows) and casualty severity (columns).
# NOTE(review): many of the expected counts printed further down are below 5,
# so the chi-squared approximation may be unreliable here -- consider
# simulate.p.value = TRUE or merging the sparse vehicle-count categories.
chi1<-chisq.test(table(Accidents_leeds$`Number of Vehicles`,Accidents_leeds$`Casualty Severity`))
# Print the test result (statistic, degrees of freedom, p-value).
chi1
## 
##  Pearson's Chi-squared test
## 
## data:  table(Accidents_leeds$`Number of Vehicles`, Accidents_leeds$`Casualty Severity`)
## X-squared = 56.638, df = 12, p-value = 9.185e-08
# Cross-check the reported p-value: upper-tail probability of the observed
# statistic under a chi-squared distribution with 12 degrees of freedom.
pchisq(56.638, df = 12, lower.tail = FALSE)
## [1] 9.186916e-08

Hypothesis Testing Cont.

# Observed cell counts used by the test.
chi1[["observed"]]
##     
##      Fatal Serious Slight
##   1      5     128    494
##   2      3     169   1418
##   3      1      17    202
##   4      0       5     87
##   5      0       3     12
##   6      0       0      3
##   10     0       0      2

Hypothesis Testing Cont.

# Cell counts expected under the null hypothesis of independence.
chi1[["expected"]]
##     
##            Fatal     Serious      Slight
##   1  2.213809337  79.2051785  545.581012
##   2  5.613966261 200.8552373 1383.530796
##   3  0.776775206  27.7912907  191.431934
##   4  0.324833268  11.6218125   80.053354
##   5  0.052961946   1.8948607   13.052177
##   6  0.010592389   0.3789721    2.610435
##   10 0.007061593   0.2526481    1.740290

Hypothesis Testing Cont.

# Raw residuals (observed minus expected), rounded to two decimals.
# The difference is taken first and then rounded: `%>%` binds more tightly
# than `-`, so the piped form `observed - expected %>% round(2)` rounded only
# the expected counts before subtracting.
round(chi1$observed - chi1$expected, 2)
##     
##       Fatal Serious Slight
##   1    2.79   48.79 -51.58
##   2   -2.61  -31.86  34.47
##   3    0.22  -10.79  10.57
##   4   -0.32   -6.62   6.95
##   5   -0.05    1.11  -1.05
##   6   -0.01   -0.38   0.39
##   10  -0.01   -0.25   0.26

Hypothesis Testing Cont.

\[\chi^2 = \sum_{i}\sum_{j} \frac{(O_{ij} - E_{ij})^2}{E_{ij}}\]

\[df=(r-1)(c-1)\]

Discussion

Conclusion

References