{r} setup, include=FALSE} chooseCRANmirror(graphics=FALSE, ind=1) knitr::opts_chunk$set(echo = TRUE)
pckgs <- c("scales","readxl","hrbrthemes")
install.packages(pckgs, repos = "http://cran.us.r-project.org")
## Installing packages into 'C:/Users/tharu/OneDrive/Documents/R/win-library/4.1'
## (as 'lib' is unspecified)
## package 'scales' successfully unpacked and MD5 sums checked
## package 'readxl' successfully unpacked and MD5 sums checked
## package 'hrbrthemes' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tharu\AppData\Local\Temp\RtmpK25LFV\downloaded_packages
library("readxl")
library(scales)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x readr::col_factor() masks scales::col_factor()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Data_Guide <- read_excel("~/R_Files/Data_Guide.xls")
Accidents <- read.csv("~/R_Files/Accidents.csv")
acc<- drop_na(Accidents)
Accidents<- subset(acc,Road_Type!="-1" & Junction_Detail!="-1" & Junction_Control!="-1" & Pedestrian_Crossing_Human_Control!="-1" & Pedestrian_Crossing_Physical_Facilities!="-1" & Light_Conditions!="-1" & Weather_Conditions!="-1" & Road_Surface_Conditions!="-1" & Special_Conditions_at_Site!="-1" & Carriageway_Hazards!="-1")
head(Accidents)
Acc_RoadType <- Accidents %>% dplyr::count(Road_Type)
print(Acc_RoadType)
## Road_Type n
## 1 1 7853
## 2 2 2072
## 3 3 9020
## 4 6 52056
## 5 7 985
## 6 9 424
Road_percent <- round(Acc_RoadType['n'] / NROW(Accidents) * 100)
print(Road_percent)
## n
## 1 11
## 2 3
## 3 12
## 4 72
## 5 1
## 6 1
Road_Type_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 9)
Road_types <- Road_Type_Guide['label'][1:7,1]
print(Road_types)
## # A tibble: 7 x 1
## label
## <chr>
## 1 Roundabout
## 2 One way street
## 3 Dual carriageway
## 4 Single carriageway
## 5 Slip road
## 6 Unknown
## 7 One way street/Slip road
library(RColorBrewer)
myPalette <- brewer.pal(9, "Pastel1")
pie(Road_percent$n, labels = Acc_RoadType$n , border = "white" , col=myPalette, main = "Number of Accidents based on Road Types" , radius = 1.02, edges =6)
legend("topleft",fill = myPalette, legend =Road_types$label ,xpd=TRUE, inset=c(0, -0.0), cex=.65 )
From the above plot, We can observe that the maximum number of accidents happened on the road type “Single Carriageway” and least number of accidents happened on the road type “One Way Street/Slip road”.
Speed_Limit_Count <- Accidents %>% dplyr::count(Speed_limit)
barplot(height=Speed_Limit_Count$n,names=Speed_Limit_Count$Speed_limit,
col=myPalette,
xlab="Speed Limit",
ylab="Number of Accidents",
main="Speed Limit VS Number of Accidents",
ylim=c(0,90000)
)
It can be observed that maximum number of accidents on the roads having a speed limit of 30 while the roads having speed limit as 70 has least number of accidents recorder.
Junction_Detail_Count <- Accidents %>% dplyr::count(Junction_Detail)
Junction_Detail_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 10)
Junction_Detail_Count = merge(x = Junction_Detail_Count, y = Junction_Detail_Guide, by.x = "Junction_Detail", by.y = "code", all.x = TRUE, all.y = FALSE)
print(Junction_Detail_Count)
## Junction_Detail n label
## 1 0 19 Not at junction or within 20 metres
## 2 1 10462 Roundabout
## 3 2 1474 Mini-roundabout
## 4 3 37366 T or staggered junction
## 5 5 2002 Slip road
## 6 6 11916 Crossroads
## 7 7 1458 More than 4 arms (not roundabout)
## 8 8 3145 Private drive or entrance
## 9 9 4568 Other junction
barplot(height=Junction_Detail_Count$n,names=Junction_Detail_Count$label,
col=myPalette,
xlab="Junction Type",
ylab="Number of Accidents",
main="Junction Type VS Number of Accidents",
ylim = c(0,40000) ,xaxt = 'n'
)
legend("topright", legend=Junction_Detail_Count$label, fill = myPalette, xpd=TRUE, inset=c(0, -0.0), cex=.65)
T or Staggered Junction
Junction_Control_Count <- Accidents %>% dplyr::count(Junction_Control)
Junction_Control_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 11)
Junction_Control_Count = merge(x = Junction_Control_Count, y = Junction_Control_Guide, by.x = "Junction_Control", by.y = "code", all.x = TRUE, all.y = FALSE)
print(Junction_Detail_Count)
## Junction_Detail n label
## 1 0 19 Not at junction or within 20 metres
## 2 1 10462 Roundabout
## 3 2 1474 Mini-roundabout
## 4 3 37366 T or staggered junction
## 5 5 2002 Slip road
## 6 6 11916 Crossroads
## 7 7 1458 More than 4 arms (not roundabout)
## 8 8 3145 Private drive or entrance
## 9 9 4568 Other junction
barplot(height=Junction_Control_Count$n,names=Junction_Control_Count$label,
col=myPalette,
xlab="Junction Control",
ylab="Number of Accidents",
main="Junction Control VS Number of Accidents",
ylim = c(0,60000) ,xaxt = 'n'
)
legend("topleft", legend=Junction_Control_Count$label, fill = myPalette, xpd=TRUE, inset=c(0, -0.0), cex=.65)
Juntion control
Acc_LightConditions <- Accidents %>% dplyr::count(Light_Conditions)
print(Acc_LightConditions)
## Light_Conditions n
## 1 1 51906
## 2 4 17059
## 3 5 482
## 4 6 1537
## 5 7 1426
Light_Percent <- round(Acc_LightConditions['n'] / NROW(Accidents) * 100)
print(Light_Percent)
## n
## 1 72
## 2 24
## 3 1
## 4 2
## 5 2
Acc_LightConditions$Percent <- Light_Percent$n
print(Acc_LightConditions)
## Light_Conditions n Percent
## 1 1 51906 72
## 2 4 17059 24
## 3 5 482 1
## 4 6 1537 2
## 5 7 1426 2
Light_Conditions_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 15)
Light_Conditions <- merge(x = Acc_LightConditions, y = Light_Conditions_Guide, by.x = "Light_Conditions", by.y = "code", all.x = TRUE, all.y = TRUE)
print(Light_Conditions)
## Light_Conditions n Percent label
## 1 -1 NA NA Data missing or out of range
## 2 1 51906 72 Daylight
## 3 4 17059 24 Darkness - lights lit
## 4 5 482 1 Darkness - lights unlit
## 5 6 1537 2 Darkness - no lighting
## 6 7 1426 2 Darkness - lighting unknown
pie(Acc_LightConditions$Percent, labels = Acc_LightConditions$Percent , border = "white" , col=myPalette, main = "% Number of Accidents based on Light Conditions", radius = 1.0)
legend("topright",fill = myPalette, legend = Light_Conditions$label,xpd=TRUE, inset=c(0, -0.0), cex=.65 )
> Light Conditions
Acc_WeatherConditions <- Accidents %>% dplyr::count(Weather_Conditions)
print(Acc_WeatherConditions)
## Weather_Conditions n
## 1 1 59646
## 2 2 8076
## 3 3 264
## 4 4 663
## 5 5 507
## 6 6 42
## 7 7 280
## 8 8 1412
## 9 9 1520
Weather_Percent <- round(Acc_WeatherConditions['n'] / NROW(Accidents) * 100, digits = 2)
print(Weather_Percent)
## n
## 1 82.37
## 2 11.15
## 3 0.36
## 4 0.92
## 5 0.70
## 6 0.06
## 7 0.39
## 8 1.95
## 9 2.10
Acc_WeatherConditions$Percent <- Weather_Percent$n
print(Acc_WeatherConditions)
## Weather_Conditions n Percent
## 1 1 59646 82.37
## 2 2 8076 11.15
## 3 3 264 0.36
## 4 4 663 0.92
## 5 5 507 0.70
## 6 6 42 0.06
## 7 7 280 0.39
## 8 8 1412 1.95
## 9 9 1520 2.10
Weather_Conditions_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 16)
Weather_Conditions <- merge(x = Acc_WeatherConditions, y = Weather_Conditions_Guide, by.x = "Weather_Conditions", by.y = "code", all.x = TRUE, all.y = TRUE)
print(Weather_Conditions)
## Weather_Conditions n Percent label
## 1 -1 NA NA Data missing or out of range
## 2 1 59646 82.37 Fine no high winds
## 3 2 8076 11.15 Raining no high winds
## 4 3 264 0.36 Snowing no high winds
## 5 4 663 0.92 Fine + high winds
## 6 5 507 0.70 Raining + high winds
## 7 6 42 0.06 Snowing + high winds
## 8 7 280 0.39 Fog or mist
## 9 8 1412 1.95 Other
## 10 9 1520 2.10 Unknown
pie(Acc_WeatherConditions$Percent, labels = "" , border = "white" , col=myPalette, main = "Number of Accidents based on Weather Conditions", radius = 1 )
legend("topright",fill = myPalette, legend = Weather_Conditions$label,xpd=TRUE, inset=c(0, -0.0), cex=.65 )
Weather conditons
Road_Surface_Count <- Accidents %>% dplyr::count(Road_Surface_Conditions)
Road_Surface_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 17)
Road_Surface_Count = merge(x = Road_Surface_Count, y = Road_Surface_Guide, by.x = "Road_Surface_Conditions", by.y = "code", all.x = TRUE, all.y = FALSE)
print(Road_Surface_Count)
## Road_Surface_Conditions n label
## 1 1 53348 Dry
## 2 2 17971 Wet or damp
## 3 3 160 Snow
## 4 4 915 Frost or ice
## 5 5 16 Flood over 3cm. deep
barplot(height=Road_Surface_Count$n,names=Road_Surface_Count$label,
col=myPalette,
xlab="",
ylab="",
main="Road Surface Conditions VS Number of Accidents"
)
legend("topright", legend=Road_Surface_Count$label, fill = myPalette, xpd=TRUE, inset=c(0, -0.0), cex=.65)
Road Surface Conditions VS Number of Accidents
Day_Count <- Accidents %>% dplyr::count(Day_of_Week)
Day_Guide <- read_excel("~/R_Files/Data_Guide.xls", sheet = 5)
Day_Count = merge(x = Day_Count, y = Day_Guide, by.x = "Day_of_Week", by.y = "code", all.x = TRUE, all.y = FALSE)
print(Day_Count)
## Day_of_Week n label
## 1 1 7902 Sunday
## 2 2 10036 Monday
## 3 3 11034 Tuesday
## 4 4 10964 Wednesday
## 5 5 11084 Thursday
## 6 6 11918 Friday
## 7 7 9472 Saturday
ggplot( Day_Count,aes(x=label, y=n, group = 1,)) +
geom_line( color="grey",size=2, alpha=0.9, linetype=1) +
geom_point(shape=21, color="black", fill="#69b3a2", size=3) +
theme_classic()+
ggtitle("Number Accidents based on day of the Week")+
xlab("Day of Week")+
ylab("Number of Accidents")+
xlim("Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday")
Number Accidents based on day of the Week
grouped_data <- aggregate(Accidents, by=list(Accidents$Day_of_Week, Accidents$Time), FUN=length);
grouped_data$Group.2<- hm(grouped_data$Group.2)
## Warning in .parse_hms(..., order = "HM", quiet = quiet): Some strings failed to
## parse, or all strings are NAs
grouped_data
ggplot(grouped_data, aes(x=Group.2, y=Group.1)) + geom_tile(aes(fill=Latitude))+
scale_x_continuous(expand = c(0, 0))+
scale_y_continuous(expand = c(0, 0))
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 8514 rows containing missing values (geom_tile).
Time_Count <- Accidents %>% dplyr::count(Time)
Time_Count$Time<- hm(Time_Count$Time)
## Warning in .parse_hms(..., order = "HM", quiet = quiet): Some strings failed to
## parse, or all strings are NAs
Time_Count
ggplot( Time_Count,aes(x=Time, y=n, group = 1,)) +
geom_line( color="grey",size=2, alpha=0.9, linetype=1)
## Warning: Removed 1433 row(s) containing missing values (geom_path).