# Load your data and prepare for visualisation
# Read the crash records. stringsAsFactors = TRUE keeps the text columns as
# factors on R >= 4.0 as well (it was the default before 4.0); the levels()
# and class() inspections further down expect factor columns.
Crashes1 <- read.csv(
  "D:/Data Visualisation/Assignment2/Crashes1.csv",
  stringsAsFactors = TRUE
)
# Preview the first rows to check the columns that were read.
head(Crashes1)
## ACCIDENT_DATE ALCOHOLTIME DAY_OF_WEEK LIGHT_CONDITION
## 1 26/05/2012 No Saturday Day
## 2 6/06/2012 No Wednesday Day
## 3 24/05/2012 No Thursday Day
## 4 6/06/2012 No Wednesday Dusk/Dawn
## 5 22/05/2012 No Tuesday Day
## 6 6/06/2012 Yes Wednesday Dark Street lights on
## SEVERITY SPEED_ZONE LONGITUDE LATITUDE
## 1 Other injury accident 60 km/hr 145.0607 -37.81037
## 2 Other injury accident 60 km/hr 144.9912 -37.88316
## 3 Other injury accident 40 km/hr 145.0095 -37.82695
## 4 Serious injury accident 50 km/hr 145.1346 -37.84155
## 5 Serious injury accident 80 km/hr 145.2946 -37.88860
## 6 Serious injury accident 70 km/hr 146.2771 -38.18324
## REGION_NAME INJ_OR_FATAL FATALITY ALCOHOL_RELATED
## 1 METROPOLITAN SOUTH EAST REGION 2 0 No
## 2 METROPOLITAN NORTH WEST REGION 1 0 No
## 3 METROPOLITAN NORTH WEST REGION 1 0 No
## 4 METROPOLITAN SOUTH EAST REGION 1 0 No
## 5 METROPOLITAN SOUTH EAST REGION 1 0 No
## 6 EASTERN REGION 1 0 No
## STAT_DIV_NAME
## 1 Metro
## 2 Metro
## 3 Metro
## 4 Metro
## 5 Metro
## 6 Country
# Keep only rows whose region is not the literal string "NULL" and whose
# severity is not "Non injury accident".
Crashes1 <- Crashes1 %>%
  filter(
    REGION_NAME != "NULL",
    SEVERITY != "Non injury accident"
  )
# Rank the regions by number of crashes (largest first) and use that ranking
# as the factor level order, so the region bars are drawn sorted by frequency.
counts <- Crashes1 %>%
  group_by(REGION_NAME) %>%
  summarise(count = n()) %>%
  as.data.frame()
counts <- counts[order(-counts$count), ]
Crashes1$REGION_NAME <- factor(
  Crashes1$REGION_NAME,
  levels = counts$REGION_NAME
)
# Show the resulting level order.
levels(Crashes1$REGION_NAME)
## [1] "METROPOLITAN SOUTH EAST REGION" "METROPOLITAN NORTH WEST REGION"
## [3] "SOUTH WESTERN REGION" "EASTERN REGION"
## [5] "NORTHERN REGION" "NORTH EASTERN REGION"
## [7] "WESTERN REGION" ""
# The raw weekday levels are in alphabetical order and include a blank level.
Crashes1$DAY_OF_WEEK %>% levels
## [1] "" "Friday" "Monday" "Saturday" "Sunday" "Thursday"
## [7] "Tuesday" "Wednesday"
# Re-level the weekdays in calendar order, Monday first. Values that are not
# one of the seven names listed here (such as the blank entries) become NA.
# The levels are the full day names, so re-levelling by abbreviations
# ("Mon", "Tue", ...) would match nothing and is deliberately not done.
Crashes1$DAY_OF_WEEK <- factor(
  Crashes1$DAY_OF_WEEK,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
Crashes1$DAY_OF_WEEK %>% levels
## [1] "Monday" "Tuesday" "Wednesday" "Thursday" "Friday" "Saturday"
## [7] "Sunday"
# Inspect the accident date column as it was read from the CSV.
class(Crashes1$ACCIDENT_DATE)
## [1] "factor"
head(Crashes1$ACCIDENT_DATE)
## [1] 26/05/2012 6/06/2012 24/05/2012 6/06/2012 22/05/2012 6/06/2012
## 2040 Levels: 1/01/2012 1/01/2013 1/01/2014 1/01/2015 1/01/2016 ... 9/12/2016
# Parse the day/month/year strings into Date values.
Crashes1$ACCIDENT_DATE <- dmy(Crashes1$ACCIDENT_DATE)
class(Crashes1$ACCIDENT_DATE)
## [1] "Date"
head(year(Crashes1$ACCIDENT_DATE))
## [1] 2012 2012 2012 2012 2012 2012
# Derive the four-digit year and the two-digit month as text columns; they
# are the grouping keys for the cross-tabulations below.
Crashes1[["YEAR"]] <- format(Crashes1[["ACCIDENT_DATE"]], format = "%Y")
Crashes1[["MONTH"]] <- format(Crashes1[["ACCIDENT_DATE"]], format = "%m")
# Count the crashes for every year/month combination.
crosstab <- table(YEAR = Crashes1$YEAR, MONTH = Crashes1$MONTH)
crosstab
## MONTH
## YEAR 01 02 03 04 05 06 07 08 09 10 11 12
## 2012 1055 1220 1222 1195 1257 1073 1096 1159 1030 1158 1130 1094
## 2013 1009 1173 1225 1159 1239 1157 1092 1106 1108 1203 1180 1166
## 2014 1089 1139 1334 1175 1284 1154 1111 1167 1080 1276 1245 1177
## 2015 1168 1149 1348 1238 1294 1070 1201 1064 1053 1288 1190 1347
## 2016 1124 1288 1290 1282 1305 1145 1180 1177 1011 1142 1144 1100
## 2017 906 1000 1124 919 900 784 479 2 0 0 0 0
# Convert the frequency table to a long data frame: one row per year/month
# pair together with its crash count.
crosstab <- data.frame(crosstab)
str(crosstab) # Data frame summary
## 'data.frame': 72 obs. of 3 variables:
## $ YEAR : Factor w/ 6 levels "2012","2013",..: 1 2 3 4 5 6 1 2 3 4 ...
## $ MONTH: Factor w/ 12 levels "01","02","03",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Freq : int 1055 1009 1089 1168 1124 906 1220 1173 1139 1149 ...
# Rename the count column from the default "Freq" to "COUNT".
names(crosstab) <- c("YEAR", "MONTH", "COUNT")
head(crosstab)
## YEAR MONTH COUNT
## 1 2012 01 1055
## 2 2013 01 1009
## 3 2014 01 1089
## 4 2015 01 1168
## 5 2016 01 1124
## 6 2017 01 906
# Count the crashes for every year/weekday combination.
crosstab3 <- table(
  YEAR = Crashes1$YEAR,
  DAY_OF_WEEK = Crashes1$DAY_OF_WEEK
)
crosstab3
## DAY_OF_WEEK
## YEAR Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## 2012 1911 1889 2017 2168 2145 1833 1650
## 2013 1900 2050 2057 2088 2160 1872 1631
## 2014 1899 1952 2167 2143 2181 1876 1861
## 2015 1941 2069 2065 2250 2224 1655 1879
## 2016 1975 2025 2145 2047 2219 1524 1815
## 2017 914 943 838 935 916 550 804
# Convert the frequency table to a long data frame: one row per year/weekday
# pair together with its crash count.
crosstab3 <- data.frame(crosstab3)
str(crosstab3) # Data frame summary
## 'data.frame': 42 obs. of 3 variables:
## $ YEAR : Factor w/ 6 levels "2012","2013",..: 1 2 3 4 5 6 1 2 3 4 ...
## $ DAY_OF_WEEK: Factor w/ 7 levels "Monday","Tuesday",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Freq : int 1911 1900 1899 1941 1975 914 1889 2050 1952 2069 ...
# Rename the count column from the default "Freq" to "COUNT".
names(crosstab3) <- c("YEAR", "DAY_OF_WEEK", "COUNT")
head(crosstab3)
## YEAR DAY_OF_WEEK COUNT
## 1 2012 Monday 1911
## 2 2013 Monday 1900
## 3 2014 Monday 1899
## 4 2015 Monday 1941
## 5 2016 Monday 1975
## 6 2017 Monday 914
# Summarise crashes per light condition: the raw count, the share of all
# rows in Crashes1, and a factor whose levels run from the most to the least
# frequent condition (this fixes the bar order in the plot).
Crashes_sum <- Crashes1 %>%
  group_by(LIGHT_CONDITION) %>%
  summarise(count = n()) %>%
  mutate(
    Proportion = count / nrow(Crashes1),
    LIGHT_CONDITION = factor(
      LIGHT_CONDITION,
      levels = LIGHT_CONDITION[order(-count)]
    )
  )
# Visualise Your Data
# Horizontal stacked bars per region, filled by severity; the bar height is
# each count divided by the sum of the counts.
p4 <- ggplot(Crashes1, aes(x = REGION_NAME, fill = SEVERITY)) +
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  coord_flip() +
  labs(
    title = "Metropolitan region have more crashes with injuries(Jan2012-Jul2017)",
    y = "Proportion of crashes",
    x = "Region names",
    fill = "Severity"
  ) +
  scale_fill_brewer(palette = "Accent")
# Monthly crash counts, one coloured line (with points) per year.
p5 <- ggplot(
  crosstab,
  aes(x = MONTH, y = COUNT, group = YEAR, colour = YEAR)
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Months",
    colour = "Year",
    title = "No of crashes in Melbourne(Jan 2012 to Jul 2017)"
  ) +
  theme_classic()
# Crash counts per day of the week, one coloured line (with points) per year.
# The title spelling ("Distribution") is corrected and the legend title is
# "Year", matching the monthly plot.
p6 <- ggplot(
  crosstab3,
  aes(x = DAY_OF_WEEK, y = COUNT, group = YEAR, colour = YEAR)
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Day of Week",
    colour = "Year",
    title = "Distribution of crashes for day of week(Jan 2012 to Jul2017)"
  ) +
  theme_classic()
# Horizontal bars with the proportion of crashes per light condition. The
# fill colour repeats the axis labels, so the legend is switched off;
# "none" (lowercase) is the documented value for legend.position.
p7 <- ggplot(
  Crashes_sum,
  aes(x = LIGHT_CONDITION, y = Proportion, fill = LIGHT_CONDITION)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme(legend.position = "none") +
  labs(
    x = "Light conditions",
    title = "Distribution of crashes as per light conditions during crash(Jan 2012 to Jul2017)",
    caption = "Source: https://www.data.vic.gov.au/data/dataset/crashes-last-five-years"
  )
# Draw the four plots in one grid, in the order p5, p6, p4, p7, under a
# shared title.
gridExtra::grid.arrange(
  p5, p6, p4, p7,
  top = "Analysis of crashes in Melbourne(Jan 2012- Jul 2017)"
)
The grid above contains four plots. The first is a line graph showing the monthly trend in crashes for each year (January 2012 to July 2017) in Melbourne. The months follow a broadly similar pattern in every year, but in 2017 (up to July) the number of crashes is lower than in the other years, probably because the measures taken by the Victorian government to reduce the number of crashes are working. The second plot shows the distribution of crashes across the days of the week; Thursday and Friday have the most crashes. The third plot depicts the region-wise distribution of crashes and the severity (serious injury, other injury and fatal crashes) of the crashes in each region. We can clearly see that the number of crashes is highest in the metropolitan regions, because the population is dense in those regions. In every region, other-injury crashes outnumber serious-injury crashes, while fatal crashes are relatively rare. The last plot presents the light conditions at the time of the crash and shows that most crashes occur in daylight.