# Road Accident Exploratory Data Analysis
# Read the accident dataset from a CSV file in the working directory,
# then print a per-column summary of the resulting data frame.
data <- read.csv(file = "India_accidents_datagov.csv")
summary(object = data)
## SL_NO STATE_UT_CITY LOCATION_TYPE PARENT_STATE
## Min. : 1 Length:89 Length:89 Length:89
## 1st Qu.:23 Class :character Class :character Class :character
## Median :45 Mode :character Mode :character Mode :character
## Mean :45
## 3rd Qu.:67
## Max. :89
## REGION DOMINANT_ROAD_TYPE PRIMARY_CAUSE_OF_ACCIDENT
## Length:89 Length:89 Length:89
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## ROAD_SURFACE_CONDITION PEAK_ACCIDENT_TIME MOST_INVOLVED_VEHICLE
## Length:89 Length:89 Length:89
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## ROAD_ACC_CASES ROAD_ACC_INJURED ROAD_ACC_DIED TOTAL_TRAFFIC_ACC_CASES
## Min. : 4 Min. : 3 Min. : 2 Min. : 4
## 1st Qu.: 451 1st Qu.: 333 1st Qu.: 150 1st Qu.: 473
## Median : 1181 Median : 987 Median : 272 Median : 1232
## Mean : 5766 Mean : 5397 Mean : 2102 Mean : 6075
## 3rd Qu.: 3452 3rd Qu.: 3372 3rd Qu.: 838 3rd Qu.: 3452
## Max. :64105 Max. :67703 Max. :24109 Max. :66117
## TOTAL_TRAFFIC_INJURED TOTAL_TRAFFIC_DIED CASE_FATALITY_RATE_PCT
## Min. : 3 Min. : 2 Min. : 6.79
## 1st Qu.: 333 1st Qu.: 165 1st Qu.: 21.37
## Median : 987 Median : 281 Median : 38.51
## Mean : 5427 Mean : 2382 Mean : 41.88
## 3rd Qu.: 3446 3rd Qu.: 918 3rd Qu.: 51.43
## Max. :67892 Max. :28615 Max. :116.90
## INJURY_RATE_PCT FATALITY_RATIO_PCT
## Min. : 0.74 Min. : 6.41
## 1st Qu.: 68.55 1st Qu.:19.55
## Median : 82.33 Median :29.81
## Mean : 83.69 Mean :33.24
## 3rd Qu.: 96.57 3rd Qu.:40.92
## Max. :186.43 Max. :99.26
# Print the compact structure of the data frame (dimensions, column names,
# column types and the first few values of each column).
str(object = data)
## 'data.frame': 89 obs. of 19 variables:
## $ SL_NO : int 1 2 3 4 5 6 7 8 9 10 ...
## $ STATE_UT_CITY : chr "Andhra Pradesh" "Arunachal Pradesh" "Assam" "Bihar" ...
## $ LOCATION_TYPE : chr "State" "State" "State" "State" ...
## $ PARENT_STATE : chr "Andhra Pradesh" "Arunachal Pradesh" "Assam" "Bihar" ...
## $ REGION : chr "South" "Northeast" "Northeast" "East" ...
## $ DOMINANT_ROAD_TYPE : chr "Other Roads" "Other Roads" "National Highway" "State Highway" ...
## $ PRIMARY_CAUSE_OF_ACCIDENT: chr "Over-Speeding" "Bad Weather" "Use of Mobile Phone" "Over-Speeding" ...
## $ ROAD_SURFACE_CONDITION : chr "Dry" "Dry" "Dry" "Wet" ...
## $ PEAK_ACCIDENT_TIME : chr "Afternoon (12PM-6PM)" "Afternoon (12PM-6PM)" "Morning (6AM-12PM)" "Night (10PM-6AM)" ...
## $ MOST_INVOLVED_VEHICLE : chr "Truck/Lorry" "Two-Wheeler" "Bus" "Two-Wheeler" ...
## $ ROAD_ACC_CASES : int 21070 215 7028 10801 13091 3012 15777 10654 2484 5175 ...
## $ ROAD_ACC_INJURED : int 21340 177 5679 7068 11459 1071 15139 8353 3891 3747 ...
## $ ROAD_ACC_DIED : int 8293 152 3060 8898 5890 274 7634 5228 979 3898 ...
## $ TOTAL_TRAFFIC_ACC_CASES : int 22099 215 7739 12297 13524 3066 16549 11876 2484 5544 ...
## $ TOTAL_TRAFFIC_INJURED : int 21340 177 5679 7074 11480 1071 15139 8379 3891 3766 ...
## $ TOTAL_TRAFFIC_DIED : int 9330 152 3775 10397 6323 328 8417 6424 979 4251 ...
## $ CASE_FATALITY_RATE_PCT : num 42.2 70.7 48.8 84.5 46.8 ...
## $ INJURY_RATE_PCT : num 96.6 82.3 73.4 57.5 84.9 ...
## $ FATALITY_RATIO_PCT : num 30.4 46.2 39.9 59.5 35.5 ...
# Histogram of the injury rate (%) over the rows whose REGION is "South".
# which() keeps only rows where the comparison is TRUE, dropping NA matches.
south_data <- data[which(data$REGION == "South"), ]
hist(
  x = south_data$INJURY_RATE_PCT,
  main = "Injury Rate in South Region",
  xlab = "Injury Rate %",
  col = "lightblue"
)

# Same histogram for the rows whose REGION is "North".
north_data <- data[which(data$REGION == "North"), ]
hist(
  x = north_data$INJURY_RATE_PCT,
  main = "Injury Rate in North Region",
  xlab = "Injury Rate %",
  col = "lightgreen"
)

# Histograms of per-row totals for two categorical subsets of `data`.
#
# DOMINANT_ROAD_TYPE and MOST_INVOLVED_VEHICLE are one label per row, while
# ROAD_ACC_DIED and ROAD_ACC_CASES are that row's totals over all road types
# and vehicles. The plots therefore show totals for locations that carry the
# label, not deaths on national highways or two-wheeler cases specifically,
# and the titles are worded accordingly.
# NOTE(review): assumes each row is one state/UT/city (see STATE_UT_CITY).

# Rows whose dominant road type is "National Highway".
nh_data <- subset(data, DOMINANT_ROAD_TYPE == "National Highway")
hist(nh_data$ROAD_ACC_DIED,
     main = "Road Accident Deaths where National Highways Dominate",
     xlab = "Road Accident Deaths per Location",
     col = "red")

# Rows whose most involved vehicle is "Two-Wheeler".
bike_data <- subset(data, MOST_INVOLVED_VEHICLE == "Two-Wheeler")
hist(bike_data$ROAD_ACC_CASES,
     main = "Accident Cases where Two-Wheelers Are Most Involved",
     xlab = "Road Accident Cases per Location",
     col = "cyan")

# Draw a bar chart of how many rows fall into each category of `values`.
#
# The bar heights come from table(), i.e. they count rows of the dataset
# (locations), not accidents, so the y axis is labelled "Number of Locations".
#
# values  Vector of category labels, one per row.
# main    Plot title.
# xlab    X-axis label.
# col     Bar fill colour.
# rotate  If TRUE, draw the category labels perpendicular to the axis
#         (las = 2) and enlarge the bottom margin so they are not clipped.
#
# Returns the table of counts, invisibly.
plot_category_counts <- function(values, main, xlab, col, rotate = FALSE) {
  counts <- table(values)

  if (!rotate) {
    barplot(counts,
            main = main,
            xlab = xlab,
            ylab = "Number of Locations",
            col = col)
    return(invisible(counts))
  }

  # Rough estimate of the margin lines needed by the longest rotated label
  # (about half a line per character, plus the gap between axis and labels).
  label_lines <- 0.5 * max(nchar(names(counts)), 0L) + 2

  # Enlarge only the bottom margin and restore the previous settings on exit
  # so later plots are unaffected.
  old_par <- par(mar = c(label_lines + 1.5, 4.1, 4.1, 2.1))
  on.exit(par(old_par), add = TRUE)

  barplot(counts,
          main = main,
          ylab = "Number of Locations",
          col = col,
          las = 2)
  # Place the axis title below the rotated labels instead of on top of them.
  title(xlab = xlab, line = label_lines)

  invisible(counts)
}

plot_category_counts(data$LOCATION_TYPE,
                     main = "Locations by Type",
                     xlab = "Location Type",
                     col = "lightgreen")

plot_category_counts(data$DOMINANT_ROAD_TYPE,
                     main = "Locations by Dominant Road Type",
                     xlab = "Dominant Road Type",
                     col = "orange",
                     rotate = TRUE)

plot_category_counts(data$MOST_INVOLVED_VEHICLE,
                     main = "Locations by Most Involved Vehicle",
                     xlab = "Most Involved Vehicle",
                     col = "purple",
                     rotate = TRUE)

plot_category_counts(data$PEAK_ACCIDENT_TIME,
                     main = "Locations by Peak Accident Time",
                     xlab = "Peak Accident Time",
                     col = "yellow",
                     rotate = TRUE)

# Scatter plot: road accident cases (x) against road accident deaths (y).
with(data, plot(
  x = ROAD_ACC_CASES,
  y = ROAD_ACC_DIED,
  main = "Accident Cases vs Deaths",
  xlab = "Number of Accident Cases",
  ylab = "Number of Deaths",
  col = "blue",
  pch = 19
))

# Scatter plot: number injured (x) against number of deaths (y).
with(data, plot(
  x = ROAD_ACC_INJURED,
  y = ROAD_ACC_DIED,
  main = "Injuries vs Deaths",
  xlab = "Number of Injured",
  ylab = "Number of Deaths",
  col = "red",
  pch = 19
))

# Scatter plot: injury rate (%) against fatality ratio (%).
with(data, plot(
  x = INJURY_RATE_PCT,
  y = FATALITY_RATIO_PCT,
  main = "Injury Rate vs Fatality Ratio",
  xlab = "Injury Rate %",
  ylab = "Fatality Ratio %",
  col = "green",
  pch = 19
))

# Scatter of accident cases against deaths, with the least-squares line of
# deaths on cases drawn over the points.
plot(
  x = data$ROAD_ACC_CASES,
  y = data$ROAD_ACC_DIED,
  main = "Accident Cases vs Deaths",
  xlab = "Accident Cases",
  ylab = "Deaths",
  col = "blue",
  pch = 19
)
abline(
  reg = lm(formula = ROAD_ACC_DIED ~ ROAD_ACC_CASES, data = data),
  col = "red",
  lwd = 2
)

# Box plot of road accident deaths, one box per REGION value.
boxplot(
  formula = ROAD_ACC_DIED ~ REGION,
  data = data,
  main = "Deaths by Region",
  xlab = "Region",
  ylab = "Number of Deaths",
  col = "orange"
)

# Mosaic plot of the cross-tabulation of dominant road type (x) by region (y).
mosaicplot(
  x = table(data$DOMINANT_ROAD_TYPE, data$REGION),
  main = "Road Type vs Region",
  xlab = "Road Type",
  ylab = "Region",
  col = "orange"
)

# Mosaic plot of most involved vehicle (x) by peak accident time (y);
# las = 2 draws the category labels perpendicular to the axes.
mosaicplot(
  x = table(data$MOST_INVOLVED_VEHICLE, data$PEAK_ACCIDENT_TIME),
  main = "Vehicle Type vs Accident Time",
  xlab = "Vehicle Type",
  ylab = "Accident Time",
  col = "green",
  las = 2
)
