The dataset provided is the US Accident Injury dataset, which records accidents that took place at US mine sites.
setwd("~/CDA")
library(ggplot2)
library(tidyr)
library(flextable)
library(timetk)
library(zoo)
library(dplyr)
library(gganimate)
library(plotly)
library(tidyverse)
library(reshape2)
library(gifski)
Loading the main dataset.
# Read the raw accident records from the CSV file in the working directory.
usdat <- read.csv(file = "US Accident Data.csv")
Using str to analyze the data, we find:
# Print a compact overview of the data frame: dimensions, column names,
# column types and the first few values of each column.
str(usdat)
## 'data.frame': 2000 obs. of 57 variables:
## $ MINE_ID : int 100003 100003 100008 100011 100011 100011 100011 100011 100016 100021 ...
## $ CONTROLLER_ID : chr "41044" "41044" "M31753" "M11763" ...
## $ CONTROLLER_NAME : chr "Lhoist Group" "Lhoist Group" "Alan B Cheney" "Imerys S A" ...
## $ OPERATOR_ID : chr "L13586" "L13586" "L31753" "L17074" ...
## $ OPERATOR_NAME : chr "Lhoist North America " "Lhoist North America " "Cheney Lime & Cement Company" "Imerys Pigments LLC" ...
## $ CONTRACTOR_ID : chr "" "" "" "" ...
## $ DOCUMENT_NO : num 2.2e+11 2.2e+11 2.2e+11 2.2e+11 2.2e+11 ...
## $ SUBUNIT_CD : int 3 30 30 30 30 30 30 30 3 3 ...
## $ SUBUNIT : chr "STRIP, QUARY, OPEN PIT" "MILL OPERATION/PREPARATION PLANT" "MILL OPERATION/PREPARATION PLANT" "MILL OPERATION/PREPARATION PLANT" ...
## $ ACCIDENT_DT : chr "14/03/2012" "8/01/2007" "4/07/2009" "26/05/2000" ...
## $ CAL_YR : int 2012 2007 2009 2000 2005 2006 2008 2012 2000 2006 ...
## $ CAL_QTR : int 1 1 3 2 1 1 4 2 3 1 ...
## $ FISCAL_YR : int 2012 2007 2009 2000 2005 2006 2009 2012 2000 2006 ...
## $ FISCAL_QTR : int 2 2 4 3 2 2 1 3 4 2 ...
## $ ACCIDENT_TIME : int 945 1105 1000 1100 1430 1130 430 930 730 230 ...
## $ DEGREE_INJURY_CD : chr "5" "6" "3" "5" ...
## $ DEGREE_INJURY : chr "DAYS RESTRICTED ACTIVITY ONLY" "NO DYS AWY FRM WRK,NO RSTR ACT" "DAYS AWAY FROM WORK ONLY" "DAYS RESTRICTED ACTIVITY ONLY" ...
## $ FIPS_STATE_CD : int 1 1 1 1 1 1 1 1 1 1 ...
## $ UG_LOCATION_CD : chr "?" "?" "?" "?" ...
## $ UG_LOCATION : chr "NO VALUE FOUND" "NO VALUE FOUND" "NO VALUE FOUND" "NO VALUE FOUND" ...
## $ UG_MINING_METHOD_CD: chr "?" "?" "?" "?" ...
## $ UG_MINING_METHOD : chr "NO VALUE FOUND" "NO VALUE FOUND" "NO VALUE FOUND" "NO VALUE FOUND" ...
## $ MINING_EQUIP_CD : chr "24" "28" "?" "?" ...
## $ MINING_EQUIP : chr "Front-end loader, Tractor-shovel, Payloader, Highlift, Skip loader" "Hand tools (not powered)" "NO VALUE FOUND" "NO VALUE FOUND" ...
## $ EQUIP_MFR_CD : chr "119" "121" "?" "?" ...
## $ EQUIP_MFR_NAME : chr "Not on this list" "Not Reported" "NO VALUE FOUND" "NO VALUE FOUND" ...
## $ EQUIP_MODEL_NO : chr "22321" "" "" "?" ...
## $ SHIFT_BEGIN_TIME : int 600 700 600 700 700 700 2300 700 700 1800 ...
## $ CLASSIFICATION_CD : chr "12" "10" "18" "9" ...
## $ CLASSIFICATION : chr "POWERED HAULAGE" "HANDTOOLS (NONPOWERED)" "SLIP OR FALL OF PERSON" "HANDLING OF MATERIALS" ...
## $ ACCIDENT_TYPE_CD : chr "21" "8" "30" "27" ...
## $ ACCIDENT_TYPE : chr "CGHT I, U, B, MVNG & STTN OBJS" "STRUCK BY, NEC" "OVER-EXERTION, NEC" "OVER-EXERTION IN LIFTING OBJS" ...
## $ NO_INJURIES : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TOT_EXPER : num 4.35 0.02 10 NA 0.87 ...
## $ MINE_EXPER : num 4.35 0.02 2.15 0.23 0.87 ...
## $ JOB_EXPER : num 0.67 0.02 2.15 0.23 0.38 ...
## $ OCCUPATION_CD : chr "374" "374" "374" "374" ...
## $ OCCUPATION : chr "Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator" "Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator" "Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator" "Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator" ...
## $ ACTIVITY_CD : chr "28" "30" "13" "28" ...
## $ ACTIVITY : chr "HANDLING SUPPLIES/MATERIALS" "HAND TOOLS (NOT POWERED)" "CLIMB SCAFFOLDS/LADDERS/PLATFORMS" "HANDLING SUPPLIES/MATERIALS" ...
## $ INJURY_SOURCE_CD : chr "76" "46" "117" "4" ...
## $ INJURY_SOURCE : chr "SURFACE MINING MACHINES" "AXE,HAMMER,SLEDGE" "GROUND" "BAGS" ...
## $ NATURE_INJURY_CD : chr "160" "180" "330" "330" ...
## $ NATURE_INJURY : chr "CONTUSN,BRUISE,INTAC SKIN" "CUT,LACER,PUNCT-OPN WOUND" "SPRAIN,STRAIN RUPT DISC" "SPRAIN,STRAIN RUPT DISC" ...
## $ INJ_BODY_PART_CD : chr "700" "100" "520" "420" ...
## $ INJ_BODY_PART : chr "MULTIPLE PARTS (MORE THAN ONE MAJOR)" "HEAD,NEC" "ANKLE" "BACK (MUSCLES/SPINE/S-CORD/TAILBONE)" ...
## $ SCHEDULE_CHARGE : int 0 0 0 NA 0 0 0 0 NA 0 ...
## $ DAYS_RESTRICT : int 8 0 0 5 5 3 0 21 10 19 ...
## $ DAYS_LOST : int 0 0 9 NA 0 0 0 0 NA 13 ...
## $ TRANS_TERM : chr "N" "N" "N" "N" ...
## $ RETURN_TO_WORK_DT : chr "03/26/2012" "1/09/2007" "07/14/2009" "6/01/2000" ...
## $ IMMED_NOTIFY_CD : chr "? " "? " "? " "13" ...
## $ IMMED_NOTIFY : chr "NO VALUE FOUND" "NO VALUE FOUND" "NO VALUE FOUND" "NOT MARKED" ...
## $ INVEST_BEGIN_DT : chr "" "" "" "" ...
## $ NARRATIVE : chr "Employee was cleaning up at the Primary Crusher with the Dingo skid steer. The employee slipped and fell while "| __truncated__ "Handle of sledgehammer broke and head of hammer hit employee in the forehead." "EMPLOYEE WAS CLIMBING DOWN A LADDER AND WHEN HE STEPPED TO THE GROUND HE SLIPPED AND SPRAINED HIS LEFT ANKLE." "HE PULLED A BACK MUSCLE WHILE STACKING BAGS OF MATERIAL." ...
## $ CLOSED_DOC_NO : num NA NA 3.2e+11 3.2e+11 NA ...
## $ COAL_METAL_IND : chr "M" "M" "M" "M" ...
# Count how many columns there are of each class.
table(vapply(usdat, function(column) class(column)[1], character(1)))
##
## character integer numeric
## 39 13 5
Here, we can see all the variable types in this dataset. There are 57 attributes and 2000 observations in total, comprising 39 character, 13 integer and 5 numeric variables. Among the integer and numeric variables, MINE_ID, DOCUMENT_NO, CLOSED_DOC_NO and SUBUNIT_CD are actually IDs, so they could be treated as factors or ignored for now. In addition, some of the character variables should be transformed into factor variables.
Now, we can see the first 6 rows of this data set for better understanding.
# Show the first six records to get a feel for the raw values.
head(usdat)
## MINE_ID CONTROLLER_ID CONTROLLER_NAME OPERATOR_ID
## 1 100003 41044 Lhoist Group L13586
## 2 100003 41044 Lhoist Group L13586
## 3 100008 M31753 Alan B Cheney L31753
## 4 100011 M11763 Imerys S A L17074
## 5 100011 M11763 Imerys S A L17074
## 6 100011 M11763 Imerys S A L17074
## OPERATOR_NAME CONTRACTOR_ID DOCUMENT_NO SUBUNIT_CD
## 1 Lhoist North America 2.20121e+11 3
## 2 Lhoist North America 2.20070e+11 30
## 3 Cheney Lime & Cement Company 2.20092e+11 30
## 4 Imerys Pigments LLC 2.20004e+11 30
## 5 Imerys Pigments LLC 2.20050e+11 30
## 6 Imerys Pigments LLC 2.20061e+11 30
## SUBUNIT ACCIDENT_DT CAL_YR CAL_QTR FISCAL_YR
## 1 STRIP, QUARY, OPEN PIT 14/03/2012 2012 1 2012
## 2 MILL OPERATION/PREPARATION PLANT 8/01/2007 2007 1 2007
## 3 MILL OPERATION/PREPARATION PLANT 4/07/2009 2009 3 2009
## 4 MILL OPERATION/PREPARATION PLANT 26/05/2000 2000 2 2000
## 5 MILL OPERATION/PREPARATION PLANT 22/01/2005 2005 1 2005
## 6 MILL OPERATION/PREPARATION PLANT 29/03/2006 2006 1 2006
## FISCAL_QTR ACCIDENT_TIME DEGREE_INJURY_CD DEGREE_INJURY
## 1 2 945 5 DAYS RESTRICTED ACTIVITY ONLY
## 2 2 1105 6 NO DYS AWY FRM WRK,NO RSTR ACT
## 3 4 1000 3 DAYS AWAY FROM WORK ONLY
## 4 3 1100 5 DAYS RESTRICTED ACTIVITY ONLY
## 5 2 1430 5 DAYS RESTRICTED ACTIVITY ONLY
## 6 2 1130 5 DAYS RESTRICTED ACTIVITY ONLY
## FIPS_STATE_CD UG_LOCATION_CD UG_LOCATION UG_MINING_METHOD_CD
## 1 1 ? NO VALUE FOUND ?
## 2 1 ? NO VALUE FOUND ?
## 3 1 ? NO VALUE FOUND ?
## 4 1 ? NO VALUE FOUND ?
## 5 1 ? NO VALUE FOUND ?
## 6 1 ? NO VALUE FOUND ?
## UG_MINING_METHOD MINING_EQUIP_CD
## 1 NO VALUE FOUND 24
## 2 NO VALUE FOUND 28
## 3 NO VALUE FOUND ?
## 4 NO VALUE FOUND ?
## 5 NO VALUE FOUND ?
## 6 NO VALUE FOUND ?
## MINING_EQUIP
## 1 Front-end loader, Tractor-shovel, Payloader, Highlift, Skip loader
## 2 Hand tools (not powered)
## 3 NO VALUE FOUND
## 4 NO VALUE FOUND
## 5 NO VALUE FOUND
## 6 NO VALUE FOUND
## EQUIP_MFR_CD EQUIP_MFR_NAME EQUIP_MODEL_NO SHIFT_BEGIN_TIME
## 1 119 Not on this list 22321 600
## 2 121 Not Reported 700
## 3 ? NO VALUE FOUND 600
## 4 ? NO VALUE FOUND ? 700
## 5 ? NO VALUE FOUND 700
## 6 ? NO VALUE FOUND 700
## CLASSIFICATION_CD CLASSIFICATION ACCIDENT_TYPE_CD
## 1 12 POWERED HAULAGE 21
## 2 10 HANDTOOLS (NONPOWERED) 8
## 3 18 SLIP OR FALL OF PERSON 30
## 4 9 HANDLING OF MATERIALS 27
## 5 9 HANDLING OF MATERIALS 38
## 6 21 OTHER 26
## ACCIDENT_TYPE NO_INJURIES TOT_EXPER MINE_EXPER JOB_EXPER
## 1 CGHT I, U, B, MVNG & STTN OBJS 1 4.35 4.35 0.67
## 2 STRUCK BY, NEC 1 0.02 0.02 0.02
## 3 OVER-EXERTION, NEC 1 10.00 2.15 2.15
## 4 OVER-EXERTION IN LIFTING OBJS 1 NA 0.23 0.23
## 5 ABSRTN RAD CAUST TXC & NOX SBS 1 0.87 0.87 0.38
## 6 BODILY REACTION, NEC 1 5.62 5.62 5.62
## OCCUPATION_CD
## 1 374
## 2 374
## 3 374
## 4 374
## 5 382
## 6 304
## OCCUPATION
## 1 Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator
## 2 Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator
## 3 Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator
## 4 Warehouseman, Bagger, Palletizer/Stacker, Store keeper, Packager, Fabricator, Cleaning plant operator
## 5 Front-end loader, Scraper-loader operator, Pan operator, Payloader, Scraper rig operator
## 6 Maintenance man, Mechanic, Repair/Serviceman, Boilermaker, Fueler, Tire tech, Field service tech
## ACTIVITY_CD ACTIVITY INJURY_SOURCE_CD
## 1 28 HANDLING SUPPLIES/MATERIALS 76
## 2 30 HAND TOOLS (NOT POWERED) 46
## 3 13 CLIMB SCAFFOLDS/LADDERS/PLATFORMS 117
## 4 28 HANDLING SUPPLIES/MATERIALS 4
## 5 96 WORKING WITH CHEMICALS 21
## 6 39 MACHINE MAINTENANCE/REPAIR 2
## INJURY_SOURCE NATURE_INJURY_CD NATURE_INJURY
## 1 SURFACE MINING MACHINES 160 CONTUSN,BRUISE,INTAC SKIN
## 2 AXE,HAMMER,SLEDGE 180 CUT,LACER,PUNCT-OPN WOUND
## 3 GROUND 330 SPRAIN,STRAIN RUPT DISC
## 4 BAGS 330 SPRAIN,STRAIN RUPT DISC
## 5 ACIDS,ALKALI,WET CEMENT 130 BURN,CHEMICL-FUME,COMPOUN
## 6 BODILY MOTION 330 SPRAIN,STRAIN RUPT DISC
## INJ_BODY_PART_CD INJ_BODY_PART SCHEDULE_CHARGE
## 1 700 MULTIPLE PARTS (MORE THAN ONE MAJOR) 0
## 2 100 HEAD,NEC 0
## 3 520 ANKLE 0
## 4 420 BACK (MUSCLES/SPINE/S-CORD/TAILBONE) NA
## 5 330 HAND (NOT WRIST OR FINGERS) 0
## 6 420 BACK (MUSCLES/SPINE/S-CORD/TAILBONE) 0
## DAYS_RESTRICT DAYS_LOST TRANS_TERM RETURN_TO_WORK_DT IMMED_NOTIFY_CD
## 1 8 0 N 03/26/2012 ?
## 2 0 0 N 1/09/2007 ?
## 3 0 9 N 07/14/2009 ?
## 4 5 NA N 6/01/2000 13
## 5 5 0 N 2/01/2005 ?
## 6 3 0 N 4/04/2006 ?
## IMMED_NOTIFY INVEST_BEGIN_DT
## 1 NO VALUE FOUND
## 2 NO VALUE FOUND
## 3 NO VALUE FOUND
## 4 NOT MARKED
## 5 NO VALUE FOUND 01/22/2005
## 6 NO VALUE FOUND
## NARRATIVE
## 1 Employee was cleaning up at the Primary Crusher with the Dingo skid steer. The employee slipped and fell while operating the skid steer and the machine pinned him against the cement retaining wall.
## 2 Handle of sledgehammer broke and head of hammer hit employee in the forehead.
## 3 EMPLOYEE WAS CLIMBING DOWN A LADDER AND WHEN HE STEPPED TO THE GROUND HE SLIPPED AND SPRAINED HIS LEFT ANKLE.
## 4 HE PULLED A BACK MUSCLE WHILE STACKING BAGS OF MATERIAL.
## 5 EE hands began to break out in a rash after he handled material coated with stearic acid. He was placed on restricted duty on 1/25/05 after first dr visit. EE did not notify management until second hand began to break out.
## 6 EE was reaching for air supply cutoff valve.
## CLOSED_DOC_NO COAL_METAL_IND
## 1 NA M
## 2 NA M
## 3 3.20092e+11 M
## 4 3.20004e+11 M
## 5 NA M
## 6 3.20061e+11 M
Here, we first have to do some type conversion. We convert some numeric variables to character format because they are actually IDs.
# Identifier columns are labels rather than quantities, so store them as text.
usdat <- usdat %>%
  mutate(
    across(all_of(c("DOCUMENT_NO", "CLOSED_DOC_NO", "MINE_ID")), as.character)
  )
Now, we can find the summary measures for all the numerical columns.
# Summary statistics for the three experience columns, one row per variable.
numeric <- usdat %>%
  select(TOT_EXPER, JOB_EXPER, MINE_EXPER) %>%
  tidyr::pivot_longer(
    cols = everything(),
    names_to = "Variable",
    values_to = "value"
  ) %>%
  group_by(Variable) %>%
  summarise(
    Frequency = n(), # number of rows, missing values included
    Mean = round(mean(value, na.rm = TRUE), 2),
    Median = round(median(value, na.rm = TRUE), 2),
    Sd = round(sd(value, na.rm = TRUE), 2)
  )
flextable(numeric)
Variable | Frequency | Mean | Median | Sd |
JOB_EXPER | 2,000 | 6.95 | 3.46 | 8.27 |
MINE_EXPER | 2,000 | 6.93 | 3.00 | 8.82 |
TOT_EXPER | 2,000 | 11.02 | 7.00 | 10.76 |
The table above shows the summary statistics for the important numerical variables.
For summary measures of categorical variables, we have to convert them to factors.
# Turn every character column into a factor, then re-tally the column classes.
usdat <- usdat %>%
  mutate(across(where(is.character), as.factor))
table(vapply(usdat, function(column) class(column)[1], character(1)))
##
## factor integer numeric
## 42 12 3
Now, we can see that there are 42 factor, 12 integer and 3 numeric variables.
We can choose each factor variable and find its categories quantity and percentages values.
# Number of records per SUBUNIT and each subunit's share in whole percent.
sbunit <- usdat %>%
  group_by(SUBUNIT) %>%
  tally(name = "count") %>%
  mutate(p = round(count / sum(count, na.rm = TRUE), 2) * 100)
flextable(sbunit)
SUBUNIT | count | p |
AUGER | 1 | 0 |
CULM BANK/REFUSE PILE | 1 | 0 |
DREDGE | 39 | 2 |
INDEPENDENT SHOPS OR YARDS | 5 | 0 |
MILL OPERATION/PREPARATION PLANT | 535 | 27 |
OFFICE WORKERS AT MINE SITE | 9 | 0 |
STRIP, QUARY, OPEN PIT | 619 | 31 |
SURFACE AT UNDERGROUND | 75 | 4 |
UNDERGROUND | 716 | 36 |
Here, we find the summaries for SUBUNIT, referring to the location within a mine where the accident/injury/illness occurred.
# Horizontal bar chart of the number of records per FIPS state code, ordered
# by count. FIPS_STATE_CD is an integer column, so the only rows excluded are
# those with a missing state code.
usdat %>%
  filter(!is.na(FIPS_STATE_CD)) %>%
  group_by(FIPS_STATE_CD) %>%
  tally(name = "count") %>%
  arrange(count) %>%
  ggplot(aes(x = reorder(FIPS_STATE_CD, count), y = count)) +
  geom_col(fill = "#3C565B") +
  coord_flip() +
  ggtitle("Number of Total Accidents by State") +
  geom_text(aes(label = count), vjust = 0.6, hjust = -0.01) +
  theme_bw() +
  xlab("State Code") +
  ylab("Total Number of Accidents")
From the chart, it can be seen that state code 21,17 and 18 have the highest number of injuries.
# Day of the week on which each accident happened.
# ACCIDENT_DT holds day/month/year text (e.g. "14/03/2012"). Without an explicit
# format, as.Date() silently mis-reads it as year/month/day and every derived
# weekday is wrong, so the format is given explicitly.
accident_dates <- as.Date(as.character(usdat$ACCIDENT_DT), format = "%d/%m/%Y")
day_names <- weekdays(accident_dates)
# Order the levels Monday..Sunday instead of alphabetically; POSIXlt wday is
# 0 for Sunday, so shift it to make Monday the first day.
monday_first <- (as.POSIXlt(accident_dates)$wday + 6) %% 7
days <- factor(day_names, levels = unique(day_names[order(monday_first)]))
# Plot of the bar graph
plot(
  ggplot(usdat) +
    geom_bar(aes(x = days), fill = "#151B54") +
    theme(text = element_text(size = 10)) +
    labs(title = "Accidents count based on days")
)
The bar chart shows an equal distribution of accidents through out the week.
# Horizontal bar chart of the number of records per fiscal year, ordered by
# count. FISCAL_YR is an integer column, so the only rows excluded are those
# with a missing fiscal year.
usdat %>%
  filter(!is.na(FISCAL_YR)) %>%
  group_by(FISCAL_YR) %>%
  tally(name = "count") %>%
  arrange(count) %>%
  ggplot(aes(x = reorder(FISCAL_YR, count), y = count)) +
  geom_col() +
  coord_flip() +
  ggtitle("Number of Total Accidents by Fiscal Year") +
  geom_text(aes(label = count), vjust = 0.6, hjust = -0.01) +
  theme_bw() +
  xlab("Fiscal Year") +
  ylab("Total Number of Accidents")
It can be seen from the bar chart that the 2002 fiscal year had the highest number of injuries. It can also be seen that 2015 had the fewest, which may be because of the introduction of new technologies and automation that helped reduce injuries.
# Number of records per coal / metal indicator and each one's share in whole
# percent, ignoring the "NO VALUE FOUND" placeholder.
cmi <- usdat %>%
  filter(COAL_METAL_IND != "NO VALUE FOUND") %>%
  group_by(COAL_METAL_IND) %>%
  tally(name = "count") %>%
  mutate(p = round(count / sum(count, na.rm = TRUE), 2) * 100)
flextable(cmi)
COAL_METAL_IND | count | p |
C | 781 | 39 |
M | 1,219 | 61 |
Here, we find the summaries for COAL_METAL_IND, which identifies whether the accident occurred at a Coal or a Metal/Non-Metal mine. It can be seen that 1219 workers were injured in Metal/Non-Metal mines and 781 were injured in Coal mines.
# Lollipop chart of the ten accident classifications with the most records.
# The catch-all "OTHER" class is removed BEFORE ranking; removing it after the
# top-10 cut would leave only nine entries whenever "OTHER" ranks in the top ten.
usdat %>%
  filter(CLASSIFICATION != "OTHER") %>%
  group_by(CLASSIFICATION) %>%
  summarise(total_accident = n()) %>%
  slice_max(total_accident, n = 10) %>%
  # ascending order so that the largest class ends up at the top after the flip
  arrange(total_accident) %>%
  mutate(name = factor(CLASSIFICATION, levels = CLASSIFICATION)) %>%
  ggplot(aes(x = name, y = total_accident)) +
  ggtitle("Top 10 Classifications by Total Accident") +
  geom_segment(aes(xend = name, yend = 0)) +
  geom_point(size = 4, color = "orange") +
  coord_flip() +
  theme_bw() +
  xlab("Classifications") +
  ylab("Total Number of Accidents")
## Selecting by total_accident
Here I have summarized the total number of accidents grouping by classifications. There were total 28 classifications that identifies the circumstances which contributed most directly to the resulting accidents. Then I have cleaned the variable by filtering out the OTHER from classification. Then we plot the top 10 classifications based on count. From the chart it is evident that handling of materials has the highest number of accidents which is more than 500 when compared to stepping or kneeling on an object which is less than 50.
# Donut chart of the five subunits with the most records. All slices share the
# constant x position 3; the x limits leave the hole in the middle.
usdat %>%
  group_by(SUBUNIT) %>%
  tally(name = "count") %>%
  arrange(count) %>%
  top_n(5) %>%
  ggplot(aes(x = 3, y = count, fill = SUBUNIT)) +
  ggtitle("Number of Total Accidents by Top 5 SUB-UNIT") +
  geom_col(color = "orange") +
  geom_text(
    aes(label = count),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Spectral") +
  xlim(c(0.2, 3 + 0.5)) +
  theme(
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank()
  )
## Selecting by count
Here, I have visualised the data based on SUBUNIT,referring to the location within a mine where the accident/injury/illness occurred. Here are total 9 SUBUNIT from which I have taken top 5 based on number of count. Among those underground was the location where the maximum accident/injury/illness occurred. In contrast, Dredge has the least number of accidents.
# Lollipop chart of the two most frequently injured body parts.
# The title previously read "Top 10 Classifications", copied from another
# chart; it now describes what is actually plotted. "OTHER" is removed before
# ranking so that it cannot take one of the two slots.
usdat %>%
  filter(INJ_BODY_PART != "OTHER") %>%
  group_by(INJ_BODY_PART) %>%
  summarise(total_accident = n()) %>%
  slice_max(total_accident, n = 2) %>%
  # ascending order so that the largest group ends up at the top after the flip
  arrange(total_accident) %>%
  mutate(name = factor(INJ_BODY_PART, levels = INJ_BODY_PART)) %>%
  ggplot(aes(x = name, y = total_accident)) +
  ggtitle("Top 2 Injured Body Parts by Total Accident") +
  geom_segment(aes(xend = name, yend = 0)) +
  geom_point(size = 4, color = "orange") +
  coord_flip() +
  theme_bw() +
  xlab("Body Part") +
  ylab("Total Number of Accidents")
## Selecting by total_accident
The plot shows which body part has suffered the most injuries. From the
chart, it can be seen that most of the injuries happened in finger/thumb
followed by back injuries.
# Line chart of the number of records per calendar year.
# The x-axis label previously misspelled "Calendar" as "Calender".
usdat %>%
  group_by(CAL_YR) %>%
  summarise(count = n()) %>%
  ggplot(aes(x = CAL_YR, y = count)) +
  geom_line(color = "grey") +
  geom_point(shape = 21, color = "black", fill = "#69b3a2", size = 6) +
  ggtitle("Number of Accidents by Time( Cal_Yr)") +
  theme_bw() +
  xlab("Calendar Years") +
  ylab("Total Number of Accidents")
Here, we can check the yearly total number of accidents. To create this
chart the data is summarized for each years using number of counts. From
the chart, we can see that the number of accidents are showing
decreasing trend over the years.
# Horizontal bar chart of the five underground locations with the most
# records, ignoring the "NO VALUE FOUND" placeholder.
usdat %>%
  filter(UG_LOCATION != "NO VALUE FOUND") %>%
  group_by(UG_LOCATION) %>%
  tally(name = "count") %>%
  arrange(count) %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(UG_LOCATION, count), y = count)) +
  geom_col(color = "black", fill = "#4863A0") +
  coord_flip() +
  ggtitle("Number of Total Accidents by top 5 Underground Location") +
  geom_text(aes(label = count), vjust = 0.6, hjust = -0.01) +
  theme_bw() +
  xlab("Underground Locations") +
  ylab("Total Number of Accidents")
## Selecting by count
Here, we can see top 5 underground location where maximum accident
occurs. First, we filter out the “NO VALUE FOUND” category from
underground locations. At the underground location, FACE has the maximum
number of accidents with 207 counts.
# Horizontal bar chart of the five kinds of mining equipment with the most
# records, ignoring the "NO VALUE FOUND" placeholder. The title string contains
# a line break so that it wraps onto two lines.
usdat %>%
  filter(MINING_EQUIP != "NO VALUE FOUND") %>%
  group_by(MINING_EQUIP) %>%
  tally(name = "ct") %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(MINING_EQUIP, ct), y = ct)) +
  geom_col(fill = "#3CB371") +
  coord_flip() +
  ggtitle("Number of Total Accidents by Top 5 Mining
Equipments") +
  geom_text(aes(label = ct), hjust = 1.2, colour = "white", size = 3.5) +
  theme_bw() +
  xlab("Mining Equipment") +
  ylab("Total Number of Accidents")
## Selecting by ct
Here, we can see the total number of accidents based on most used mining equipment. First, we filter out the “NO VALUE FOUND”. From the bar chart, it is evident that using mining equipment hand tools caused maximum number of accidents with the number being 235 which is very high when compared to others.
# Bar chart of the five underground mining methods with the most records,
# ignoring the "NO VALUE FOUND" placeholder.
usdat %>%
  filter(UG_MINING_METHOD != "NO VALUE FOUND") %>%
  group_by(UG_MINING_METHOD) %>%
  tally(name = "ct") %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(UG_MINING_METHOD, ct), y = ct)) +
  geom_col(fill = "#E67451") +
  ggtitle("Number of Total Accidents by Top 5 Mining Methods") +
  geom_text(aes(label = ct), vjust = -0.21, size = 3.5) +
  theme_bw() +
  xlab("Mining Methods") +
  ylab("Total number of Accidents")
## Selecting by ct
Here, we can see that the total number of accidents based on most used mining methods.First, we filter out the “NO VALUE FOUND”.From the chart, it can be inferred that continuous mining has resulted in a very high number of accidents with the number being 434 which is six times higher than longwall which comes second.
# Bar chart of the five equipment manufacturers with the most records.
# Placeholder names that do not identify a manufacturer are excluded first.
# The chart title previously contained the typo "Accidentss".
usdat %>%
  filter(
    !is.na(EQUIP_MFR_NAME),
    !EQUIP_MFR_NAME %in% c(
      "NO VALUE FOUND", "Not Reported", "Not listed", "Not on this list"
    )
  ) %>%
  group_by(EQUIP_MFR_NAME) %>%
  summarise(ct = n()) %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(EQUIP_MFR_NAME, ct), y = ct)) +
  geom_bar(stat = "identity", fill = "#4E5180") +
  ggtitle("Number of Total Accidents by Top 5 Mining Machine Manufacturer") +
  geom_text(aes(label = ct), vjust = -0.21, size = 3.5) +
  theme_bw() +
  xlab("Mining Machine Manufacturer Name") +
  ylab("Total Number of Accidents")
## Selecting by ct
The chart lists the manufacturer name which cause the accidents most. First, we filter out the no value found, Not Reported, Not listed, and Not on this list. While using mining machine of manufacturer Caterpillar maximum number of 114 accidents occurred which is again significantly higher than other manufacturers.
# Horizontal bar chart of the five activities with the most records, ignoring
# the "NO VALUE FOUND" placeholder; counts are printed in the middle of each bar.
usdat %>%
  filter(ACTIVITY != "NO VALUE FOUND") %>%
  group_by(ACTIVITY) %>%
  tally(name = "ct") %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(ACTIVITY, ct), y = ct)) +
  ggtitle("Number of Total Accidents by Most Common 5 Mining Activity") +
  geom_col(fill = "#FDEEF4") +
  theme_bw() +
  geom_text(
    aes(label = ct),
    color = "black",
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  xlab("Mining activity") +
  ylab("Total number of accidents")
## Selecting by ct
Here are the most common specific activity the accident victim was performing at the time of the incident. First, we filter out the no value found. It can be seen that while handling supplies and materials maximum number of 313 accidents happened.
# Horizontal bar chart of the five injury sources with the most records,
# ignoring the "NO VALUE FOUND" placeholder.
usdat %>%
  filter(INJURY_SOURCE != "NO VALUE FOUND") %>%
  group_by(INJURY_SOURCE) %>%
  tally(name = "ct") %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(INJURY_SOURCE, ct), y = ct)) +
  geom_col(fill = "#151B54") +
  coord_flip() +
  ggtitle("Number of Total Accidents by Most Common 5 Sources of Injury") +
  geom_text(
    aes(label = ct),
    color = "white",
    position = position_stack(vjust = 0.5)
  ) +
  theme_bw() +
  xlab("Injury Source") +
  ylab("Total Number of Accidents")
## Selecting by ct
Here are the most common injury source that were reason for accident. First, we filter out the no value found. From the chart, it can be seen that the METAL, NEC is responsible for maximum number of injuries with 255.
# Horizontal bar chart of the five injury natures with the highest summed
# NO_INJURIES.
usdat %>%
  group_by(NATURE_INJURY) %>%
  summarise(count = sum(NO_INJURIES)) %>%
  arrange(desc(count)) %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(NATURE_INJURY, count), y = count)) +
  geom_col(fill = "#3EA055") +
  ggtitle("Number of Total Injuries by Most Common Nature Injury") +
  geom_text(
    aes(label = count),
    color = "white",
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  theme_bw() +
  xlab("Nature Injury") +
  ylab("Total number of Injuries")
## Selecting by count
The plot shows the total number of injuries by common nature of injury. From the chart, it can be seen that sprain, strain, ruptured disc are most common injuries.
# Horizontal bar chart of the five activities with the most days lost.
# DAYS_LOST contains missing values; without na.rm = TRUE a single missing
# value turns an activity's whole total into NA, which distorts the ranking.
usdat %>%
  group_by(ACTIVITY) %>%
  summarise(dayslost = sum(DAYS_LOST, na.rm = TRUE)) %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(ACTIVITY, dayslost), y = dayslost)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme_bw() +
  ggtitle("Number of Total Lost Days due to Injured by most Common Activity") +
  geom_text(
    aes(label = dayslost),
    color = "white",
    position = position_stack(vjust = 0.5)
  ) +
  xlab("Activity") +
  ylab("Total Days Lost due to Injury")
## Selecting by dayslost
Here, we can see that injuries sustained while operating a bulldozer account for a total of 410 lost days.
# Horizontal bar chart of the five accident types with the highest summed
# NO_INJURIES. The chart title previously contained the typo "Commmon".
usdat %>%
  group_by(ACCIDENT_TYPE) %>%
  summarise(total_injuries = sum(NO_INJURIES)) %>%
  top_n(5) %>%
  ggplot(aes(x = reorder(ACCIDENT_TYPE, total_injuries), y = total_injuries)) +
  geom_bar(stat = "identity", fill = "#29465B") +
  coord_flip() +
  theme_bw() +
  ggtitle("Number of Total Injuries by Most Common 5 Accident Types") +
  geom_text(
    aes(label = total_injuries),
    color = "white",
    position = position_stack(vjust = 0.5)
  ) +
  xlab("Accident Types") +
  ylab("Total Number of Injuries")
## Selecting by total_injuries
Here, we can see the number of total injuries by top 5 accident types. The Struck by, NEC accident type is responsible for 264 injuries.
# Heatmap of record counts for every observed SUBUNIT x NATURE_INJURY pair.
usdat %>%
  count(SUBUNIT, NATURE_INJURY, name = "ct") %>%
  ggplot(aes(x = NATURE_INJURY, y = SUBUNIT, fill = ct)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Before creating this visualisation, a cross tabulation is calculated from the two categorical variables based on their counts. The resulting heatmap displays the frequencies of the cross tabulation between the two variables sub-unit and nature of injury.
# Heatmap of record counts for every observed ACCIDENT_TYPE x NATURE_INJURY pair.
usdat %>%
  count(ACCIDENT_TYPE, NATURE_INJURY, name = "ct") %>%
  ggplot(aes(x = ACCIDENT_TYPE, y = NATURE_INJURY, fill = ct)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Before creating this visualisation, a cross tabulation is calculated from the two categorical variables based on their counts. The resulting heatmap displays the frequencies of the cross tabulation between the two variables accident type and nature of injury.
# Heatmap of record counts for every observed DEGREE_INJURY x NATURE_INJURY pair.
usdat %>%
  count(DEGREE_INJURY, NATURE_INJURY, name = "ct") %>%
  ggplot(aes(x = DEGREE_INJURY, y = NATURE_INJURY, fill = ct)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Before creating this visualisation, a cross tabulation is calculated from the two categorical variables based on their counts. The resulting heatmap displays the frequencies of the cross tabulation between the two variables degree of injury and nature of injury.
# Heatmap of record counts for every observed SUBUNIT x DEGREE_INJURY pair.
usdat %>%
  count(SUBUNIT, DEGREE_INJURY, name = "ct") %>%
  ggplot(aes(x = SUBUNIT, y = DEGREE_INJURY, fill = ct)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Before creating this visualisation, a cross tabulation is calculated from the two categorical variables based on their counts. The resulting heatmap displays the frequencies of the cross tabulation between the two variables sub-unit and degree of injury.
# Animated line chart of summed job and total experience per calendar year.
#
# cal_year is pinned to 1 January of each calendar year. Parsing the year with
# format = "%Y" alone makes as.Date() fill in the month and day from the
# current date, so the dates would change with the day the script is run.
usdat$cal_year <- as.Date(paste0(usdat$CAL_YR, "-01-01"), format = "%Y-%m-%d")
d <- usdat %>%
  group_by(cal_year) %>%
  summarise(
    job_experience = sum(JOB_EXPER, na.rm = TRUE),
    total_experience = sum(TOT_EXPER, na.rm = TRUE)
  )
# Long format: one row per (year, experience measure)
df <- melt(d, id.vars = "cal_year")
df <- transform(df, date = zoo::as.Date(cal_year, frac = 0))
df <- as_tibble(df)
p <- df %>%
  ggplot(aes(x = date, y = value, color = variable)) +
  geom_line(size = 2) +
  geom_point() +
  transition_reveal(date) +
  theme(legend.position = "none")
animate(p, renderer = gifski_renderer())
The multiple line chart shows how the summed job experience and total experience of injured workers change from year to year. It also shows that both lines decrease over time.
# Animated stacked area chart of summed NO_INJURIES per accident type and year.
#
# Fixes relative to the earlier version:
#  * rows are no longer cut to the first ten per (type, year) group before
#    summing, which undercounted injuries in every group with more than ten rows;
#  * the chart title is passed as labs(title = ...); labs(main = ...) is not a
#    title in ggplot2, so no title was shown.
c <- usdat %>%
  group_by(ACCIDENT_TYPE, cal_year) %>%
  summarise(total_injuries = sum(NO_INJURIES), .groups = "drop_last") %>%
  # NOTE(review): keeps only the first ten yearly rows per accident type, as
  # before - confirm that truncating the series like this is intended.
  slice(1:10)
c <- transform(c, date = zoo::as.Date(cal_year, frac = 0))
d <- c %>%
  ggplot(aes(
    x = cal_year, y = total_injuries, fill = ACCIDENT_TYPE,
    text = ACCIDENT_TYPE
  )) +
  geom_area() +
  transition_reveal(date) +
  theme(legend.position = "none") +
  labs(title = "Yearly wise total injuries people Area Chart")
animate(d, renderer = gifski_renderer())
The animated area chart shows the yearly total number of injured people by calendar year. The total can be seen decreasing over time, with a few fluctuations before 2010 and a rapid decrease after 2010.
# Shorten one long manufacturer name so that it fits on the chart axis.
usdat <- usdat %>%
  mutate(
    EQUIP_MFR_NAME = recode(
      EQUIP_MFR_NAME,
      `Joy Machinery Co. (Joy, Joy Manufacturing Co.)` = "Joy Machinery"
    )
  )
# Records per manufacturer and year, placeholder names excluded; keep the 30
# largest manufacturer/year counts.
gdp_formatted <- usdat %>%
  select(cal_year, EQUIP_MFR_NAME) %>%
  filter(
    !is.na(EQUIP_MFR_NAME),
    !EQUIP_MFR_NAME %in% c(
      "NO VALUE FOUND", "Not Reported", "Not listed", "Not on this list"
    )
  ) %>%
  group_by(EQUIP_MFR_NAME, cal_year) %>%
  summarise(ct = round(n(), 2)) %>%
  arrange(desc(ct)) %>%
  head(30)
## `summarise()` has grouped output by 'EQUIP_MFR_NAME'. You can override using
## the `.groups` argument.
# Animated horizontal bar chart of the manufacturer/year counts, revealed
# along the year.
gggdp_formatted <- gdp_formatted %>%
  transform(date = zoo::as.Date(cal_year, frac = 0)) %>%
  ggplot(aes(
    x = reorder(EQUIP_MFR_NAME, ct), y = ct, fill = EQUIP_MFR_NAME
  )) +
  geom_col() +
  coord_flip() +
  ggtitle("Total Accidents by Top Mining Machine Manufacturer in every year") +
  xlab("Mining Machine Manufacturer Name") +
  ylab("Total Number of Accidents") +
  transition_reveal(date) +
  theme(legend.position = "none")
animate(gggdp_formatted, renderer = gifski_renderer())
The animated bar chart visualises the total accidents by top mining machine manufacturer in every year.