library(NLP)
library(topicmodels)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(rJava)
library(coreNLP)
library(wordnet)
# Work from the project folder so that relative file names such as
# "osha.txt" resolve.
setwd("C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Text Analytic/WK 1/mini project")

# Read the tab-separated records. The file has no header row, and quoting is
# disabled (quote = "") so quote characters in the text are read literally.
textdata <- read.delim(
  file = "osha.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE
)

# Show the first record.
textdata[1, ]
## V1 V2
## 1 201079928 Employee Is Burned By Forklift Radiator Fluid
## V3
## 1 At approximately 11:30 a.m. on November 13 2013 Employee #1 with Edco Waste & Recycling Services was operating a forklift (Linde Lift Truck; Serial Number: H2X393S04578; identified by the employer as FL-3) from approximately 4:00 a.m. moving bales of recyclable paper products from a collection area in the yard into trucks. Then Employee #1 cleaned and was replacing an air filter on the forklift FL-3. To clean out the air filter Employee #1 parked FL-3 in the doorway of the maintenance building. The air filter was located on the rear of the forklift behind the cab frame on the driver's side. Employee #1 removed the air filter and cleaned it out and then he climbed up onto the back of the forklift to replace it. While up on the back of the forklift Employee #1's foot dislodged the cooling system radiator cap. The fluid in the lift truck's cooling system was hot and under pressure from being operated all morning. The hot fluid sprayed up and out of the reservoir. Employee #1 was burned on the upper legs and the groin area. Employee #1 jumped off of the back of the forklift onto the ground. Coworkers came to his assistance and called emergency services. Employee #1 was hospitalized at a burn center for over 24 hours for treatment of second degree burns to the upper legs and groin area.
# Column V2 holds the incident titles, column V3 the incident narratives.
comment <- textdata[["V2"]]
comment1 <- textdata[["V3"]]

# Preview the first few titles.
head(x = comment)
## [1] " Employee Is Burned By Forklift Radiator Fluid "
## [2] " Employee Falls From Flatbed Trailer And Later Dies "
## [3] " Two Workers Are Struck By Motor Vehicle And One Is Killed "
## [4] " Employee Is Struck By Bales Of Wire And Killed "
## [5] " Employee Is Splashed With Hot Water And Is Burned "
## [6] " Employee Suffers Burns While Moving Soup "
# Preview the first few narratives.
head(x = comment1)
## [1] " At approximately 11:30 a.m. on November 13 2013 Employee #1 with Edco Waste & Recycling Services was operating a forklift (Linde Lift Truck; Serial Number: H2X393S04578; identified by the employer as FL-3) from approximately 4:00 a.m. moving bales of recyclable paper products from a collection area in the yard into trucks. Then Employee #1 cleaned and was replacing an air filter on the forklift FL-3. To clean out the air filter Employee #1 parked FL-3 in the doorway of the maintenance building. The air filter was located on the rear of the forklift behind the cab frame on the driver's side. Employee #1 removed the air filter and cleaned it out and then he climbed up onto the back of the forklift to replace it. While up on the back of the forklift Employee #1's foot dislodged the cooling system radiator cap. The fluid in the lift truck's cooling system was hot and under pressure from being operated all morning. The hot fluid sprayed up and out of the reservoir. Employee #1 was burned on the upper legs and the groin area. Employee #1 jumped off of the back of the forklift onto the ground. Coworkers came to his assistance and called emergency services. Employee #1 was hospitalized at a burn center for over 24 hours for treatment of second degree burns to the upper legs and groin area. "
## [2] " On August 30 2013 Employee #1 was working from a flatbed trailer. As he worked he fell from the flatbed trailer onto the ground striking his abdomen. The fall height of the flatbed trailer was 57 inches. Employee #1 sustained unspecified injuries in the fall that later on caused his death. "
## [3] " On August 27 2013 Employees #1 and #2 of Templar Inc. a construction company specializing in fiber optic installation and services were working along a highway. The highway speed limit was posted at 55 miles per hour. Employee #1 was marking the location of an underground line that ran below the turn lane. Employee #2 was next to Employee #1 and performing the duties of a flagger. A privately owned vehicle was travelling in the travel/through lane. The vehicle veered to the right entered the turn lane and struck both workers. Emergency medical services were called. Employee #1 was declared dead at the scene. Employee #2 refused emergency medical treatment for the bruises he received when struck. "
## [4] " On August 26 2013 Employee #1 with Lee Iron and Metal Company Inc. was using a forklift (Hyster 50 sit down rider) to move and stack bundled cubes of recycled insulated co-axial (coax) cable and insulated copper wire. The cubes weighed anywhere from 1 000 to 2 000 lbs. Employee #1 had completed stacking one column consisting of five cubes in height along the northeast wall of the recycling warehouse. Employee #1 was beginning a new second column directly in front of the existing first column. Employee #1 placed the first cube of the second column tight against the bottom cube of the existing first column. Employee #1 then backed the forklift up approximately 5 ft and exited the forklift. Employee #1 then stepped in between the forklift and the cubes to either clean off the top from possible loose wires that created a hump or to place 4 ft by 4 ft dunnage so the next cube could be stacked. The five tier stack of cubes began to overturn. The first one went over Employee #1's head and settled on the forks. The second cube (weighing 1 948 lbs) struck Employee #1 on the left side of his face and upper torso and forced him back and down. Employee #1 was pinned between the first cube that fell and second cube. A third cube also fell which struck Employee #1's legs. Employee #1 received a puncture wound to the right abdomen area which caused increased blood loss. According to the medical examiner's report Employee #1 died at the scene from blunt force trauma to the head. "
## [5] " On July 14 2013 Employee #1 vacuum pump truck driver and operator was offloading hot brine water at a geothermal power plant. He was assigned to transfer loads of the brine between power plants and had already made several trips between facilities. When he arrived at the plant Employee #1 connected the hose to the vehicle's tank outlet valve and proceeded to empty the tanks contents by gravity. While the tank was left emptying he went inside the plant's control center briefly to cool off and to get a drink of water. Employee #1 noticed that the flow of brine had stopped due to a clog in the hose when he returned. He tried to clear the hose by switching the truck to vacuum. As he did this he noticed that the flow was still impeded. Employee #1 loosened the hose coupler on the truck's valve which caused hot pressurized brine to flow out. He attempted to hold the hose but eventually let go causing the hot brine water to splash on his left abdomen right leg left leg and left shoulder. Employee #1 rushed into the control room and removed his hot brine soaked clothing. A coworker observed the pump truck with the brine pouring out and Employee #1 running toward the control room. This coworker shut off the valve on the pump truck and went to check on Employee #1. Employee #1 was taken to a hospital and was then transferred to the burn unit of a medical center. He was admitted to the medical center where he was treated for second and third-degree burns and then hospitalized. "
## [6] " On June 30 2013 Employee #1 was working in a food Taqueria for a supermarket. The employee was in the kitchen area transferring soup from the kitchen area to a refrigerator. While in the refrigerator the employee lifted the pail while hot and the pail was soft due to the heat it folded causing the liquid to spill on her the employee's arm chest and abdomen area. Employee #1 was transported to an area hospital where she was treated for second and third degree burns and remains hospitalized. "
# Words to strip from the corpora: tm's English stop-word list plus
# project-specific additions. The additions mix surface forms ("employee",
# "worker") with stemmed forms ("employe", "approxim", "cowork") because the
# cleaning pipelines apply this list both before and after stemming.
# unique() drops repeated entries ("will" used to be listed twice).
my_stopwords <- unique(c(
  stopwords("english"),
  "will", "also", "etc", "else", "can", "even", "within", "without",
  "well", "say", "year", "must", "need", "never", "now", "want", "still",
  "time", "therefore", "send", "today", "may", "many", "make", "whose",
  "however", "get", "have", "just", "him", "worker", "employe", "one", "two",
  "employ", "work", "cowork", "approxim", "use", "day", "employee"
))
# Wrap the titles (comment) and the narratives (comment1) as tm vector
# sources and build one corpus from each.
vector <- VectorSource(comment)
vector1 <- VectorSource(comment1)
corpus <- VCorpus(vector)
corpus1 <- VCorpus(vector1)

# Print the raw content of the first few title documents. The range is capped
# at the corpus size: a hard-coded 1:6 fails with a subscript error whenever
# the corpus holds fewer than six documents.
for (i in seq_len(min(6L, length(corpus)))) {
  print(corpus[[i]][1])
}
## $content
## [1] " Employee Is Burned By Forklift Radiator Fluid "
##
## $content
## [1] " Employee Falls From Flatbed Trailer And Later Dies "
##
## $content
## [1] " Two Workers Are Struck By Motor Vehicle And One Is Killed "
##
## $content
## [1] " Employee Is Struck By Bales Of Wire And Killed "
##
## $content
## [1] " Employee Is Splashed With Hot Water And Is Burned "
##
## $content
## [1] " Employee Suffers Burns While Moving Soup "
# Print the raw content of the first few narrative documents. The range is
# capped at the corpus size: a hard-coded 1:6 fails with a subscript error
# whenever the corpus holds fewer than six documents.
for (i in seq_len(min(6L, length(corpus1)))) {
  print(corpus1[[i]][1])
}
## $content
## [1] " At approximately 11:30 a.m. on November 13 2013 Employee #1 with Edco Waste & Recycling Services was operating a forklift (Linde Lift Truck; Serial Number: H2X393S04578; identified by the employer as FL-3) from approximately 4:00 a.m. moving bales of recyclable paper products from a collection area in the yard into trucks. Then Employee #1 cleaned and was replacing an air filter on the forklift FL-3. To clean out the air filter Employee #1 parked FL-3 in the doorway of the maintenance building. The air filter was located on the rear of the forklift behind the cab frame on the driver's side. Employee #1 removed the air filter and cleaned it out and then he climbed up onto the back of the forklift to replace it. While up on the back of the forklift Employee #1's foot dislodged the cooling system radiator cap. The fluid in the lift truck's cooling system was hot and under pressure from being operated all morning. The hot fluid sprayed up and out of the reservoir. Employee #1 was burned on the upper legs and the groin area. Employee #1 jumped off of the back of the forklift onto the ground. Coworkers came to his assistance and called emergency services. Employee #1 was hospitalized at a burn center for over 24 hours for treatment of second degree burns to the upper legs and groin area. "
##
## $content
## [1] " On August 30 2013 Employee #1 was working from a flatbed trailer. As he worked he fell from the flatbed trailer onto the ground striking his abdomen. The fall height of the flatbed trailer was 57 inches. Employee #1 sustained unspecified injuries in the fall that later on caused his death. "
##
## $content
## [1] " On August 27 2013 Employees #1 and #2 of Templar Inc. a construction company specializing in fiber optic installation and services were working along a highway. The highway speed limit was posted at 55 miles per hour. Employee #1 was marking the location of an underground line that ran below the turn lane. Employee #2 was next to Employee #1 and performing the duties of a flagger. A privately owned vehicle was travelling in the travel/through lane. The vehicle veered to the right entered the turn lane and struck both workers. Emergency medical services were called. Employee #1 was declared dead at the scene. Employee #2 refused emergency medical treatment for the bruises he received when struck. "
##
## $content
## [1] " On August 26 2013 Employee #1 with Lee Iron and Metal Company Inc. was using a forklift (Hyster 50 sit down rider) to move and stack bundled cubes of recycled insulated co-axial (coax) cable and insulated copper wire. The cubes weighed anywhere from 1 000 to 2 000 lbs. Employee #1 had completed stacking one column consisting of five cubes in height along the northeast wall of the recycling warehouse. Employee #1 was beginning a new second column directly in front of the existing first column. Employee #1 placed the first cube of the second column tight against the bottom cube of the existing first column. Employee #1 then backed the forklift up approximately 5 ft and exited the forklift. Employee #1 then stepped in between the forklift and the cubes to either clean off the top from possible loose wires that created a hump or to place 4 ft by 4 ft dunnage so the next cube could be stacked. The five tier stack of cubes began to overturn. The first one went over Employee #1's head and settled on the forks. The second cube (weighing 1 948 lbs) struck Employee #1 on the left side of his face and upper torso and forced him back and down. Employee #1 was pinned between the first cube that fell and second cube. A third cube also fell which struck Employee #1's legs. Employee #1 received a puncture wound to the right abdomen area which caused increased blood loss. According to the medical examiner's report Employee #1 died at the scene from blunt force trauma to the head. "
##
## $content
## [1] " On July 14 2013 Employee #1 vacuum pump truck driver and operator was offloading hot brine water at a geothermal power plant. He was assigned to transfer loads of the brine between power plants and had already made several trips between facilities. When he arrived at the plant Employee #1 connected the hose to the vehicle's tank outlet valve and proceeded to empty the tanks contents by gravity. While the tank was left emptying he went inside the plant's control center briefly to cool off and to get a drink of water. Employee #1 noticed that the flow of brine had stopped due to a clog in the hose when he returned. He tried to clear the hose by switching the truck to vacuum. As he did this he noticed that the flow was still impeded. Employee #1 loosened the hose coupler on the truck's valve which caused hot pressurized brine to flow out. He attempted to hold the hose but eventually let go causing the hot brine water to splash on his left abdomen right leg left leg and left shoulder. Employee #1 rushed into the control room and removed his hot brine soaked clothing. A coworker observed the pump truck with the brine pouring out and Employee #1 running toward the control room. This coworker shut off the valve on the pump truck and went to check on Employee #1. Employee #1 was taken to a hospital and was then transferred to the burn unit of a medical center. He was admitted to the medical center where he was treated for second and third-degree burns and then hospitalized. "
##
## $content
## [1] " On June 30 2013 Employee #1 was working in a food Taqueria for a supermarket. The employee was in the kitchen area transferring soup from the kitchen area to a refrigerator. While in the refrigerator the employee lifted the pail while hot and the pail was soft due to the heat it folded causing the liquid to spill on her the employee's arm chest and abdomen area. Employee #1 was transported to an area hospital where she was treated for second and third degree burns and remains hospitalized. "
# Clean the title corpus. The steps are order-dependent and run exactly in
# this sequence.
corpus <- tm_map(corpus, content_transformer(tolower)) # convert to lower case
corpus <- tm_map(corpus, removeNumbers) # remove digits
# First stop-word pass, applied to the unstemmed words.
corpus <- tm_map(corpus, removeWords, my_stopwords)
corpus <- tm_map(corpus, removePunctuation) # remove punctuation
corpus <- tm_map(corpus, stemDocument) # word stemming
# Second stop-word pass, which now also matches the stemmed entries of
# my_stopwords (e.g. "employe", "approxim").
corpus <- tm_map(corpus, removeWords, my_stopwords)
corpus <- tm_map(corpus, stripWhitespace) # collapse runs of whitespace

# Print the first few cleaned titles. The range is capped at the corpus size:
# a hard-coded 1:6 fails with a subscript error on a smaller corpus.
for (i in seq_len(min(6L, length(corpus)))) {
  print(corpus[[i]][1])
}
## $content
## [1] "burn forklift radiat fluid"
##
## $content
## [1] "fall flatb trailer later die"
##
## $content
## [1] " struck motor vehicl kill"
##
## $content
## [1] "struck bale wire kill"
##
## $content
## [1] "splash hot water burn"
##
## $content
## [1] "suffer burn move soup"
# Document-term matrix of the cleaned titles.
dtm <- DocumentTermMatrix(x = corpus)

# Total count of every term across all titles, most frequent first.
tf <- sort(colSums(as.matrix(dtm)), decreasing = TRUE)

# Word cloud of at most 50 title terms, coloured with six shades of the
# "Dark2" ColorBrewer palette.
dark2 <- brewer.pal(n = 6, name = "Dark2")
wordcloud(
  words = names(tf),
  freq = tf,
  max.words = 50,
  scale = c(3, 0.8),
  colors = dark2
)
# Clean the narrative corpus with the same order-dependent steps used for the
# titles.
corpus1 <- tm_map(corpus1, content_transformer(tolower)) # convert to lower case
corpus1 <- tm_map(corpus1, removeNumbers) # remove digits
# First stop-word pass, applied to the unstemmed words.
corpus1 <- tm_map(corpus1, removeWords, my_stopwords)
corpus1 <- tm_map(corpus1, removePunctuation) # remove punctuation
corpus1 <- tm_map(corpus1, stemDocument) # word stemming
# Second stop-word pass, which now also matches the stemmed entries of
# my_stopwords (e.g. "employe", "approxim").
corpus1 <- tm_map(corpus1, removeWords, my_stopwords)
corpus1 <- tm_map(corpus1, stripWhitespace) # collapse runs of whitespace

# Print the first few cleaned narratives. The range is capped at the corpus
# size: a hard-coded 1:6 fails with a subscript error on a smaller corpus.
for (i in seq_len(min(6L, length(corpus1)))) {
  print(corpus1[[i]][1])
}
## $content
## [1] " m novemb edco wast recycl servic oper forklift lind lift truck serial number hxs identifi fl m move bale recycl paper product collect area yard truck clean replac air filter forklift fl clean air filter park fl doorway mainten build air filter locat rear forklift behind cab frame driver side remov air filter clean climb onto back forklift replac back forklift s foot dislodg cool system radiat cap fluid lift truck cool system hot pressur oper morn hot fluid spray reservoir burn upper leg groin area jump back forklift onto ground came assist call emerg servic hospit burn center hour treatment second degre burn upper leg groin area"
##
## $content
## [1] "august flatb trailer fell flatb trailer onto ground strike abdomen fall height flatb trailer inch sustain unspecifi injuri fall later caus death"
##
## $content
## [1] "august templar inc construct compani special fiber optic instal servic along highway highway speed limit post mile per hour mark locat underground line ran turn lane next perform duti flagger privat vehicl travel travel lane vehicl veer right enter turn lane struck emerg medic servic call declar dead scene refus emerg medic treatment bruis receiv struck"
##
## $content
## [1] "august lee iron metal compani inc forklift hyster sit rider move stack bundl cube recycl insul coaxial coax cabl insul copper wire cube weigh anywher lbs complet stack column consist five cube height along northeast wall recycl warehous begin new second column direct front exist first column place first cube second column tight bottom cube exist first column back forklift ft exit forklift step forklift cube either clean top possibl loos wire creat hump place ft ft dunnag next cube stack five tier stack cube began overturn first went s head settl fork second cube weigh lbs struck left side face upper torso forc back pin first cube fell second cube third cube fell struck s leg receiv punctur wound right abdomen area caus increas blood loss accord medic examin report die scene blunt forc trauma head"
##
## $content
## [1] "juli vacuum pump truck driver oper offload hot brine water geotherm power plant assign transfer load brine power plant alreadi made sever trip facil arriv plant connect hose vehicl tank outlet valv proceed empti tank content graviti tank left empti went insid plant control center briefli cool drink water notic flow brine stop due clog hose return tri clear hose switch truck vacuum notic flow imped loosen hose coupler truck valv caus hot pressur brine flow attempt hold hose eventu let go caus hot brine water splash left abdomen right leg left leg left shoulder rush control room remov hot brine soak cloth observ pump truck brine pour run toward control room shut valv pump truck went check taken hospit transfer burn unit medic center admit medic center treat second thirddegre burn hospit"
##
## $content
## [1] "june food taqueria supermarket kitchen area transfer soup kitchen area refriger refriger lift pail hot pail soft due heat fold caus liquid spill s arm chest abdomen area transport area hospit treat second third degre burn remain hospit"
# Document-term matrix of the cleaned narratives.
dtm1 <- DocumentTermMatrix(x = corpus1)

# Total count of every term across all narratives, most frequent first.
tf1 <- sort(colSums(as.matrix(dtm1)), decreasing = TRUE)

# Word cloud of at most 50 narrative terms, coloured with six shades of the
# "Dark2" ColorBrewer palette.
dark2 <- brewer.pal(n = 6, name = "Dark2")
wordcloud(
  words = names(tf1),
  freq = tf1,
  max.words = 50,
  scale = c(2.3, 0.8),
  colors = dark2
)
# ==== Topic modelling on the incident titles (V2) ====

# Total word count of each document. LDA() rejects input rows without any
# non-zero entry, so documents left empty by the cleaning are dropped first.
rowTotals <- apply(dtm, 1, sum)
dtm <- dtm[rowTotals > 0, ]

# Fit a 5-topic LDA model by Gibbs sampling. The sampler is stochastic; the
# fixed seed makes the fit repeatable from run to run. (Output recorded in
# this file from earlier, unseeded runs will not match exactly.)
lda_5_g <- LDA(dtm, k = 5, method = "Gibbs", control = list(seed = 1234))

# Ten highest-ranked terms of every topic.
terms(lda_5_g, 10)
## Topic 1 Topic 2 Topic 3 Topic 4 Topic 5
## [1,] "fall" "amput" "kill" "injur" "burn"
## [2,] "fractur" "finger" "struck" "die" "suffer"
## [3,] "electr" "caught" "crush" "fall" "explos"
## [4,] "shock" "machin" "truck" "later" "sustain"
## [5,] "leg" "injur" "crane" "collaps" "fire"
## [6,] "ladder" "hand" "pin" "head" "injur"
## [7,] "electrocut" "arm" "run" "elev" "chemic"
## [8,] "roof" "press" "vehicl" "drown" "expos"
## [9,] "line" "oper" "roll" "exposur" "injuri"
## [10,] "forklift" "lacer" "lift" "attack" "hot"
# Log-likelihood of the fitted title model.
logLik(lda_5_g)
## 'log Lik.' -201360.9 (df=13615)
# First ten entries of the model vocabulary.
lda_5_g@terms[seq_len(10)]
## [1] "abdomen" "abdomin" "abrad" "abras" "access" "accid"
## [7] "accident" "accord" "accumul" "acet"
# Beta values of topic 3 for those same ten terms.
lda_5_g@beta[3, seq_len(10)]
## [1] -7.271407 -11.382281 -11.382281 -11.382281 -8.984386 -11.382281
## [7] -11.382281 -11.382281 -11.382281 -11.382281
# Draw a word cloud of the highest-weighted terms of one topic.
#
# m: fitted topic model with slots @beta (one row per topic, one column per
#    term, values on the natural-log scale) and @terms (the vocabulary, in
#    the column order of @beta).
# i: index of the topic (row of m@beta) to display.
# n: maximum number of terms to draw; defaults to 20.
#
# Uses the colour palette `dark2` from the calling environment.
showcloud <- function(m, i, n = 20) {
  tt <- m@beta
  colnames(tt) <- m@terms
  # Largest log-weights first. head() returns fewer than n entries when the
  # vocabulary is small, whereas indexing with 1:20 would pad with NA.
  top <- head(sort(tt[i, ], decreasing = TRUE), n)
  # beta holds natural logarithms, so exp() recovers the term weights that
  # size the words; 2^beta would compress the differences between terms.
  wordcloud(names(top), exp(top), scale = c(2.3, 0.8), rot.per = 0.3,
            colors = dark2)
}
# Word cloud for topic 5 of the title model.
showcloud(m = lda_5_g, i = 5)

# Topic ranking (all five topics, most likely first) for the first ten
# documents, one document per row.
t(topics(lda_5_g, 5))[seq_len(10), ]
## [,1] [,2] [,3] [,4] [,5]
## 1 1 2 4 5 3
## 2 3 4 1 2 5
## 3 3 1 2 4 5
## 4 1 3 2 4 5
## 5 5 1 2 3 4
## 6 5 1 2 3 4
## 7 1 3 4 2 5
## 8 3 1 2 4 5
## 9 1 3 2 4 5
## 10 1 2 5 3 4
# Topic that is the most likely one for the largest number of documents.
which.max(tabulate(topics(lda_5_g)))
## [1] 1
# Number of documents assigned to each topic, as a plain count vector ...
tabulate(topics(lda_5_g))
## [1] 2726 2136 1911 1557 1663
# ... and as a labelled table.
table(topics(lda_5_g))
##
## 1 2 3 4 5
## 2726 2136 1911 1557 1663
# Topic proportions (gamma) of the first document in the model.
lda_5_g@gamma[1, ]
## [1] 0.2037037 0.2037037 0.1851852 0.2037037 0.2037037
barplot(
  height = lda_5_g@gamma[1, ],
  names.arg = seq_len(5),
  main = "Topic distribution of Story 1"
)
# ==== Topic modelling on the incident narratives (V3) ====

# LDA() rejects input rows without any non-zero entry, so keep only documents
# that still contain at least one term. The document indices are read straight
# from the sparse triplet representation (dtm1$i / dtm1$v) so the large
# narrative matrix does not have to be expanded to a dense one for this check.
nonEmptyDocs <- sort(unique(dtm1$i[dtm1$v > 0]))
dtm1 <- dtm1[nonEmptyDocs, ]

# Fit a 5-topic LDA model by Gibbs sampling. The sampler is stochastic; the
# fixed seed makes the fit repeatable from run to run. (Output recorded in
# this file from earlier, unseeded runs will not match exactly.)
lda_5_g1 <- LDA(dtm1, k = 5, method = "Gibbs", control = list(seed = 1234))

# Ten highest-ranked terms of every topic.
terms(lda_5_g1, 10)
## Topic 1 Topic 2 Topic 3 Topic 4 Topic 5
## [1,] "hand" "hospit" "truck" "fell" "burn"
## [2,] "left" "medic" "back" "ground" "electr"
## [3,] "machin" "transport" "side" "feet" "power"
## [4,] "right" "servic" "oper" "lift" "tank"
## [5,] "oper" "emerg" "load" "fractur" "water"
## [6,] "hospit" "center" "struck" "floor" "line"
## [7,] "finger" "treat" "move" "fall" "fire"
## [8,] "cut" "call" "forklift" "top" "air"
## [9,] "number" "accid" "kill" "ladder" "system"
## [10,] "amput" "report" "trailer" "concret" "hospit"
# Log-likelihood of the fitted narrative model.
logLik(lda_5_g1)
## 'log Lik.' -3308266 (df=74835)
# First ten entries of the model vocabulary.
lda_5_g1@terms[seq_len(10)]
## [1] "aaa" "aachen" "aal" "aard" "aaron" "abaco" "abacus"
## [8] "abandon" "abat" "abc"
# Beta values of topic 3 for those same ten terms.
lda_5_g1@beta[3, seq_len(10)]
## [1] -13.83675 -13.83675 -13.83675 -13.83675 -11.43886 -13.83675 -11.43886
## [8] -11.43886 -13.83675 -13.83675
# Draw a word cloud of the highest-weighted terms of one topic.
#
# m: fitted topic model with slots @beta (one row per topic, one column per
#    term, values on the natural-log scale) and @terms (the vocabulary, in
#    the column order of @beta).
# i: index of the topic (row of m@beta) to display.
# n: maximum number of terms to draw; defaults to 20.
#
# Uses the colour palette `dark2` from the calling environment.
showcloud <- function(m, i, n = 20) {
  tt <- m@beta
  colnames(tt) <- m@terms
  # Largest log-weights first. head() returns fewer than n entries when the
  # vocabulary is small, whereas indexing with 1:20 would pad with NA.
  top <- head(sort(tt[i, ], decreasing = TRUE), n)
  # beta holds natural logarithms, so exp() recovers the term weights that
  # size the words; 2^beta would compress the differences between terms.
  wordcloud(names(top), exp(top), scale = c(2.3, 0.8), rot.per = 0.3,
            colors = dark2)
}
# Word cloud for topic 5 of the narrative model.
showcloud(m = lda_5_g1, i = 5)

# Which document belongs to which topic? List the three most likely topics
# for each of the first ten documents, one document per row.
t(topics(lda_5_g1, 3))[seq_len(10), ]
## [,1] [,2] [,3]
## 1 5 3 2
## 2 4 3 2
## 3 3 2 5
## 4 4 2 3
## 5 5 3 2
## 6 5 1 3
## 7 4 1 3
## 8 3 4 2
## 9 3 2 1
## 10 4 2 1
# Topic that is the most likely one for the largest number of documents.
which.max(tabulate(topics(lda_5_g1)))
## [1] 4
# Number of documents assigned to each topic, as a plain count vector ...
tabulate(topics(lda_5_g1))
## [1] 1908 1858 1874 2226 2134
# ... and as a labelled table.
table(topics(lda_5_g1))
##
## 1 2 3 4 5
## 1908 1858 1874 2226 2134
# Topic proportions (gamma) of the first document in the model.
lda_5_g1@gamma[1, ]
## [1] 0.1381579 0.1447368 0.2894737 0.1250000 0.3026316
barplot(
  height = lda_5_g1@gamma[1, ],
  names.arg = seq_len(5),
  main = "Topic distribution of Story 1"
)
# Look up the direct hyponyms (more specific terms) of a noun in WordNet.
#
# x: the noun to look up; it is matched exactly, ignoring case.
#
# Returns a character vector with the words of every synset reached from the
# first synset of x through the "~" pointer, or NULL (after a message) when
# x is not in the dictionary or has no such related synsets.
# Any other lookup error is re-raised.
hyponyms <- function(x) {
  filter <- getTermFilter("ExactMatchFilter", x, TRUE)
  terms <- getIndexTerms("NOUN", 1, filter)
  # No matching index term: return early instead of failing on terms[[1]].
  if (length(terms) == 0) {
    message("No WordNet noun entry found for: ", x)
    return(NULL)
  }
  synsets <- getSynsets(terms[[1]])
  related <- tryCatch(
    getRelatedSynsets(synsets[[1]], "~"),
    error = function(condition) {
      # This specific message is treated as "no hyponyms"; every other error
      # is a genuine failure and is passed on unchanged.
      if (identical(conditionMessage(condition),
                    "RcallMethod: invalid object parameter")) {
        message("No direct hyponyms found")
        return(NULL)
      }
      stop(condition)
    }
  )
  if (is.null(related)) {
    return(NULL)
  }
  # lapply() + unlist() always yields a flat vector, whatever the number of
  # words per synset.
  unlist(lapply(related, getWord))
}
# Point the wordnet package at the local WordNet dictionary directory, then
# list the direct hyponyms of "employee" as a check of the lookup.
setDict("C:/Program Files (x86)/WordNet/2.1/dict")
hyponyms("employee")
## [1] "bartender" "barman"
## [3] "barkeep" "barkeeper"
## [5] "mixologist" "clerk"
## [7] "company man" "copyist"
## [9] "scribe" "scrivener"
## [11] "copywriter" "crewman"
## [13] "deliveryman" "delivery boy"
## [15] "deliverer" "dining-room attendant"
## [17] "restaurant attendant" "dispatcher"
## [19] "dog catcher" "floater"
## [21] "floorwalker" "shopwalker"
## [23] "gardener" "gasman"
## [25] "gofer" "hire"
## [27] "hired help" "hireling"
## [29] "pensionary" "jobholder"
## [31] "line worker" "liveryman"
## [33] "office boy" "organization man"
## [35] "porter" "Pullman porter"
## [37] "potboy" "potman"
## [39] "public servant" "registrar"
## [41] "salesperson" "sales representative"
## [43] "sales rep" "sandwichman"
## [45] "shop assistant" "spotter"
## [47] "spotter" "staff member"
## [49] "staffer" "stagehand"
## [51] "stage technician" "stocktaker"
## [53] "stock-taker" "sweeper"
## [55] "tollkeeper" "tollman"
## [57] "tollgatherer" "toll collector"
## [59] "toll taker" "toll agent"
## [61] "toller" "trainman"
## [63] "railroader" "railroad man"
## [65] "railwayman" "railway man"
## [67] "turncock" "typist"
## [69] "workman" "workingman"
## [71] "working man" "working person"
# Dictionaries for the occupation and body-part counts, built from the direct
# WordNet hyponyms of two seed nouns each. The hyponym lists can repeat a word
# (it may belong to several synsets, e.g. "spotter"), and the two seed lists
# can overlap, so unique() keeps each dictionary entry exactly once.
occ_words <- unique(c(hyponyms("employee"), hyponyms("worker")))
body_words <- unique(c(hyponyms("external body part"), hyponyms("body part")))
# Extract the noun lemmas of a text with coreNLP.
#
# x: the text to annotate (handed to annotateString()).
#
# Returns a vector with the "lemma" value of every token whose "POS" tag
# starts with "N".
# NOTE(review): presumably needs initCoreNLP() to have been called first; the
# script does so before its first use of this function.
getNouns <- function(x) {
  tok <- getToken(annotateString(x))
  is_noun <- startsWith(tok[, "POS"], "N")
  # Return the value visibly instead of as the (invisible) result of an
  # assignment.
  unlist(tok[is_noun, "lemma"])
}
# Start coreNLP with its "english_fast" configuration.
initCoreNLP(type = "english_fast")

# Read the tab-separated records again, this time into `text`. The file has
# no header row, and quoting is disabled so quote characters are read
# literally.
text <- read.delim(
  file = "osha.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE
)

# Show the first record.
text[1, ]
## V1 V2
## 1 201079928 Employee Is Burned By Forklift Radiator Fluid
## V3
## 1 At approximately 11:30 a.m. on November 13 2013 Employee #1 with Edco Waste & Recycling Services was operating a forklift (Linde Lift Truck; Serial Number: H2X393S04578; identified by the employer as FL-3) from approximately 4:00 a.m. moving bales of recyclable paper products from a collection area in the yard into trucks. Then Employee #1 cleaned and was replacing an air filter on the forklift FL-3. To clean out the air filter Employee #1 parked FL-3 in the doorway of the maintenance building. The air filter was located on the rear of the forklift behind the cab frame on the driver's side. Employee #1 removed the air filter and cleaned it out and then he climbed up onto the back of the forklift to replace it. While up on the back of the forklift Employee #1's foot dislodged the cooling system radiator cap. The fluid in the lift truck's cooling system was hot and under pressure from being operated all morning. The hot fluid sprayed up and out of the reservoir. Employee #1 was burned on the upper legs and the groin area. Employee #1 jumped off of the back of the forklift onto the ground. Coworkers came to his assistance and called emergency services. Employee #1 was hospitalized at a burn center for over 24 hours for treatment of second degree burns to the upper legs and groin area.
# Narrative text of every record.
commentN <- text$V3

# Noun lemmas of every narrative, one list element per document. lapply()
# always returns a list; sapply() would silently collapse the result into a
# matrix if every document happened to yield the same number of nouns. The
# names are set exactly as sapply() assigned them, so anything downstream
# that relies on them is unaffected.
lemmaN <- lapply(commentN, getNouns)
names(lemmaN) <- commentN

vectorN <- VectorSource(lemmaN)
corpusN <- VCorpus(vectorN)

# Document-term matrices restricted to the occupation dictionary (dtmN) and
# the body-part dictionary (dtmB). No stemming is applied, and weightBin
# records only whether a term occurs in a document, not how often.
dtmN <- DocumentTermMatrix(corpusN, control = list(dictionary = occ_words,
                                                   stopwords = my_stopwords,
                                                   stemming = FALSE,
                                                   weighting = weightBin))
dtmB <- DocumentTermMatrix(corpusN, control = list(dictionary = body_words,
                                                   stopwords = my_stopwords,
                                                   stemming = FALSE,
                                                   weighting = weightBin))

# With binary weights the column sums are the number of documents that
# mention each dictionary term.
freqN <- colSums(as.matrix(dtmN))
freqB <- colSums(as.matrix(dtmB))

# Occupation terms, most widely mentioned first.
sort(freqN, decreasing = TRUE)
## driver help helper
## 447 170 87
## assistant washer spotter
## 72 43 28
## volunteer clerk trimmer
## 27 26 26
## caster gutter hanger
## 24 23 21
## stripper skidder finisher
## 19 18 15
## sweeper processor scrubber
## 15 14 13
## planter puller rat
## 9 9 9
## tier oiler splitter
## 9 8 8
## stringer gardener winder
## 8 7 7
## dispatcher splicer wiper
## 6 6 6
## independent hire bleacher
## 5 4 3
## carter poster pruner
## 3 3 3
## porter temporary tugger
## 2 2 2
## workman bartender crewman
## 2 1 1
## dragger melter nailer
## 1 1 1
## peeler potman retainer
## 1 1 1
## salesperson seasonal slave
## 1 1 1
## tapper temp thrower
## 1 1 1
## topper barkeep barkeeper
## 1 0 0
## barman beater bill poster
## 0 0 0
## bill sticker blackleg boater
## 0 0 0
## boatman charcoal burner company man
## 0 0 0
## copyist copywriter cornhusker
## 0 0 0
## deliverer delivery boy deliveryman
## 0 0 0
## dining-room attendant dog catcher employable
## 0 0 0
## employee floater floorwalker
## 0 0 0
## freelance gasman gofer
## 0 0 0
## hard worker hired help hireling
## 0 0 0
## jobholder lamplighter line worker
## 0 0 0
## liveryman mixologist moonlighter
## 0 0 0
## mopper muzzler office boy
## 0 0 0
## organization man part-timer pensionary
## 0 0 0
## plier plyer potboy
## 0 0 0
## public servant Pullman porter quarrier
## 0 0 0
## quarryman ragsorter railroad man
## 0 0 0
## railroader railway man railwayman
## 0 0 0
## registrar restaurant attendant sales rep
## 0 0 0
## sales representative sandwichman scab
## 0 0 0
## scribe scrivener seasonal worker
## 0 0 0
## self-employed person servant sheller
## 0 0 0
## shelver shingler shop assistant
## 0 0 0
## shopwalker shoveler shoveller
## 0 0 0
## skilled worker skilled workman solderer
## 0 0 0
## sprigger staff member staffer
## 0 0 0
## stage technician stagehand stainer
## 0 0 0
## stemmer stock-taker stocktaker
## 0 0 0
## strikebreaker striver supporter
## 0 0 0
## tacker teaser temporary worker
## 0 0 0
## throwster tier up tiler
## 0 0 0
## toiler toll agent toll collector
## 0 0 0
## toll taker toller tollgatherer
## 0 0 0
## tollkeeper tollman trade unionist
## 0 0 0
## trained worker trainman turncock
## 0 0 0
## twiner typist union member
## 0 0 0
## unionist unpaid worker wallah
## 0 0 0
## waterman working girl working man
## 0 0 0
## working person workingman workmate
## 0 0 0
# Body-part dictionary terms ordered by freqB, largest value first.
sort(freqB, decreasing = TRUE)
## area head system
## 1567 1131 683
## body face back
## 621 608 493
## bottom process chest
## 351 348 344
## neck shoulder structure
## 303 282 215
## rear member hip
## 177 156 137
## dock seat joint
## 134 128 90
## torso toe abdomen
## 87 82 74
## stomach tail apparatus
## 42 40 33
## tissue extremity lip
## 33 32 28
## buttock stump trunk
## 27 24 24
## cheek butt saddle
## 23 18 17
## backside organ small
## 15 15 13
## feature groove shin
## 12 11 10
## region belly rectum
## 9 6 6
## lobe shank breast
## 5 5 3
## flank cannon horseback
## 3 2 2
## posterior stern thorax
## 2 2 2
## appendage loin adnexa
## 1 1 0
## ambulacrum ampulla anatomical structure
## 0 0 0
## annexa arse arthromere
## 0 0 0
## articulatio articulation ass
## 0 0 0
## behind bodily structure body structure
## 0 0 0
## bum buns buttocks
## 0 0 0
## can caput cervix
## 0 0 0
## complex body part croup croupe
## 0 0 0
## derriere dilator dorsum
## 0 0 0
## energid external body part fanny
## 0 0 0
## fornix frill fundament
## 0 0 0
## gaskin haunch hind end
## 0 0 0
## hindquarters human face keister
## 0 0 0
## lineament loins lumbus
## 0 0 0
## mentum nates outgrowth
## 0 0 0
## pectus prat protoplast
## 0 0 0
## rear end rudiment ruff
## 0 0 0
## rump septum tail end
## 0 0 0
## tooshie tush underpart
## 0 0 0
## vallecula venter withers
## 0 0 0
# Dense copies of the two dictionary-based document-term matrices.
a <- as.matrix(dtmN)
b <- as.matrix(dtmB)

# Write both matrices as CSV files into the project folder.
setwd("C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Text Analytic/WK 1/mini project")
write.csv(x = a, file = "Occupation.csv")
write.csv(x = b, file = "BodyParts.csv")

# Word clouds of the occupation and body-part term frequencies: at most 60
# words each, rot.per set to 0.5, six "Dark2" colours.
dark2 <- brewer.pal(n = 6, name = "Dark2")
wordcloud(
  words = names(freqN),
  freq = freqN,
  max.words = 60,
  rot.per = 0.5,
  colors = dark2
)
wordcloud(
  words = names(freqB),
  freq = freqB,
  max.words = 60,
  rot.per = 0.5,
  colors = dark2
)