# NOTE(review): the working directory is a user-specific absolute path; the
# workbook below is resolved relative to it. Consider a project-relative path.
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/TRB 2024/PCM")

library(readxl)
library(tm)
library(quanteda)
library(tidyverse)

# Read the "ori" sheet of the investigation-report workbook.
dat <- read_excel("FINAL_InvestigationReport_SD.xlsx", sheet = "ori")

# Number of reports per investigation type.
table(dat$Type)
##
## Air bag
## 56
## Ambulance
## 10
## Child Restraint System
## 29
## Crash Avoidance Technology
## 10
## Guardrail End Terminal
## 30
## Guardrail End Treatment
## 9
## Hazardous
## 1
## Heavy-Truck
## 1
## Malfunction ABS Failure
## 1
## Malfunction Alleged Weld Separation
## 1
## Malfunction Brake Fail
## 1
## Malfunction Cruise Control
## 1
## Malfunction Front Axle Shaft U-Joint
## 1
## Malfunction Seat Belt Separation
## 1
## Malfunction Steering
## 2
## Malfunction Vehicle Component 01 2020 AR
## 1
## Manual Restraint System
## 1
## Non-Crash Vehicle Fire
## 1
## On-Site Side Impact
## 1
## Rear Impact Crash and Fire Investigation
## 1
## Remote Vehicle FireChild Restraint
## 7
## Rollaway Crash Investigation
## 1
## RollawayBackover Investigation
## 1
## Rollover
## 8
## School Bus
## 2
## Side Inflatable Curtain Occupant Protection Crash
## 1
## Small Overlap Oblique Impact
## 2
## Unintended Acceleration
## 6
## Vehicle Fire
## 1
#' Normalise free-text narratives before tokenisation
#'
#' Strips URLs, retweet prefixes, @mentions and #hashtags, replaces "&" with
#' "and", turns line-break markers into spaces, removes punctuation, lower-cases
#' the text and trims surrounding whitespace. `NA` elements stay `NA`.
#'
#' @param x A character vector.
#' @return A character vector the same length as `x`.
clean_texts <- function(x) {
  x |>
    # Remove only the URL itself (scheme plus the following non-space run);
    # a greedy ".*" here would delete the rest of the narrative as well.
    stringr::str_remove_all(" ?(f|ht)tps?://\\S+") |>
    stringr::str_replace_all("&", "and") |>
    # These patterns rely on ":", "@", "#" and "/" still being present, so
    # they must run before punctuation is stripped.
    stringr::str_remove_all("^RT:? ") |>
    stringr::str_remove_all("@[[:alnum:]]+") |>
    stringr::str_remove_all("#[[:alnum:]]+") |>
    # "///n" is kept as a literal marker; real line breaks are treated the
    # same way (presumably what the marker was meant to capture).
    stringr::str_replace_all("///n|[\r\n]+", " ") |>
    stringr::str_remove_all("[[:punct:]]") |>
    # "pic.twitter.com" with its dots already stripped by the step above.
    stringr::str_remove_all("pictwittercom") |>
    stringr::str_to_lower() |>
    stringr::str_trim("both")
}
# Cleaned copies of the four narrative columns.
dat$World <- clean_texts(dat$crashSite)
dat$Schema <- clean_texts(dat$preCrash)
dat$Action <- clean_texts(dat$crash)
dat$PostCrash <- clean_texts(dat$postCrash)

# Per-phase views: columns 1 and 4 plus one cleaned narrative column
# (positions 9-12 are the columns added above). dat1 is restricted to the
# Ambulance investigations straight away; dat2-dat4 are filtered later.
dat1 <- dat[, c(1, 4, 9)] |>
  subset(Type == "Ambulance")
dat2 <- dat[, c(1, 4, 10)]
dat3 <- dat[, c(1, 4, 11)]
dat4 <- dat[, c(1, 4, 12)]

library(DT)

# Interactive table of the Ambulance rows with a CSV export button.
datatable(
  dat1,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = "csv")
)
### Keyword co-occurrence networks per crash type and narrative phase ----
library(quanteda.textplots)

# Terms dropped from every network: project-specific terms plus quanteda's
# English stop-word list.
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red",
  "g", "h", "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Cleaned narrative columns in plotting order, each with its edge colour.
phase_colors <- c(
  World = "#798E87",
  Schema = "#CDC08C",
  Action = "#C7B19C",
  PostCrash = "#DD8D29"
)

# Investigation types that are pooled into a single "Malfunction" group.
malfunction_types <- c(
  "Malfunction ABS Failure",
  "Malfunction Alleged Weld Separation",
  "Malfunction Brake Fail",
  "Malfunction Cruise Control",
  "Malfunction Front Axle Shaft U-Joint",
  "Malfunction Seat Belt Separation",
  "Malfunction Steering",
  "Malfunction Vehicle Component 01 2020 AR"
)

# One entry per group of plots. `n_top` is the number of most frequent terms
# kept in the network; a single value applies to all four phases, a vector
# gives one value per phase in the order of `phase_colors`.
crash_groups <- list(
  list(types = "Ambulance", n_top = 30),
  list(types = "Crash Avoidance Technology", n_top = 30),
  list(types = "Rollover", n_top = 30),
  list(types = "Unintended Acceleration", n_top = c(30, 30, 30, 20)),
  list(types = "Guardrail End Terminal", n_top = 30),
  list(types = "Child Restraint System", n_top = 30),
  list(types = "Air bag", n_top = 40),
  list(types = malfunction_types, n_top = 40)
)

#' Build the term co-occurrence network for one crash group and phase
#'
#' @param data Data frame with a `Type` column and the cleaned narrative
#'   column named by `phase`.
#' @param types Character vector of `Type` values to include.
#' @param phase Name of the narrative column to analyse.
#' @param edge_color Edge colour passed to `textplot_network()`.
#' @param stop_terms Character vector of tokens to remove.
#' @param n_top Number of most frequent terms to keep in the network.
#' @return The plot object returned by `textplot_network()`.
plot_phase_network <- function(data, types, phase, edge_color,
                               stop_terms, n_top = 30) {
  stopifnot(phase %in% names(data))
  keep <- data$Type %in% types
  if (!any(keep)) {
    stop("No rows with Type in: ", paste(types, collapse = ", "),
         call. = FALSE)
  }

  # The corpus is always built from this phase's own column, so a plot can
  # never silently reuse the corpus of a previously plotted phase.
  # corpus() warns and substitutes "" when a narrative is NA.
  phase_dfm <- corpus(data[[phase]][keep]) |>
    tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
    tokens_remove(pattern = stop_terms) |>
    dfm()

  top_terms <- names(topfeatures(phase_dfm, n_top))
  top_fcm <- fcm_select(fcm(phase_dfm), pattern = top_terms)

  textplot_network(
    top_fcm,
    min_freq = 0.1,
    edge_alpha = 0.4,
    edge_color = edge_color,
    edge_size = 2,
    vertex_labelsize = 8
  )
}

for (group in crash_groups) {
  n_top <- rep_len(group$n_top, length(phase_colors))
  for (i in seq_along(phase_colors)) {
    network_plot <- plot_phase_network(
      dat,
      types = group$types,
      phase = names(phase_colors)[[i]],
      edge_color = phase_colors[[i]],
      stop_terms = all_stops,
      n_top = n_top[[i]]
    )
    # Plots are not auto-printed inside a loop, so print explicitly.
    print(network_plot)
  }
}