Text Network

# NOTE(review): machine-specific absolute path; the relative file name below
# only resolves where this folder exists.
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/TRB 2024/PCM")
library(readxl)
library(tm)
library(quanteda)
# Read the "ori" sheet of the workbook into `dat`.
dat <- read_excel(path = "FINAL_InvestigationReport_SD.xlsx", sheet = "ori")

library(tidyverse)
# Row count for each value of Type.
table(dat$Type)

## 
##                                           Air bag 
##                                                56 
##                                         Ambulance 
##                                                10 
##                            Child Restraint System 
##                                                29 
##                        Crash Avoidance Technology 
##                                                10 
##                            Guardrail End Terminal 
##                                                30 
##                           Guardrail End Treatment 
##                                                 9 
##                                         Hazardous 
##                                                 1 
##                                       Heavy-Truck 
##                                                 1 
##                           Malfunction ABS Failure 
##                                                 1 
##               Malfunction Alleged Weld Separation 
##                                                 1 
##                            Malfunction Brake Fail 
##                                                 1 
##                        Malfunction Cruise Control 
##                                                 1 
##              Malfunction Front Axle Shaft U-Joint 
##                                                 1 
##                  Malfunction Seat Belt Separation 
##                                                 1 
##                              Malfunction Steering 
##                                                 2 
##          Malfunction Vehicle Component 01 2020 AR 
##                                                 1 
##                           Manual Restraint System 
##                                                 1 
##                            Non-Crash Vehicle Fire 
##                                                 1 
##                               On-Site Side Impact 
##                                                 1 
##          Rear Impact Crash and Fire Investigation 
##                                                 1 
##                Remote Vehicle FireChild Restraint 
##                                                 7 
##                      Rollaway Crash Investigation 
##                                                 1 
##                    RollawayBackover Investigation 
##                                                 1 
##                                          Rollover 
##                                                 8 
##                                        School Bus 
##                                                 2 
## Side Inflatable Curtain Occupant Protection Crash 
##                                                 1 
##                      Small Overlap Oblique Impact 
##                                                 2 
##                           Unintended Acceleration 
##                                                 6 
##                                      Vehicle Fire 
##                                                 1

#' Normalise free text before tokenisation.
#'
#' Removes URLs, a leading retweet marker, @mentions and #hashtags, turns the
#' `&amp;` entity into "and", strips the remaining punctuation, lower-cases
#' the text and trims surrounding whitespace.
#'
#' @param x Character vector of raw text.
#' @return Character vector of cleaned text, one element per element of `x`.
clean_texts <- function(x) {
  x %>%
    # Stop the URL at the next whitespace. The earlier greedy `(.*)` tail also
    # deleted every word that followed the URL on the same line.
    str_remove_all(" ?(f|ht)tps?://\\S+") %>%
    str_replace_all("&amp;", "and") %>%
    # These three patterns depend on ":", "@" and "#", so they must run before
    # the punctuation strip; placed after it they could never match.
    str_remove_all("^RT:? ") %>%
    str_remove_all("@[[:alnum:]]+") %>%
    str_remove_all("#[[:alnum:]]+") %>%
    str_remove_all("[[:punct:]]") %>%
    # "pic.twitter.com" has already lost its dots at this point.
    str_remove_all("pictwittercom") %>%
    # NOTE(review): no "/" survives the punctuation strip, so this literal
    # "///n" cannot match; presumably a newline pattern was intended - confirm.
    str_replace_all("///n", " ") %>%
    str_to_lower() %>%
    str_trim("both")
}

# Store a cleaned copy of each narrative field under a new column name.
dat$World <- clean_texts(dat$crashSite)
dat$Schema <- clean_texts(dat$preCrash)
dat$Action <- clean_texts(dat$crash)
dat$PostCrash <- clean_texts(dat$postCrash)

Ambulance

World

# Four three-column slices of `dat`: columns 1 and 4 plus one of columns 9-12.
# NOTE(review): presumably 9-12 are the World, Schema, Action and PostCrash
# columns added above and one of the first two is Type - confirm, since the
# selection is positional.
dat1 <- dat[, c(1, 4, 9)]
dat2 <- dat[, c(1, 4, 10)]
dat3 <- dat[, c(1, 4, 11)]
dat4 <- dat[, c(1, 4, 12)]


# Keep the Ambulance rows and show them in a table with a CSV export button.
dat1 <- subset(dat1, Type == "Ambulance")
library(DT)
datatable(
  dat1,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("csv"))
)

### World

# Ambulance / World: feature co-occurrence network of the cleaned World text.
corp1 <- corpus(dat1$World)

# Custom stopwords (domain terms, units, single letters, colours, travel
# directions) followed by stopwords("en").
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red", "g", "h",
  "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "x", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Tokenise without punctuation or numbers, drop the stopwords, build the dfm.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

# NOTE(review): dfm_select() gets no pattern here, so it presumably keeps
# every feature - confirm.
tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
# Restrict the co-occurrence matrix to the top features before plotting.
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
library(quanteda.textplots)

## Warning: package 'quanteda.textplots' was built under R version 4.2.3

textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Ambulance / Schema: co-occurrence network of the 30 top features.
dat2 <- subset(dat2, Type == "Ambulance")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Ambulance / Action: co-occurrence network of the 30 top features.
dat3 <- subset(dat3, Type == "Ambulance")
dat1 <- dat3

corp1 <- corpus(dat1$Action)

## Warning: NA is replaced by empty string

# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Ambulance / Post-Crash: co-occurrence network of the 30 top features.
dat4 <- subset(dat4, Type == "Ambulance")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#DD8D29", edge_size = 2,
  vertex_labelsize = 8
)

Crash Avoidance Technology

World

# Crash Avoidance Technology / World: co-occurrence network of the 30 top
# features.
dat1 <- subset(dat[, c(1, 4, 9)], Type == "Crash Avoidance Technology")

corp1 <- corpus(dat1$World)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Crash Avoidance Technology / Schema: co-occurrence network of the 30 top
# features.
dat2 <- subset(dat[, c(1, 4, 10)], Type == "Crash Avoidance Technology")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Crash Avoidance Technology / Action: co-occurrence network of the 30 top
# features.
dat3 <- subset(dat[, c(1, 4, 11)], Type == "Crash Avoidance Technology")
dat1 <- dat3

corp1 <- corpus(dat1$Action)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Crash Avoidance Technology / Post-Crash: co-occurrence network of the 30 top
# features.
dat4 <- subset(dat[, c(1, 4, 12)], Type == "Crash Avoidance Technology")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#DD8D29", edge_size = 2,
  vertex_labelsize = 8
)

Rollover

World

# Rollover / World: co-occurrence network of the 30 top features.
dat1 <- subset(dat[, c(1, 4, 9)], Type == "Rollover")

corp1 <- corpus(dat1$World)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Rollover / Schema: co-occurrence network of the 30 top features.
dat2 <- subset(dat[, c(1, 4, 10)], Type == "Rollover")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Rollover / Action: co-occurrence network of the 30 top features.
dat3 <- subset(dat[, c(1, 4, 11)], Type == "Rollover")
dat1 <- dat3

corp1 <- corpus(dat1$Action)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Rollover / Post-Crash: co-occurrence network of the 30 top features.
dat4 <- subset(dat[, c(1, 4, 12)], Type == "Rollover")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#DD8D29", edge_size = 2,
  vertex_labelsize = 8
)

Unintended Acceleration

World

# Unintended Acceleration / World: co-occurrence network of the 30 top
# features.
dat1 <- subset(dat[, c(1, 4, 9)], Type == "Unintended Acceleration")

corp1 <- corpus(dat1$World)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Unintended Acceleration / Schema: co-occurrence network of the 30 top
# features.
dat2 <- subset(dat[, c(1, 4, 10)], Type == "Unintended Acceleration")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Unintended Acceleration / Action: co-occurrence network of the 30 top
# features.
dat3 <- subset(dat[, c(1, 4, 11)], Type == "Unintended Acceleration")
dat1 <- dat3

corp1 <- corpus(dat1$Action)

## Warning: NA is replaced by empty string

# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Unintended Acceleration / Post-Crash: co-occurrence network of the 20 top
# features.
# NOTE(review): this section keeps 20 features while the other Unintended
# Acceleration sections keep 30 - confirm that is deliberate.
dat4 <- subset(dat[, c(1, 4, 12)], Type == "Unintended Acceleration")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 20))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#DD8D29", edge_size = 2,
  vertex_labelsize = 8
)

Guardrail End Terminal

World

# Guardrail End Terminal / World: co-occurrence network of the 30 top
# features.
dat1 <- subset(dat[, c(1, 4, 9)], Type == "Guardrail End Terminal")

corp1 <- corpus(dat1$World)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Guardrail End Terminal / Schema: co-occurrence network of the 30 top
# features.
dat2 <- subset(dat[, c(1, 4, 10)], Type == "Guardrail End Terminal")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Guardrail End Terminal / Action: co-occurrence network of the 30 top
# features.
dat3 <- subset(dat[, c(1, 4, 11)], Type == "Guardrail End Terminal")
dat1 <- dat3

corp1 <- corpus(dat1$Action)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Guardrail End Terminal / Post-Crash: co-occurrence network of the 30 top
# features.
dat4 <- subset(dat[, c(1, 4, 12)], Type == "Guardrail End Terminal")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#DD8D29", edge_size = 2,
  vertex_labelsize = 8
)

Child Restraint System

World

# Child Restraint System / World: co-occurrence network of the 30 top
# features.
dat1 <- subset(dat[, c(1, 4, 9)], Type == "Child Restraint System")

corp1 <- corpus(dat1$World)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Child Restraint System / Schema: co-occurrence network of the 30 top
# features.
dat2 <- subset(dat[, c(1, 4, 10)], Type == "Child Restraint System")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Child Restraint System / Action: co-occurrence network of the 30 top
# features.
dat3 <- subset(dat[, c(1, 4, 11)], Type == "Child Restraint System")
dat1 <- dat3

corp1 <- corpus(dat1$Action)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Child Restraint System / Post-Crash: co-occurrence network of the 30 top
# features.
dat4 <- subset(dat[, c(1, 4, 12)], Type == "Child Restraint System")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 30))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#DD8D29", edge_size = 2,
  vertex_labelsize = 8
)

Air bag

World

# Air bag / World: co-occurrence network of the 40 top features.
dat1 <- subset(dat[, c(1, 4, 9)], Type == "Air bag")

corp1 <- corpus(dat1$World)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#798E87", edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Air bag / Schema: co-occurrence network of the 40 top features.
dat2 <- subset(dat[, c(1, 4, 10)], Type == "Air bag")
dat1 <- dat2

corp1 <- corpus(dat1$Schema)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#CDC08C", edge_size = 2,
  vertex_labelsize = 8
)

Action

# Air bag / Action: co-occurrence network of the 40 top features.
dat3 <- subset(dat[, c(1, 4, 11)], Type == "Air bag")
dat1 <- dat3

corp1 <- corpus(dat1$Action)
# all_stops is the stopword vector assigned for the first network earlier in
# this script, so it is reused here instead of being rebuilt.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# quanteda.textplots was attached before the first network plot.
textplot_network(
  topgat_fcm,
  min_freq = 0.1, edge_alpha = 0.4,
  edge_color = "#C7B19C", edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Text network for the PostCrash column of the "Air bag" rows.
# Columns 1, 4 and 12 of dat are kept; the code below reads Type and PostCrash
# from that selection.
dat4 <- subset(dat[, c(1, 4, 12)], Type == "Air bag")
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)

# Custom terms to drop, followed by the English stop words.
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red", "g", "h",
  "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "x", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Tokenise without punctuation or numbers, drop the stop list, build the dfm.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

# NOTE(review): dfm_select() is called without a pattern, so it presumably
# returns dfm1 unchanged -- confirm before simplifying.
tag_dfm <- dfm_select(dfm1)
toptag <- tag_dfm |>
  topfeatures(n = 40) |>
  names()

# Feature co-occurrence matrix, restricted to the 40 most frequent features.
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)

library(quanteda.textplots)
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_color = "#DD8D29",
  edge_alpha = 0.4,
  edge_size = 2,
  vertex_labelsize = 8
)

Malfunction (Vehicle Defect)

World

# Text network for the World column of the "Malfunction ..." rows.
# Columns 1, 4 and 9 of dat are kept; the code below reads Type and World
# from that selection.
# The previous chain of `==` / `|` tests listed "Malfunction Alleged Weld
# Separation" twice; a single %in% test over the distinct labels selects the
# same rows (rows with a missing Type are still dropped).
dat1 <- subset(
  dat[, c(1, 4, 9)],
  Type %in% c(
    "Malfunction ABS Failure",
    "Malfunction Alleged Weld Separation",
    "Malfunction Brake Fail",
    "Malfunction Cruise Control",
    "Malfunction Front Axle Shaft U-Joint",
    "Malfunction Seat Belt Separation",
    "Malfunction Steering",
    "Malfunction Vehicle Component 01 2020 AR"
  )
)

corp1 <- corpus(dat1$World)

# Custom terms to drop, followed by the English stop words.
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red", "g", "h",
  "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "x", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Tokenise without punctuation or numbers, drop the stop list, build the dfm.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

# NOTE(review): dfm_select() is called without a pattern, so it presumably
# returns dfm1 unchanged -- confirm before simplifying.
tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))

# Feature co-occurrence matrix, restricted to the 40 most frequent features.
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)

library(quanteda.textplots)
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_alpha = 0.4,
  edge_color = "#798E87",
  edge_size = 2,
  vertex_labelsize = 8
)

Schema

# Text network for the Schema column of the "Malfunction ..." rows.
# Columns 1, 4 and 10 of dat are kept; the code below reads Type and Schema
# from that selection.
# The previous chain of `==` / `|` tests listed "Malfunction Alleged Weld
# Separation" twice; a single %in% test over the distinct labels selects the
# same rows (rows with a missing Type are still dropped).
dat2 <- subset(
  dat[, c(1, 4, 10)],
  Type %in% c(
    "Malfunction ABS Failure",
    "Malfunction Alleged Weld Separation",
    "Malfunction Brake Fail",
    "Malfunction Cruise Control",
    "Malfunction Front Axle Shaft U-Joint",
    "Malfunction Seat Belt Separation",
    "Malfunction Steering",
    "Malfunction Vehicle Component 01 2020 AR"
  )
)
dat1 <- dat2

corp1 <- corpus(dat1$Schema)

# Custom terms to drop, followed by the English stop words.
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red", "g", "h",
  "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "x", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Tokenise without punctuation or numbers, drop the stop list, build the dfm.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

# NOTE(review): dfm_select() is called without a pattern, so it presumably
# returns dfm1 unchanged -- confirm before simplifying.
tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))

# Feature co-occurrence matrix, restricted to the 40 most frequent features.
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)

library(quanteda.textplots)
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_alpha = 0.4,
  edge_color = "#CDC08C",
  edge_size = 2,
  vertex_labelsize = 8
)

Action

# Text network for the Action column of the "Malfunction ..." rows.
# Columns 1, 4 and 11 of dat are kept; the code below reads Type and Action
# from that selection.
# The previous chain of `==` / `|` tests listed "Malfunction Alleged Weld
# Separation" twice; a single %in% test over the distinct labels selects the
# same rows (rows with a missing Type are still dropped).
dat3 <- subset(
  dat[, c(1, 4, 11)],
  Type %in% c(
    "Malfunction ABS Failure",
    "Malfunction Alleged Weld Separation",
    "Malfunction Brake Fail",
    "Malfunction Cruise Control",
    "Malfunction Front Axle Shaft U-Joint",
    "Malfunction Seat Belt Separation",
    "Malfunction Steering",
    "Malfunction Vehicle Component 01 2020 AR"
  )
)
dat1 <- dat3

corp1 <- corpus(dat1$Action)

# Custom terms to drop, followed by the English stop words.
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red", "g", "h",
  "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "x", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Tokenise without punctuation or numbers, drop the stop list, build the dfm.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

# NOTE(review): dfm_select() is called without a pattern, so it presumably
# returns dfm1 unchanged -- confirm before simplifying.
tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))

# Feature co-occurrence matrix, restricted to the 40 most frequent features.
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)

library(quanteda.textplots)
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_alpha = 0.4,
  edge_color = "#C7B19C",
  edge_size = 2,
  vertex_labelsize = 8
)

Post-Crash

# Text network for the PostCrash column of the "Malfunction ..." rows.
# Columns 1, 4 and 12 of dat are kept; the code below reads Type and PostCrash
# from that selection.
# The previous chain of `==` / `|` tests listed "Malfunction Alleged Weld
# Separation" twice; a single %in% test over the distinct labels selects the
# same rows (rows with a missing Type are still dropped).
dat4 <- subset(
  dat[, c(1, 4, 12)],
  Type %in% c(
    "Malfunction ABS Failure",
    "Malfunction Alleged Weld Separation",
    "Malfunction Brake Fail",
    "Malfunction Cruise Control",
    "Malfunction Front Axle Shaft U-Joint",
    "Malfunction Seat Belt Separation",
    "Malfunction Steering",
    "Malfunction Vehicle Component 01 2020 AR"
  )
)
dat1 <- dat4

corp1 <- corpus(dat1$PostCrash)

# Custom terms to drop, followed by the English stop words.
all_stops <- c(
  "crash", "crashes", "ambulance", "driver", "vehicle", "m", "f", "ft", "cot",
  "vehicles", "cars", "na", "mph", "kmh", "winsmash", "solara", "yaris", "c",
  "ae", "edr", "v", "b", "d", "e", "black", "white", "blue", "red", "g", "h",
  "k", "n", "o", "p", "q", "s", "t", "u", "w", "x", "y", "z",
  "sec", "secs", "cm", "x", "crs", "sci",
  "westbound", "eastbound", "southbound", "northbound",
  stopwords("en")
)

# Tokenise without punctuation or numbers, drop the stop list, build the dfm.
dfm1 <- corp1 |>
  tokens(remove_punct = TRUE, remove_numbers = TRUE) |>
  tokens_remove(pattern = all_stops) |>
  dfm()

# NOTE(review): dfm_select() is called without a pattern, so it presumably
# returns dfm1 unchanged -- confirm before simplifying.
tag_dfm <- dfm_select(dfm1)
toptag <- names(topfeatures(tag_dfm, 40))

# Feature co-occurrence matrix, restricted to the 40 most frequent features.
tag_fcm <- fcm(tag_dfm)
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)

library(quanteda.textplots)
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_alpha = 0.4,
  edge_color = "#DD8D29",
  edge_size = 2,
  vertex_labelsize = 8
)

Text Network

Subasish Das (@subasish_das)

2023-07-20

Text Network

Ambulance

World

Schema

Action

Post-Crash

Crash Avoidance Technology

World

Schema

Action

Post-Crash

Rollover

World

Schema

Action

Post-Crash

Unintended Acceleration

World

Schema

Action

Post-Crash

Guardrail End Terminal

World

Schema

Action

Post-Crash

Child Restraint System

World

Schema

Action

Post-Crash

Air bag

World

Schema

Action

Post-Crash

Malfunction (Vehicle Defect)

World

Schema

Action

Post-Crash