What’s Under the Hood When We Talk About Autonomous Vehicles

This is an exercise in visualizing relationships between words by analyzing text in news articles about autonomous vehicles.

## 
## The downloaded binary packages are in
##  /var/folders/xh/lpcnmw0s0tv272x7vrdlmvw00000gn/T//RtmpSLiuIk/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/xh/lpcnmw0s0tv272x7vrdlmvw00000gn/T//RtmpSLiuIk/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/xh/lpcnmw0s0tv272x7vrdlmvw00000gn/T//RtmpSLiuIk/downloaded_packages
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Next, we read in the data: a set of three full articles, one from Forbes and two from VentureBeat.

# Read the sample articles from CSV into a data frame.
# stringsAsFactors = FALSE keeps text columns as character vectors on every
# R version (before R 4.0 the default converted strings to factors).
data2 <- read.csv("AV_NewsReader_SampleArticle.csv", stringsAsFactors = FALSE)
## Warning in read.table(file = file, header = header, sep = sep,
## quote = quote, : incomplete final line found by readTableHeader on
## 'AV_NewsReader_SampleArticle.csv'
# Keep a standalone copy of the article text column.
# NOTE(review): this reads column `Text`, while unnest_tokens() below is given
# `text` -- confirm which column name the CSV actually has.
text <- data2$Text
# Tokenize the articles into bigrams (n-grams with n = 2).
art_bigrams <- data2 %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2)
# Show the bigrams ordered from most to least frequent.
art_bigrams %>%
  count(bigram, sort = TRUE)
## # A tibble: 2,301 x 2
##    bigram                  n
##    <chr>               <int>
##  1 autonomous vehicles    57
##  2 self driving           45
##  3 on the                 33
##  4 in the                 30
##  5 autonomous vehicle     27
##  6 driving cars           27
##  7 of the                 24
##  8 and the                21
##  9 of driverless          21
## 10 to be                  21
## # ... with 2,291 more rows
# Split every bigram into its two component words.
bigrams_separated2 <- separate(
  art_bigrams,
  bigram,
  into = c("word1", "word2"),
  sep = " "
)

# Keep only the bigrams in which neither word is a stop word.
bigrams_filtered2 <- filter(
  bigrams_separated2,
  !(word1 %in% stop_words$word),
  !(word2 %in% stop_words$word)
)

# Tally the remaining word pairs, most frequent first.
bigram_counts2 <- count(bigrams_filtered2, word1, word2, sort = TRUE)
# Visualizing a network of bigrams with ggraph
# Install igraph only when it is missing, so re-running the script does not
# download the package again every time.
if (!requireNamespace("igraph", quietly = TRUE)) {
  install.packages("igraph")
}
## 
## The downloaded binary packages are in
##  /var/folders/xh/lpcnmw0s0tv272x7vrdlmvw00000gn/T//RtmpSLiuIk/downloaded_packages
library(igraph)
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Build a graph from the word pairs that occur more than five times;
# each remaining (word1, word2) row becomes an edge carrying its count n.
bigram_graph2 <- graph_from_data_frame(filter(bigram_counts2, n > 5))

# Print a summary of the graph.
bigram_graph2
## IGRAPH 01aa887 DN-- 292 235 -- 
## + attr: name (v/c), n (e/n)
## + edges from 01aa887 (vertex names):
##  [1] autonomous->vehicles       autonomous->vehicle       
##  [3] driving   ->cars           autonomous->cars          
##  [5] vehicle   ->technologies   driverless->cars          
##  [7] driverless->vehicle        human     ->drivers       
##  [9] real      ->estate         technology->isn’t         
## [11] av        ->start          los       ->altos         
## [13] safety    ->standards      start     ->act           
## [15] autonomous->taxi           car       ->culture       
## + ... omitted several edges
# Plotting
# Install ggraph only when it is missing, so re-running the script does not
# download the package again every time.
if (!requireNamespace("ggraph", quietly = TRUE)) {
  install.packages("ggraph")
}
## 
## The downloaded binary packages are in
##  /var/folders/xh/lpcnmw0s0tv272x7vrdlmvw00000gn/T//RtmpSLiuIk/downloaded_packages
library(ggraph)
## Loading required package: ggplot2
# Fix the random seed before drawing so the plot can be reproduced.
set.seed(500)

# Basic network plot: plain edges, one point per word, and the word itself
# as a label anchored at the point.
ggraph(bigram_graph2, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), hjust = 1, vjust = 1)

# Closed arrowhead used to show the direction of each edge.
# unit() is qualified with grid:: to match grid::arrow() instead of relying on
# another attached package re-exporting it.
a <- grid::arrow(type = "closed", length = grid::unit(.15, "inches"))

# Directed graph
# Re-seed here so this plot does not depend on the random-number state left
# behind by whatever ran before it and can be reproduced on its own.
set.seed(500)

ggraph(bigram_graph2, layout = "fr") +
  # Edge transparency follows the bigram count n; the end cap stops each
  # arrow slightly short of the node it points to.
  geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = a, end_cap = circle(.07, "inches")) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
  theme_void()