library(tidyverse)
# Read the data ----
# Load the script file SW_EpisodeIV.txt into the data frame `ep4`.
# read.table() is called with all defaults, so header detection, separator and
# quoting are whatever R infers from the file. The code further down expects
# the result to have `character` and `dialogue` columns.
# NOTE(review): the query string carries X-Goog-Date=20211008T083629Z and
# X-Goog-Expires=259199, so this looks like a time-limited signed link that has
# presumably expired -- confirm, and consider reading a local copy instead.
ep4 <- read.table("https://storage.googleapis.com/kagglesdsdata/datasets/25491/32521/SW_EpisodeIV.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20211008%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20211008T083629Z&X-Goog-Expires=259199&X-Goog-SignedHeaders=host&X-Goog-Signature=5ada39ac9c3b13ac3bd546774e35839c9dbda901d9b9820ce4ab20514b15b3377d3efe40fb15d05fa144c4658d09bd85f8c90d07c1e32ea9745957f1cd9d5fe63c02b6abe01ff84d513dd81e60a8781c93b5665e53e730dcf011e8385da85e2f8bc08babd4b107ceb65f2cb970a9486baa908723c688f7cfe0319d420491cb5f619f569ca5da4f300eff3fbb4306d39ab6a3d6444df343841355178b98a35020974af4da548218fd59b8b43ce25dc56ee7b9eba96f91ee1f676134548ac302bf9bb063d82eed7a33ac7c28c9759f4da958e5710e2f4c5f81d7f46b64268e2a204b07de3c037d97bc7ecf5a6bf5ff6af990146b913d259c7e007966b03989c6bb")
# The overall idea is to show a timeline of each character's lines. Plotting
# the cumulative count lets us compare the distribution, frequency, and total
# number of lines across characters.
# Tag every row of the script with helper columns ----
# `line` is the row's position in the script (1, 2, 3, ...); it is the x-axis
# of the timeline plots.
# `one` is a constant 1 per row; its cumulative sum within a character gives
# that character's running line count.
# Both lengths come from nrow(ep4) instead of a hard-coded 1010, so the script
# still works if the input has a different number of rows (including zero).
ep4$line <- seq_len(nrow(ep4))
ep4$one <- rep(1, nrow(ep4))
# Per-character cumulative line counts ----

# Build the cumulative-lines table for a single speaker.
#
# Keeps only the rows of `script` whose `character` equals `speaker`, drops the
# `dialogue` text, and appends `count`: the running total of that speaker's
# lines, computed as the cumulative sum of the `one` column.
#
# Arguments:
#   script   data frame with `character`, `dialogue` and `one` columns.
#   speaker  character name exactly as spelled in `script$character`.
# Returns:
#   the filtered data frame (without `dialogue`) plus the `count` column; it
#   has zero rows when `speaker` never appears in `script`.
cumulative_lines <- function(script, speaker) {
  # `.env$` pins `speaker` to the function argument, so a column of the same
  # name inside `script` could never shadow it.
  spoken <- script %>%
    filter(character == .env$speaker) %>%
    select(-dialogue)
  spoken$count <- cumsum(spoken$one)
  spoken
}

# One table per character; the variable names are kept as they were so that
# any later code referring to them keeps working.
leia <- cumulative_lines(ep4, "LEIA")
ben <- cumulative_lines(ep4, "BEN")
han <- cumulative_lines(ep4, "HAN")
luke <- cumulative_lines(ep4, "LUKE")
red <- cumulative_lines(ep4, "RED LEADER")
three <- cumulative_lines(ep4, "THREEPIO")
vader <- cumulative_lines(ep4, "VADER")
# NOTE(review): EMPEROR, LANDO and YODA presumably have no lines in the
# Episode IV script, in which case these three tables are empty and add
# nothing to the plots -- confirm whether they are still wanted here.
emp <- cumulative_lines(ep4, "EMPEROR")
lando <- cumulative_lines(ep4, "LANDO")
yoda <- cumulative_lines(ep4, "YODA")

# Stack the per-character tables into one long data frame for plotting.
timeline <- rbind(
  ben, han, leia, luke,
  red, three, vader, emp,
  lando, yoda
)
# Cumulative line count against script position, one coloured line per
# character, all drawn in a single panel.
ggplot(
  data = timeline,
  mapping = aes(x = line, y = count, color = character)
) +
  geom_line() +
  theme_bw()
# The same cumulative counts drawn as semi-transparent filled areas, split
# into one panel per character.
ggplot(
  data = timeline,
  mapping = aes(x = line, y = count, fill = character)
) +
  geom_area(alpha = 0.5) +
  facet_wrap(~character) +
  theme_bw()