# Star Wars VIZ

library(tidyverse)

# Read the data
# Loads SW_EpisodeIV.txt into the data frame `ep4`; the code below relies on
# it having `character` and `dialogue` columns.
# NOTE(review): this is a signed storage.googleapis.com URL whose query string
# carries X-Goog-Date=20211008T083629Z and X-Goog-Expires=259199 (presumably
# seconds, i.e. about three days), so the link has most likely expired.
# Confirm, and consider reading a downloaded local copy of the file instead.
ep4 <- read.table("https://storage.googleapis.com/kagglesdsdata/datasets/25491/32521/SW_EpisodeIV.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20211008%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20211008T083629Z&X-Goog-Expires=259199&X-Goog-SignedHeaders=host&X-Goog-Signature=5ada39ac9c3b13ac3bd546774e35839c9dbda901d9b9820ce4ab20514b15b3377d3efe40fb15d05fa144c4658d09bd85f8c90d07c1e32ea9745957f1cd9d5fe63c02b6abe01ff84d513dd81e60a8781c93b5665e53e730dcf011e8385da85e2f8bc08babd4b107ceb65f2cb970a9486baa908723c688f7cfe0319d420491cb5f619f569ca5da4f300eff3fbb4306d39ab6a3d6444df343841355178b98a35020974af4da548218fd59b8b43ce25dc56ee7b9eba96f91ee1f676134548ac302bf9bb063d82eed7a33ac7c28c9759f4da958e5710e2f4c5f81d7f46b64268e2a204b07de3c037d97bc7ecf5a6bf5ff6af990146b913d259c7e007966b03989c6bb")

## Cumulative Distribution of Lines

# The overall idea is to show a timeline of each character's lines. Plotting
# the counts cumulatively also lets us compare the distribution, frequency,
# and total number of lines across characters.

# Add two helper columns to the script data:
#   line - position of each row in the script (1, 2, ..., number of rows)
#   one  - a constant 1 per row, so cumsum() over it gives a running count
# The length comes from the data itself instead of a hard-coded 1010, so the
# assignments no longer fail when the file has a different number of rows.
ep4$line <- seq_len(nrow(ep4))
ep4$one <- rep(1, nrow(ep4))

# Build one data frame per character holding that character's rows of `ep4`
# (without the dialogue text) plus `count`, the running total of their lines,
# then stack them into `timeline` for plotting. A single helper replaces the
# ten copy-pasted filter/select/cumsum stanzas; every data frame keeps its
# original name.

# Return the rows of `script` spoken by `speaker`, with the `dialogue` column
# dropped and a `count` column holding the cumulative sum of `one`.
cumulative_lines <- function(script, speaker) {
  spoken <- script %>%
    # .env$ makes sure `speaker` is the function argument, never a column.
    filter(character == .env$speaker) %>%
    select(-dialogue)
  spoken$count <- cumsum(spoken$one)
  spoken
}

### LEIA
leia <- cumulative_lines(ep4, "LEIA")

### BEN
ben <- cumulative_lines(ep4, "BEN")

### HAN
han <- cumulative_lines(ep4, "HAN")

### LUKE
luke <- cumulative_lines(ep4, "LUKE")

### RED LEADER
red <- cumulative_lines(ep4, "RED LEADER")

### THREEPIO
three <- cumulative_lines(ep4, "THREEPIO")

### VADER
vader <- cumulative_lines(ep4, "VADER")

# NOTE(review): the next three characters may have no rows in the Episode IV
# script, in which case their data frames are empty - confirm whether they
# are meant for a different episode's data.

### EMPEROR
emp <- cumulative_lines(ep4, "EMPEROR")

### LANDO
lando <- cumulative_lines(ep4, "LANDO")

### YODA
yoda <- cumulative_lines(ep4, "YODA")


###### RBIND

# Stack the per-character frames in the same order as before.
timeline <- rbind(ben, han, leia, luke,
                  red, three, vader, emp,
                  lando, yoda)


# Line chart: each character's running line count against script position,
# one coloured line per character.
ggplot(
  data = timeline,
  mapping = aes(line, count, colour = character)
) +
  geom_line() +
  theme_bw()

# Area chart of the same running counts, split into one panel per character
# with a semi-transparent fill.
ggplot(
  data = timeline,
  mapping = aes(line, count, fill = character)
) +
  geom_area(alpha = 0.5) +
  facet_wrap(~ character) +
  theme_bw()