A glance at the data
Looking at the Data Dictionary for this data set, together with the number of missing values per column, helps us keep just a few important columns for the visualization stage. Those columns (the ones that I will keep for the TidyTuesday visualization) are: * char: Name of the speaking character. * char_type: Either Person or Computer. * line: The complete line of dialog (may contain more speech than the speech interaction). Parentheticals are directions and are not spoken. * type: The type of interaction, see detailed definitions below. * pri_type: The primary interaction type as defined by the ranking below. * domain: The domain of interaction, see detailed definitions below. * sub_domain: The sub-domain of interaction, generally a specific setting, see below. * interaction: The actual speech interaction. May be shorter or longer than the line of dialog.
I will discard is_fed and error because each of them holds a single constant value: is_fed is always TRUE and error is always FALSE.
# Place the intro chart and the missing-values chart side by side.
# NOTE(review): in ggpubr, `heights` sets relative *row* heights; this layout
# has a single row, so `widths` may have been intended -- confirm.
ggarrange(
  chart_intro,
  chart_miss,
  ncol = 2,
  heights = c(10, 10)
)

# Names of the 10 characters with the most rows in `computer`
# (the computer voice is included in this ranking).
talkers <- computer %>%
  count(char, sort = TRUE) %>%
  head(10) %>%
  pull(char)
# Chart 1: Interactions
# Ten characters with the most interactions, excluding the computer voice.
# `count(char, ...)` returns an ungrouped tibble, so nothing downstream
# inherits a leftover grouping, and the columns are renamed by name rather
# than by position.
interactions <- computer %>%
  filter(char != "Computer Voice") %>%
  count(char, sort = TRUE) %>%
  head(10) %>%
  # Factor levels in ascending order of count: after coord_flip() the
  # character with the most interactions ends up at the top of the chart.
  mutate(char = factor(char, levels = char[order(n)])) %>%
  rename(Character = char, Interactions = n)
# Chart 2: Wordclouds
# Split every interaction into one row per word, then drop the Snowball
# stop words. The join key is spelled out so the join does not have to guess
# it (and does not print a "Joining, by = ..." message).
textcleaned <- computer %>%
  unnest_tokens(word, interaction) %>%
  anti_join(get_stopwords(source = "snowball"), by = "word")
Joining, by = "word"
# Frequency of each remaining word, most frequent first. `count(word, ...)`
# returns an ungrouped tibble instead of one that is still grouped by `word`
# (one group per row).
df_wordcloud <- textcleaned %>%
  count(word, sort = TRUE)
# Chart 3: Waffles
# Number of interactions per (character, type) for the top talkers, plus the
# share of each type within its character as a percentage label.
interactions_pct <- computer %>%
  filter(char %in% talkers) %>%
  group_by(char, type) %>%
  # Keep the grouping by `char` on purpose: the percentage below must be
  # computed within each character. Stating it explicitly also silences the
  # "summarise() has grouped output" message.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(rel.freq = paste0(round(100 * n / sum(n), 0), "%")) %>%
  ungroup()
`summarise()` has grouped output by 'char'. You can override using the `.groups` argument.
# Shared colours for every chart below.
gridlines       <- "#006d77"  # panel grid lines
solidchartcolor <- "#e9c46a"  # bar outline and fill
backgroundcolor <- "#264653"  # plot, panel and legend backgrounds
# Chart 1: Interactions
# Horizontal rounded-bar chart of interactions per character.
# geom_chicklet() has no `stat` parameter (passing one only triggers an
# "Ignoring unknown parameters: stat" warning), so it is not supplied.
chartinteractions <-
  ggplot(data = interactions, aes(x = Character, y = Interactions)) +
  geom_chicklet(
    color = solidchartcolor,
    fill = solidchartcolor,
    width = 0.75,
    radius = grid::unit(1, "mm")
  ) +
  # Value labels are drawn in the background colour, nudged inside the bars.
  geom_text(
    aes(label = Interactions),
    hjust = 1.6,
    vjust = 0.5,
    color = backgroundcolor,
    size = 3.5
  ) +
  theme(
    axis.text.x = element_text(colour = "white"),
    axis.title.x = element_text(colour = "white"),
    axis.text.y = element_text(colour = "white"),
    axis.title.y = element_text(colour = "white"),
    panel.background = element_rect(
      fill = backgroundcolor, colour = backgroundcolor, linetype = "solid"
    ),
    panel.grid.major = element_line(
      size = 0.5, linetype = "solid", colour = gridlines
    ),
    panel.grid.minor = element_line(
      size = 0.25, linetype = "solid", colour = gridlines
    ),
    plot.background = element_rect(fill = backgroundcolor),
    plot.title = element_text(colour = "white"),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  ) +
  ggtitle("Users with more interactions") +
  coord_flip()
Warning: Ignoring unknown parameters: stat
# Empty, themed placeholder plot: same dark theme as the interactions chart,
# but with no data and no layers.
chartinteractions2 <-
  ggplot() +
  coord_flip() +
  theme(
    plot.background = element_rect(fill = backgroundcolor),
    plot.title = element_text(colour = "white"),
    plot.margin = unit(rep(1, 4), "cm"),
    panel.background = element_rect(
      fill = backgroundcolor,
      colour = backgroundcolor,
      linetype = "solid"
    ),
    panel.grid.major = element_line(
      size = 0.5,
      linetype = "solid",
      colour = gridlines
    ),
    panel.grid.minor = element_line(
      size = 0.25,
      linetype = "solid",
      colour = gridlines
    ),
    axis.title.x = element_text(colour = "white"),
    axis.title.y = element_text(colour = "white"),
    axis.text.x = element_text(colour = "white"),
    axis.text.y = element_text(colour = "white")
  )
# Word cloud of the (at most) 200 most frequent words; the most frequent
# words are placed first and 35% of the words are rotated.
# NOTE(review): wordcloud() draws on the active graphics device; the value
# stored in `wordcloudchart` is presumably not a reusable plot object --
# confirm before passing it to ggarrange().
wordcloudchart <- wordcloud(
  words = df_wordcloud[["word"]],
  freq = df_wordcloud[["n"]],
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

library(waffle)
#' Build a waffle chart of interaction types for one character
#'
#' Filters `df` to the rows of a single character and draws one waffle
#' square per interaction, coloured by interaction type, on the shared dark
#' background (`backgroundcolor` is read from the enclosing environment).
#'
#' @param df Data frame with columns `char`, `type` and `n`.
#' @param t Character name to keep (compared against `df$char`); also used
#'   as the prefix of the plot title.
#' @param rows Number of rows in the waffle grid.
#' @return A ggplot object.
generatewaffle <- function(df, t, rows) {
  wdata <- df %>% filter(char == t)

  ggplot(wdata, aes(fill = type, values = n)) +
    geom_waffle(n_rows = rows, size = 0.33, colour = "white") +
    theme(
      axis.text.x = element_text(colour = "white"),
      axis.title.x = element_blank(),
      axis.text.y = element_text(colour = "white"),
      axis.title.y = element_blank(),
      panel.background = element_rect(
        fill = backgroundcolor, colour = backgroundcolor, linetype = "solid"
      ),
      # Grid lines use the background colour, so they are not visible.
      panel.grid.major = element_line(
        size = 0.5, linetype = "solid", colour = backgroundcolor
      ),
      panel.grid.minor = element_line(
        size = 0.25, linetype = "solid", colour = backgroundcolor
      ),
      plot.background = element_rect(fill = backgroundcolor),
      plot.title = element_text(colour = "white"),
      legend.background = element_rect(
        colour = "transparent", fill = backgroundcolor
      ),
      # Legend title is drawn in the background colour, which hides it.
      legend.title = element_text(color = backgroundcolor),
      legend.text = element_text(color = "white"),
      legend.key = element_rect(fill = backgroundcolor, color = NA)
    ) +
    ggtitle(paste0(t, ": Types of interaction"))
}
# One waffle chart per featured character: 20-row grids for the three
# charts built first, 10-row grids for the remaining three.
fig1 <- generatewaffle(df = interactions_pct, t = "Computer Voice", rows = 20)
fig2 <- generatewaffle(df = interactions_pct, t = "Geordi", rows = 20)
fig3 <- generatewaffle(df = interactions_pct, t = "Picard", rows = 20)
fig4 <- generatewaffle(df = interactions_pct, t = "Data", rows = 10)
fig5 <- generatewaffle(df = interactions_pct, t = "Riker", rows = 10)
fig6 <- generatewaffle(df = interactions_pct, t = "Beverly", rows = 10)
library(ggpubr)
# Final layout: three stacked rows.
#   row 1: interactions bar chart + Computer Voice waffle
#   row 2: Geordi + Picard waffles
#   row 3: Data + Riker + Beverly waffles
# Only plots may be passed positionally or through `...`: any unknown named
# argument is treated as an extra plot, which triggers a "Cannot convert
# object of class character into a grob" warning and pushes the layout onto
# a second page.
ggarrange(
  ggarrange(chartinteractions, fig1, ncol = 2),
  ggarrange(fig2, fig3, ncol = 2),
  ggarrange(fig4, fig5, fig6, ncol = 3),
  ncol = 1,
  nrow = 3
)
Warning in as_grob.default(plot) :
Cannot convert object of class character into a grob.
$`1`
$`2`
attr(,"class")
[1] "list" "ggarrange"


