---
title: "TEXT PROCESSING"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: journal
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library(rwhatsapp)
library(tidyverse)
library(ggplot2)
library(lubridate)
library(tidytext)
library(stopwords)
library(tidymodels)
library(syuzhet)
library(RColorBrewer)
```
## dataset description {.tabset}
### Messages per day
```{r}
# Load the exported WhatsApp chat and drop rows whose author is missing.
chat_path <- "/cloud/project/whatsapp/WhatsApp Chat with Saran 🍲BU MCA.txt"
chat <- filter(rwa_read(chat_path), !is.na(author))

# Count messages per calendar day and draw one bar per day.
daily_counts <- count(chat, day = lubridate::date(time))
ggplot(daily_counts, aes(x = day, y = n)) +
  geom_col() +
  ggtitle("Messages per day")
```
## dataset description {.tabset}
### Number of messages sent by each individual
```{r}
# Horizontal bar chart of the total message count per author, with authors
# ordered by that count.
messages_by_author <- count(chat, author)
ggplot(
  messages_by_author,
  aes(x = reorder(author, n), y = n, fill = author)
) +
  geom_col() +
  labs(
    x = "Texting",
    y = "Number Of Messages",
    title = "Relation between the messages sent by Each Person"
  ) +
  theme_bw() +
  # Flip the axes so the author names run along the vertical axis.
  coord_flip()
```
### Words often used by each person
```{r}
# Tokens to exclude before counting: the English stop-word list plus a
# hand-picked set of extra tokens specific to this chat.
to_remove <- c(
  stopwords(language = "en"),
  "media", "omitted", "ref", "2", "yes", "yess", "stu", "us", "go",
  "bro", "one", "well", "get", "just", "idk", "fuck", "u", "1", "n", "ok",
  "int", "can", "3", "cse", "pakkii", "naayee", "jiminipabo", "love"
)

# One row per word of every message, with the excluded tokens removed.
# `chatx` is reused by the word-cloud chunk further down.
chatx <- chat %>%
  unnest_tokens(output = word, input = text) %>%
  filter(!word %in% to_remove)

# Six most frequent words per author (ties kept), one facet per author.
chatx %>%
  count(author, word, sort = TRUE) %>%
  group_by(author) %>%
  slice_max(n, n = 6) %>%
  ungroup() %>%
  ggplot(aes(x = reorder_within(word, n, author), y = n, fill = author)) +
  geom_col(show.legend = FALSE) +
  # The x aesthetic holds the words and y holds the counts; coord_flip() only
  # changes where they are drawn, so the labels must follow the aesthetics.
  labs(x = "words", y = "frequency") +
  coord_flip() +
  facet_wrap(~author, ncol = 2, scales = "free_y") +
  # Strips the per-facet suffix that reorder_within() adds to the labels.
  scale_x_reordered() +
  ggtitle("Words oftenly used by each person")
```
## dataset description {.tabset}
### Wordcloud
```{r}
# Word cloud of up to 100 words from the filtered tokens, plotted in
# decreasing frequency order rather than at random.
# "Dark2" defines only 8 colours; requesting 9 makes brewer.pal() warn and
# return 8 anyway, so ask for 8 directly to keep the rendered output clean.
wordcloud::wordcloud(
  chatx$word,
  max.words = 100,
  random.order = FALSE,
  scale = c(2, 1),
  colors = brewer.pal(8, "Dark2")
)
```