require(rvest)
require(purrr)
require(xml2)
require(rJava)
require(kableExtra)
require(dplyr)
require(tidytext)
require(tidyverse)
require(tm)
require(SnowballC)
require(ggplot2)
require(igraph)
require(ggraph)
require(widyr)
require(wordcloud)
require(tidyr)
require(reshape2)
# Persist the scraped article table when it exists in this session. The object
# is not created in the visible part of this script, so an unconditional
# write.csv() would abort a fresh run before the published copy can be loaded.
if (exists("article_table1_2")) {
  write.csv(article_table1_2, "social_media_and_adolescents.csv",
            row.names = TRUE)
}

# Load the published copy of the article table from GitHub.
social_media_adolescents <- read.csv(
  "https://raw.githubusercontent.com/Luz917/Data_607_Final-_Project/master/social_media_and_adolescents.csv",
  stringsAsFactors = FALSE
)
str(social_media_adolescents)
## 'data.frame': 3 obs. of 3 variables:
## $ Article_id: int 1 2 3
## $ Title : chr "Social Media and Adolescents’ and Young Adults’ Mental Health | National Center for Health Research" "The Negative Effects of Social Media for Teens - SmartSocial" "\n\n\n\tSocial Media and Kids: Some Benefits, Some Worries\n\n\n"
## $ Text : chr "\n\t\tElina Mir and Caroline Novas, National Center for Health Research\nMost young adults use social media, an"| __truncated__ "1,312shares Print This Page510 Facebook408 Email To A Friend188 Twitter104 Gmail102\nThe next generation will e"| __truncated__ "\n\n\n\t\t\t\t\n\n\n\t\t\t\t\taaa\n\n\n\t\t\t\t\tprint\n\n\n\t\t\t\t\temail\n\n\n\t\t\t\t\t\n\n\n\t\t\t\t\t\tsh"| __truncated__
## This method is adapted from the sentiment analysis example in DataCamp's tutorial
# Tokens to discard in addition to the standard stop-word list.
undesirable_words <- c("thousand", "the", "hundred", "three", "percent",
                       "four", "null")

# One row per word: tokenise the article text, then drop the unwanted tokens,
# anything shorter than three characters, and the tidytext stop words.
social_media_tidy <- social_media_adolescents %>%
  unnest_tokens(word, Text) %>%
  filter(!word %in% undesirable_words) %>%
  filter(nchar(word) >= 3) %>%
  anti_join(stop_words, by = "word") # explicit key; no "Joining, by" message
glimpse(social_media_tidy)
## Observations: 2,349
## Variables: 3
## $ Article_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Title <chr> "Social Media and Adolescents’ and Young Adults’ Mental ...
## $ word <chr> "elina", "mir", "caroline", "novas", "national", "center...
# Word frequencies over the whole corpus, most frequent first.
totalwords1 <- social_media_tidy %>%
  count(word, sort = TRUE)

# Show the 20 most frequent words. head() avoids the all-NA padding rows that
# `[1:20, ]` produces when fewer than 20 distinct words are present.
kable(head(totalwords1, 20)) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(1, bold = TRUE, border_right = FALSE) %>%
  column_spec(1, width = "15em", background = "lightblue")
| word | n |
|---|---|
| social | 122 |
| media | 113 |
| online | 29 |
| false | 20 |
| health | 20 |
| depression | 19 |
| teens | 19 |
| children | 18 |
| people | 18 |
|  | 17 |
| parents | 17 |
| positive | 17 |
| time | 17 |
| effects | 16 |
| mental | 16 |
| kids | 15 |
| adolescents | 12 |
| apps | 12 |
| negative | 12 |
| phone | 12 |
library(ggplot2)

# Bar chart of the words used more than ten times. Words are ordered by count
# and the axes are flipped so the words read horizontally.
social_media_tidy %>%
  count(word, sort = TRUE) %>%
  filter(n > 10) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  labs(x = NULL) +
  coord_flip()
# Print the NRC lexicon (columns: word, sentiment); a word can appear under
# several sentiments.
get_sentiments("nrc")
## # A tibble: 13,901 x 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ... with 13,891 more rows
# Print the Bing lexicon (columns: word, sentiment).
get_sentiments("bing")
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
# Print the AFINN lexicon (columns: word, numeric value).
get_sentiments("afinn")
## # A tibble: 2,477 x 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ... with 2,467 more rows
# NRC words tagged with the "trust" sentiment.
nrc_trust <- get_sentiments("nrc") %>%
  filter(sentiment == "trust")

# Frequency of each trust word in the articles, most frequent first.
social_media_nrc <- social_media_tidy %>%
  inner_join(nrc_trust, by = "word") %>%
  count(word, sort = TRUE)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned.
kable(social_media_nrc) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(1, bold = TRUE, border_right = FALSE) %>%
  column_spec(1, width = "15em", background = "lightblue")
| word | n |
|---|---|
| share | 9 |
| communication | 6 |
| found | 5 |
| personal | 5 |
| center | 4 |
| safe | 4 |
| institute | 3 |
| related | 3 |
| school | 3 |
| teacher | 3 |
| true | 3 |
| advise | 2 |
| association | 2 |
| friend | 2 |
| provide | 2 |
| real | 2 |
| relevant | 2 |
| academic | 1 |
| accountable | 1 |
| accounts | 1 |
| achievement | 1 |
| author | 1 |
| bank | 1 |
| calls | 1 |
| celebrity | 1 |
| communicate | 1 |
| constantly | 1 |
| content | 1 |
| deserve | 1 |
| effective | 1 |
| endless | 1 |
| engaging | 1 |
| esteem | 1 |
| exchange | 1 |
| expert | 1 |
| favorite | 1 |
| guide | 1 |
| helpful | 1 |
| inspire | 1 |
| intense | 1 |
| intimate | 1 |
| invite | 1 |
| law | 1 |
| manual | 1 |
| marshal | 1 |
| medical | 1 |
| obvious | 1 |
| organization | 1 |
| poll | 1 |
| prevalent | 1 |
| professor | 1 |
| resources | 1 |
| smith | 1 |
| suggest | 1 |
| teach | 1 |
| team | 1 |
| title | 1 |
| trust | 1 |
| volunteer | 1 |
# NRC words tagged with the "sadness" sentiment.
nrc_sadness <- get_sentiments("nrc") %>%
  filter(sentiment == "sadness")

# Frequency of each sadness word in the articles, most frequent first.
social_media_nrcneg <- social_media_tidy %>%
  inner_join(nrc_sadness, by = "word") %>%
  count(word, sort = TRUE)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned.
kable(social_media_nrcneg) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(1, bold = TRUE, border_right = FALSE) %>%
  column_spec(1, width = "15em", background = "lightblue")
| word | n |
|---|---|
| depression | 19 |
| negative | 12 |
| anxiety | 11 |
| suicide | 7 |
| illness | 3 |
| isolation | 3 |
| unhealthy | 3 |
| depressive | 2 |
| deprivation | 2 |
| isolated | 2 |
| loneliness | 2 |
| suffering | 2 |
| worse | 2 |
| art | 1 |
| bad | 1 |
| concerned | 1 |
| difficulty | 1 |
| disconnect | 1 |
| distress | 1 |
| endless | 1 |
| esteem | 1 |
| gray | 1 |
| hanging | 1 |
| inadequate | 1 |
| inappropriate | 1 |
| injury | 1 |
| lie | 1 |
| limited | 1 |
| missing | 1 |
| music | 1 |
| unpopular | 1 |
| worried | 1 |
| worry | 1 |
# Count every Bing-lexicon word in the articles together with its sentiment
# label, most frequent first.
bing_word_counts <- social_media_tidy %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned.
kable(bing_word_counts) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(1, bold = TRUE, border_right = FALSE) %>%
  column_spec(1, width = "15em", background = "lightblue")
| word | sentiment | n |
|---|---|---|
| false | negative | 20 |
| depression | negative | 19 |
| positive | positive | 17 |
| negative | negative | 12 |
| anxiety | negative | 11 |
| suicide | negative | 7 |
| symptoms | negative | 7 |
| poor | negative | 6 |
| risk | negative | 6 |
| healthy | positive | 5 |
| issues | negative | 4 |
| popular | positive | 4 |
| safe | positive | 4 |
| benefits | positive | 3 |
| concerns | negative | 3 |
| illness | negative | 3 |
| isolation | negative | 3 |
| ready | positive | 3 |
| unhealthy | negative | 3 |
| alarm | negative | 2 |
| bullying | negative | 2 |
| concern | negative | 2 |
| difficult | negative | 2 |
| enhance | positive | 2 |
| fear | negative | 2 |
| harm | negative | 2 |
| inadequacy | negative | 2 |
| isolated | negative | 2 |
| issue | negative | 2 |
| lead | positive | 2 |
| limits | negative | 2 |
| loneliness | negative | 2 |
| obsession | positive | 2 |
| recommendations | positive | 2 |
| smart | positive | 2 |
| suffering | negative | 2 |
| susceptible | negative | 2 |
| valuable | positive | 2 |
| worse | negative | 2 |
| accomplish | positive | 1 |
| achievement | positive | 1 |
| addicted | negative | 1 |
| bad | negative | 1 |
| bored | negative | 1 |
| calm | positive | 1 |
| classic | positive | 1 |
| colorful | positive | 1 |
| comfortably | positive | 1 |
| concerned | negative | 1 |
| confront | negative | 1 |
| dangerous | negative | 1 |
| deceptively | negative | 1 |
| dedicated | positive | 1 |
| deprive | negative | 1 |
| difficulty | negative | 1 |
| distress | negative | 1 |
| distrust | negative | 1 |
| disturb | negative | 1 |
| easier | positive | 1 |
| educated | positive | 1 |
| effective | positive | 1 |
| engaging | positive | 1 |
| enhanced | positive | 1 |
| enticing | positive | 1 |
| envy | positive | 1 |
| excessive | negative | 1 |
| facilitate | positive | 1 |
| fail | negative | 1 |
| favorite | positive | 1 |
| gossip | negative | 1 |
| hang | negative | 1 |
| helpful | positive | 1 |
| helping | positive | 1 |
| ideally | positive | 1 |
| ignore | negative | 1 |
| improves | positive | 1 |
| inadequate | negative | 1 |
| inappropriate | negative | 1 |
| injury | negative | 1 |
| inspire | positive | 1 |
| integral | positive | 1 |
| intense | negative | 1 |
| interfere | negative | 1 |
| intimate | positive | 1 |
| lack | negative | 1 |
| leads | positive | 1 |
| led | positive | 1 |
| lie | negative | 1 |
| limit | negative | 1 |
| limited | negative | 1 |
| lure | negative | 1 |
| motivated | positive | 1 |
| negatives | negative | 1 |
| objection | negative | 1 |
| overwhelmingly | negative | 1 |
| passive | negative | 1 |
| positives | positive | 1 |
| problematic | negative | 1 |
| protection | positive | 1 |
| risks | negative | 1 |
| risky | negative | 1 |
| safely | positive | 1 |
| significant | positive | 1 |
| strong | positive | 1 |
| stronger | positive | 1 |
| success | positive | 1 |
| successfully | positive | 1 |
| support | positive | 1 |
| tempting | positive | 1 |
| toxic | negative | 1 |
| trust | positive | 1 |
| trusted | positive | 1 |
| unknown | negative | 1 |
| unpopular | negative | 1 |
| warned | negative | 1 |
| worried | negative | 1 |
| worries | negative | 1 |
| worry | negative | 1 |
| worst | negative | 1 |
# Top contributors to each Bing sentiment, one facet per sentiment.
# The ranking column is named explicitly; a bare top_n(10) silently falls back
# to the last column and emits a "Selecting by n" message.
bing_word_counts %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  coord_flip()
# Net Bing sentiment (positive minus negative word count) per article and per
# run of matched words; the run index is the row position integer-divided by 20.
social_media_sentiment_bing <- social_media_tidy %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = row_number() %/% 20) %>%
  count(Article_id, index, sentiment) %>%
  spread(key = sentiment, value = n, fill = 0) %>%
  mutate(sentiment = positive - negative)

# One panel per article, each with its own x range.
ggplot(
  data = social_media_sentiment_bing,
  mapping = aes(x = index, y = sentiment, fill = Article_id)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~Article_id, ncol = 1, scales = "free_x")
# Mean AFINN score of the scored words in each article. The previous
# expression, sum(value * n() / sum(n())), collapses to sum(value) because n()
# is the scalar group size, so it produced a total rather than the average
# that the axis label below promises.
social_media_afinn <- social_media_tidy %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  group_by(Article_id) %>%
  summarize(value = mean(value))

# Articles ordered by their average score; bar colour marks the sign.
social_media_afinn %>%
  mutate(Article_id = reorder(Article_id, value)) %>%
  ggplot(aes(Article_id, value, fill = value > 0)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  ylab("Average sentiment value")
# Number of consecutive tokens that make up one sentiment chunk.
sentiment_chunk_size <- 80

# Tag every token with its chunk BEFORE joining to any lexicon. Deriving the
# index from row_number() after the joins numbered only the matched rows, and
# for the stacked Bing + NRC table it kept counting across the two methods, so
# the NRC indices started where the Bing ones ended and the three panels did
# not share a common x axis. Chunks follow token order across the whole corpus.
social_media_chunked <- social_media_tidy %>%
  mutate(index = (row_number() - 1) %/% sentiment_chunk_size)

# AFINN: sum of the numeric word scores in each chunk.
afinn <- social_media_chunked %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  group_by(index) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")

# Bing and NRC: positive minus negative word count in each chunk.
bing_and_nrc <- bind_rows(
  social_media_chunked %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing"),
  social_media_chunked %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index, sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)

# One panel per lexicon, all on the same chunk index.
bind_rows(afinn, bing_and_nrc) %>%
  ggplot(aes(index, sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# Count how often each pair of words co-occurs within the same Article_id;
# each unordered pair is listed once, most frequent first.
social_media_pairs <- pairwise_count(
  social_media_tidy,
  item = word,
  feature = Article_id,
  sort = TRUE,
  upper = FALSE
)
social_media_pairs
## # A tibble: 306,121 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 health research 3
## 2 health social 3
## 3 research social 3
## 4 health media 3
## 5 research media 3
## 6 social media 3
## 7 health facebook 3
## 8 research facebook 3
## 9 social facebook 3
## 10 media facebook 3
## # ... with 306,111 more rows
# Network of the word pairs with a co-occurrence count of at least 3; edge
# opacity and width both scale with the count. The seed is set before the
# layout is computed.
set.seed(1234)
social_media_pairs %>%
  filter(n >= 3) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(
    mapping = aes(edge_alpha = n, edge_width = n),
    edge_colour = "royalblue"
  ) +
  geom_node_point(size = 5) +
  geom_node_text(
    mapping = aes(label = name),
    repel = TRUE,
    point.padding = unit(0.2, "lines")
  ) +
  theme_void()
# Pairwise correlation between the words that occur at least ten times, using
# the article Title as the co-occurrence unit; highest correlations first.
social_media_cors <- social_media_tidy %>%
  group_by(word) %>%
  filter(n() >= 10) %>%
  pairwise_cor(
    item = word,
    feature = Title,
    sort = TRUE,
    upper = FALSE
  )
social_media_cors
## # A tibble: 351 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 mental instagram 1.000
## 2 adults adolescents 1.000
## 3 adults sites 1.000
## 4 adolescents sites 1.000
## 5 mental apps 1.000
## 6 instagram apps 1.000
## 7 mental depression 1.000
## 8 instagram depression 1.000
## 9 apps depression 1.000
## 10 mental anxiety 1.000
## # ... with 341 more rows
# Network of the word pairs whose correlation is above -0.5; edge opacity and
# width both scale with the correlation. The seed is set before the layout is
# computed.
set.seed(1234)
social_media_cors %>%
  filter(correlation > -0.5) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(
    mapping = aes(edge_alpha = correlation, edge_width = correlation),
    edge_colour = "lightblue"
  ) +
  geom_node_point(size = 5) +
  geom_node_text(
    mapping = aes(label = name),
    repel = TRUE,
    point.padding = unit(0.3, "lines")
  ) +
  theme_void()
- Some of the word pairs have a negative correlation; these are drawn as the faded lines.
# Word cloud limited to 15 words. social_media_tidy already has the stop words
# removed when it is built, so the former second anti_join(stop_words) was a
# no-op and is dropped. The seed is fixed, as for the network plots, so the
# cloud is reproducible between runs.
set.seed(1234)
social_media_tidy %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 15))
# Word-by-sentiment count matrix (rows: words, columns: Bing sentiments),
# built once and reused by both clouds instead of repeating the same pipeline.
bing_sentiment_matrix <- social_media_tidy %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0)

# The same cloud drawn twice with the two greys swapped between the
# sentiments. The seed is reset before each draw so both use the same
# random sequence.
set.seed(1234)
comparison.cloud(bing_sentiment_matrix,
                 colors = c("gray80", "gray20"),
                 max.words = 50)
set.seed(1234)
comparison.cloud(bing_sentiment_matrix,
                 colors = c("gray20", "gray80"),
                 max.words = 50)
## Joining, by = "word"
Is social media good or bad for children and adolescents? There seem to be some benefits to social media, but is the good outweighed by the bad? Looking at the word clouds, you can see some of the positives, such as healthy, safe, smart, and popular. But the negative words include depression, negative, false, suicide, risk, anxiety, illness, isolation, and unhealthy. All of these negative words are very serious and worrisome. The three words that stick out the most to me are DEPRESSION, SUICIDE, and ANXIETY; these are words that I would not generally use when describing adolescents and children. It’s a little scary that this is something that we have to worry about. Doing this project, I am a bit more worried about my niece using social media, but it’s just something that we have to pay attention to.
There were many difficulties in doing this project, especially when cleaning the text. I wanted to use a different way to clean it, but then realized that it was not the best method, until I found the DataCamp article that showed the best way to do it.
NRC Word-Emotion Association Lexicon http://www.saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm
Text Mining with R A Tidy Approach by Julia Silge and David Robinson https://www.tidytextmining.com/index.html
Tidy Sentiment Analysis in R https://www.datacamp.com/community/tutorials/sentiment-analysis-R