library(readr)
## Warning: package 'readr' was built under R version 4.2.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(quanteda)
## Warning: package 'quanteda' was built under R version 4.2.1
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "mMatrix"; definition not updated
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "replValueSp"; definition not updated
## Package version: 3.2.3
## Unicode version: 13.0
## ICU version: 69.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
library(quanteda.textstats)
## Warning: package 'quanteda.textstats' was built under R version 4.2.1
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "mMatrix"; definition not updated
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "replValueSp"; definition not updated
library(quanteda.textplots)
## Warning: package 'quanteda.textplots' was built under R version 4.2.1
library(seededlda)
## Warning: package 'seededlda' was built under R version 4.2.1
## Loading required package: proxyC
## Warning: package 'proxyC' was built under R version 4.2.1
##
## Attaching package: 'proxyC'
## The following object is masked from 'package:stats':
##
## dist
##
## Attaching package: 'seededlda'
## The following object is masked from 'package:stats':
##
## terms
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
library(DT)
## Warning: package 'DT' was built under R version 4.2.1
# KWIC (keywords-in-context) ----
# Load the Instagram post export from its hosted CSV. readr infers the column
# types itself and reports the guessed specification as a message.
Umass <- read_csv(
  file = "https://curiositybits.cc/files/umass-instagram.csv"
)
## Rows: 1421 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Account, User Name, Followers at Posting, Post Created, Type, URL...
## dbl (4): Likes, Comments, Views, Overperforming Score (weighted — Likes ...
## lgl (4): Like and View Counts Disabled, Title, Sponsor Id, Sponsor Name
## date (1): Post Created Date
## time (1): Post Created Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a quanteda corpus whose document text comes from the "Description"
# column. quanteda warns here that NA descriptions are replaced by empty
# strings, so those rows stay in the corpus as empty documents.
corpus_Umass <- corpus(
  x = Umass,
  text_field = "Description"
)
## Warning: NA is replaced by empty string
# Tokenize every document in the corpus with the default tokenizer settings.
data_tokens <- tokens(x = corpus_Umass)

# Locate every token matching the pattern "student*" together with the words
# around it.
kw <- kwic(x = data_tokens, pattern = "student*")

# Preview the first 10 keyword matches. These are matches, not documents: a
# single document can contribute several rows.
head(kw, n = 10)
## Keyword-in-context with 10 matches.
## [text3, 94] giving, financial resources and | student |
## [text4, 21] , September 13 in the | Student |
## [text8, 19] of care to UMass Amherst | students |
## [text9, 5] As part of New | Student |
## [text9, 10] Student Orientation and Transitions, | students |
## [text10, 23] Thank you to all the | students |
## [text10, 36] and to the amazing New | Student |
## [text11, 91] : 00 p.m. - | Students |
## [text12, 4] The UMass Amherst | Student |
## [text12, 16] for the freshmen Senator and | Student |
##
## selectivity. Read more with
## Union Ballroom. UMass Amherst
## to help cope with stress
## Orientation and Transitions, students
## were invited to Athletics 101
## who joined us on Sunday
## Orientation and Transitions team for
## - 4: 15 to
## Government Association is now accepting
## Trustee elections until Tuesday,
# Build the document-feature matrix of raw term counts: tokenize while
# dropping punctuation, numbers, symbols and URLs, remove English stopwords,
# then tabulate the remaining tokens per document.
insta_dfm <- corpus_Umass %>%
  tokens(
    remove_punct = TRUE,
    remove_numbers = TRUE,
    remove_symbols = TRUE,
    remove_url = TRUE
  ) %>%
  tokens_remove(pattern = stopwords("en")) %>%
  dfm()

# Top features ranked by plain word count.
topfeatures(insta_dfm)
## campus umass students amherst link
## 474 462 234 210 169
## day bio #sceneatumass today student
## 169 163 157 154 140
# Re-weight the document-feature matrix by tf-idf scores.
# NOTE: this rebinds `insta_dfm`, so the raw-count matrix is no longer
# reachable under that name after this statement.
insta_dfm <- insta_dfm %>%
  dfm_tfidf()

# Top features ranked by tf-idf weight.
topfeatures(insta_dfm)
## campus umass students amherst day health link bio
## 274.7885 271.0768 212.8361 183.0277 167.5172 158.4141 157.1493 153.7219
## student today
## 152.8655 151.2792