library(readr)
## Warning: package 'readr' was built under R version 4.2.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(quanteda)
## Warning: package 'quanteda' was built under R version 4.2.1
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "mMatrix"; definition not updated
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "replValueSp"; definition not updated
## Package version: 3.2.3
## Unicode version: 13.0
## ICU version: 69.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
library(quanteda.textstats)
## Warning: package 'quanteda.textstats' was built under R version 4.2.1
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "mMatrix"; definition not updated
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "unpackedMatrix" of class "replValueSp"; definition not updated
library(quanteda.textplots)
## Warning: package 'quanteda.textplots' was built under R version 4.2.1
library(seededlda)
## Warning: package 'seededlda' was built under R version 4.2.1
## Loading required package: proxyC
## Warning: package 'proxyC' was built under R version 4.2.1
## 
## Attaching package: 'proxyC'
## The following object is masked from 'package:stats':
## 
##     dist
## 
## Attaching package: 'seededlda'
## The following object is masked from 'package:stats':
## 
##     terms
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
library(DT)
## Warning: package 'DT' was built under R version 4.2.1

#KWIC (keywords-in-context)

# Download the UMass Instagram post export and read it with readr.
Umass <- "https://curiositybits.cc/files/umass-instagram.csv" %>%
  read_csv()
## Rows: 1421 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): Account, User Name, Followers at Posting, Post Created, Type, URL...
## dbl   (4): Likes, Comments, Views, Overperforming Score (weighted  —  Likes ...
## lgl   (4): Like and View Counts Disabled, Title, Sponsor Id, Sponsor Name
## date  (1): Post Created Date
## time  (1): Post Created Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a quanteda corpus whose document text is taken from the
# "Description" column of the posts table.
corpus_Umass <- Umass %>%
  corpus(text_field = "Description")
## Warning: NA is replaced by empty string
# Tokenise the corpus, then find every token matching the glob pattern
# "student*" together with its surrounding context.
data_tokens <- corpus_Umass %>% tokens()
kw <- data_tokens %>% kwic(pattern = "student*")
# Print the first 10 keyword-in-context matches.
kw %>% head(10)
## Keyword-in-context with 10 matches.                                                               
##   [text3, 94]      giving, financial resources and | student  |
##   [text4, 21]                , September 13 in the | Student  |
##   [text8, 19]             of care to UMass Amherst | students |
##    [text9, 5]                       As part of New | Student  |
##   [text9, 10] Student Orientation and Transitions, | students |
##  [text10, 23]                 Thank you to all the | students |
##  [text10, 36]               and to the amazing New | Student  |
##  [text11, 91]                          : 00 p.m. - | Students |
##   [text12, 4]                    The UMass Amherst | Student  |
##  [text12, 16]         for the freshmen Senator and | Student  |
##                                         
##  selectivity. Read more with            
##  Union Ballroom. UMass Amherst          
##  to help cope with stress               
##  Orientation and Transitions, students  
##  were invited to Athletics 101          
##  who joined us on Sunday                
##  Orientation and Transitions team for   
##  - 4: 15 to                             
##  Government Association is now accepting
##  Trustee elections until Tuesday,

Build a document-feature matrix (DFM) from the UMass Instagram account’s posts.

# Tokenise the posts without punctuation, numbers, symbols or URLs, drop
# English stopwords, and tabulate the remaining tokens into a DFM.
insta_dfm <- corpus_Umass %>%
  tokens(
    remove_punct = TRUE,
    remove_numbers = TRUE,
    remove_symbols = TRUE,
    remove_url = TRUE
  ) %>%
  tokens_remove(stopwords("en")) %>%
  dfm()
# Most frequent features, ranked by raw count.
topfeatures(insta_dfm)
##        campus         umass      students       amherst          link 
##           474           462           234           210           169 
##           day           bio #sceneatumass         today       student 
##           169           163           157           154           140

Top words ranked by TF-IDF

# Weight the count DFM by tf-idf and keep the result in a separate object.
# Overwriting `insta_dfm` here would feed tf-idf weights instead of counts
# into every later step that reads `insta_dfm` (hashtag selection,
# co-occurrence matrix, LDA).
insta_tfidf <- dfm_tfidf(insta_dfm)
topfeatures(insta_tfidf) # top words ranked by tf-idf score
##   campus    umass students  amherst      day   health     link      bio 
## 274.7885 271.0768 212.8361 183.0277 167.5172 158.4141 157.1493 153.7219 
##  student    today 
## 152.8655 151.2792

Semantic network: build a DFM that contains only hashtags, then extract the top 30 hashtags from it.

# Keep only the features that start with "#", i.e. the hashtags.
tag_dfm <- insta_dfm %>% dfm_select(pattern = "#*")
# Names of the 30 highest-ranked hashtags.
toptag <- tag_dfm %>%
  topfeatures(30) %>%
  names()
toptag %>% head()
## [1] "#sceneatumass"    "#umassamherst"    "#flashbackfriday" "#college"        
## [5] "#university"      "#umassgives"

#Construct feature co-occurrence matrix (FCM) of hashtags

# Turn the hashtag DFM into a hashtag-by-hashtag co-occurrence matrix.
tag_fcm <- tag_dfm %>% fcm()
# Preview the first rows of the matrix.
tag_fcm %>% head()
## Feature co-occurrence matrix of: 6 by 333 features.
##                     features
## features             #swamyselfie #throwback #welcometoumass #flashbackfriday
##   #swamyselfie           3.393128   0               0                 0      
##   #throwback             0          3.393128        0                 0      
##   #welcometoumass        0          0               3.393128          0      
##   #flashbackfriday       0          0               0                12.63902
##   #tbt                   0          0               0                 0      
##   #throwbackthursday     0          0               0                 0      
##                     features
## features                 #tbt #throwbackthursday #umasshistory
##   #swamyselfie        0                 0             0       
##   #throwback          0                 0             0       
##   #welcometoumass     0                 0             0       
##   #flashbackfriday    0                 0             5.517717
##   #tbt               11.24306           4.945201      4.945201
##   #throwbackthursday  0                11.854791     33.829247
##                     features
## features             #umasssummerphoto22 #indigenousday #indigenouspeoplesday
##   #swamyselfie                         0              0                     0
##   #throwback                           0              0                     0
##   #welcometoumass                      0              0                     0
##   #flashbackfriday                     0              0                     0
##   #tbt                                 0              0                     0
##   #throwbackthursday                   0              0                     0
## [ reached max_nfeat ... 323 more features ]

#Visualize the semantic network based on hashtag co-occurrence

# Restrict the co-occurrence matrix to the top hashtags held in `toptag`.
topgat_fcm <- tag_fcm %>% fcm_select(pattern = toptag)
# Draw the hashtag network with orange, semi-transparent edges.
topgat_fcm %>%
  textplot_network(
    min_freq = 0.1,
    edge_alpha = 0.5,
    edge_size = 1,
    edge_color = "orange"
  )

Fit an LDA topic model to the Instagram posts. Set the number of topics to 25 and show the top 10 terms from each topic.

library(seededlda)
# The LDA fit is stochastic, so fix the RNG seed to make the fitted topics
# reproducible from run to run.
set.seed(1234)
tmod_lda <- textmodel_lda(insta_dfm, k = 25) # k = 25: fit 25 topics on the DFM

terms(tmod_lda, 10) # the 10 highest-ranked terms for each topic
##       topic1        topic2       topic3     topic4   topic5         
##  [1,] "make"        "orchard"    "black"    "farm"   "counseling"   
##  [2,] "#umassgives" "sunsets"    "presence" "just"   "health"       
##  [3,] "gift"        "fall"       "check"    "gear"   "center"       
##  [4,] "favorite"    "warm"       "look"     "aj"     "community"    
##  [5,] "giving"      "huge"       "research" "hadley" "help"         
##  [6,] "together"    "hill"       "website"  "cute"   "spring"       
##  [7,] "changed"     "spring"     "excited"  "seen"   "semester"     
##  [8,] "give"        "colors"     "wanted"   "days"   "support"      
##  [9,] "best"        "engagement" "radical"  "gone"   "learn"        
## [10,] "thank"       "met"        "videos"   "last"   "psychological"
##       topic6          topic7       topic8               topic9       topic10  
##  [1,] "chancellor"    "a.m"        "nursing"            "sexual"     "really" 
##  [2,] "inclusion"     "graduating" "@umass_scua"        "current"    "people" 
##  [3,] "emails"        "senior"     "#flashbackfriday"   "assault"    "major"  
##  [4,] "racism"        "new"        "college"            "known"      "class"  
##  [5,] "racist"        "united"     "marieb"             "challenge"  "like"   
##  [6,] "black"         "ice"        "student"            "part"       "get"    
##  [7,] "community"     "washington" "elaine"             "initiative" "able"   
##  [8,] "subbaswamy"    "attacks"    "innovation"         "review"     "thought"
##  [9,] "investigation" "food"       "#throwbackthursday" "last"       "think"  
## [10,] "equity"        "sean"       "#umasshistory"      "arts"       "got"    
##       topic11   topic12              topic13     topic14        topic15     
##  [1,] "summer"  "rising"             "energy"    "best"         "happy"     
##  [2,] "us"      "researcher"         "worked"    "universities" "closed"    
##  [3,] "photos"  "lab"                "assistant" "magazine"     "offices"   
##  [4,] "photo"   "gateway"            "project"   "know"         "holiday"   
##  [5,] "win"     "rising-researchers" "black"     "student"      "monday"    
##  [6,] "tag"     "helping"            "program"   "u.s"          "observance"
##  [7,] "contest" "read"               "art"       "cap"          "classes"   
##  [8,] "card"    "chemistry"          "chemical"  "hear"         "garden"    
##  [9,] "now"     "umass.edu"          "research"  "year"         "enjoy"     
## [10,] "chance"  "use"                "served"    "graduation"   "dining"    
##       topic16      topic17     topic18     topic19               topic20   
##  [1,] "juneteenth" "vibes"     "contact"   "view"                "dogs"    
##  [2,] "justice"    "finals"    "time"      "pond"                "student" 
##  [3,] "june"       "hot"       "can"       "image"               "makes"   
##  [4,] "society"    "sports"    "tracing"   "library"             "like"    
##  [5,] "sunwheel"   "way"       "moodle"    "description"         "thanks"  
##  [6,] "racism"     "pride"     "household" "@douhan_photography" "check"   
##  [7,] "americans"  "making"    "virtually" "@shtewpac"           "one"     
##  [8,] "rain"       "happy"     "us"        "du"                  "last"    
##  [9,] "police"     "southwest" "campus"    "old"                 "union"   
## [10,] "say"        "family"    "outside"   "bois"                "ballroom"
##       topic21     topic22            topic23        topic24         
##  [1,] "testing"   "game"             "commencement" "band"          
##  [2,] "center"    "#flagship"        "back"         "minuteman"     
##  [3,] "covid-19"  "#flashbackfriday" "welcome"      "international" 
##  [4,] "health"    "set"              "class"        "parade"        
##  [5,] "public"    "hockey"           "soon"         "marching"      
##  [6,] "mask"      "@umassband"       "college"      "s"             
##  [7,] "students"  "@umasshockey"     "weekend"      "rose"          
##  [8,] "remain"    "music"            "honors"       "@sam_minuteman"
##  [9,] "in-person" "love"             "graduates"    "sam"           
## [10,] "open"      "home"             "commonwealth" "#roseparade"   
##       topic25         
##  [1,] "#umassamherst" 
##  [2,] "#college"      
##  [3,] "#university"   
##  [4,] "#massachusetts"
##  [5,] "#campus"       
##  [6,] "contest"       
##  [7,] "rules"         
##  [8,] "#snow"         
##  [9,] "photo"         
## [10,] "#umass"