library(wordcloud)
## Loading required package: RColorBrewer
library(tm)
## Loading required package: NLP
library(textclean)
library(tidytext)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library(parallel)
library(tokenizers)
library(tau)
library(NLP)
library(stringr)
library(devtools)
## Loading required package: usethis
library(quanteda)
## Warning in stringi::stri_info(): Your current locale is not in the list of
## available locales. Some functions may not work properly. Refer to
## stri_locale_list() for more details on known locale specifiers.
## Warning in stringi::stri_info(): Your current locale is not in the list of
## available locales. Some functions may not work properly. Refer to
## stri_locale_list() for more details on known locale specifiers.
## Package version: 3.3.1
## Unicode version: 13.0
## ICU version: 69.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:tm':
##
## stopwords
## The following objects are masked from 'package:NLP':
##
## meta, meta<-
library(kayadata)
library(syuzhet)
library(e1071)
library(sentimentr)
##
## Attaching package: 'sentimentr'
## The following object is masked from 'package:syuzhet':
##
## get_sentences
library(SentimentAnalysis)
##
## Attaching package: 'SentimentAnalysis'
## The following object is masked from 'package:base':
##
## write
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(pacman)
pacman::p_load_gh("trinker/textstem")
pacman::p_load(textstem, dplyr)
# Input data: load the collected comments about the national team.
# NOTE(review): setwd() and the machine-specific paths are kept unchanged so
# that anything relying on the working directory keeps resolving; a
# project-relative path would make the script portable.
setwd("C:/Users/LENOVO/Documents/UTS_ST NURAVIAT AWAINA_E0221003")
# The file holds UTF-8 text (emoji). Without `encoding = "UTF-8"` the strings
# are treated as single-byte text on a non-UTF-8 locale: emoji print as
# mojibake ("ðŸ...") and tolower() later alters bytes inside them.
datatimnas <- read.csv(
  "~/UTS_ST NURAVIAT AWAINA_E0221003/timnas.csv",
  sep = ";",
  encoding = "UTF-8"
)
# Keep only the comment text. The name `rev` shadows base::rev() as a
# variable; it is kept because the following steps refer to it.
rev <- datatimnas$full_text
head(rev)
## [1] "Looks like African playing for indonesia, should have played all 11 African players"
## [2] "Hugo Samir Bin Jackson F Tiago 💪ðŸ\u008f¿"
## [3] "Masuk ngegolim, Hugo samir✌"
## [4] "golnya keren & tidak disangka sangka ðŸ‘\u008d🤣"
## [5] "alhamdulillah menang"
## [6] "Mantap Garuda muda."
# Case folding: turn every comment into lower case, then preview the result.
rev <- casefold(rev, upper = FALSE)
head(rev, n = 6L)
## [1] "looks like african playing for indonesia, should have played all 11 african players"
## [2] "hugo samir bin jackson f tiago ðÿ’ªðÿ\u008f¿"
## [3] "masuk ngegolim, hugo samir✜"
## [4] "golnya keren & tidak disangka sangka ðÿ‘\u008dðÿ¤£"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda."
# Expand contracted words back to their full form, then preview the result.
rev <- textclean::replace_contraction(rev)
head(rev, n = 6L)
## [1] "looks like african playing for indonesia, should have played all 11 african players"
## [2] "hugo samir bin jackson f tiago ðÿ’ªðÿ\u008f¿"
## [3] "masuk ngegolim, hugo samir✜"
## [4] "golnya keren & tidak disangka sangka ðÿ‘\u008dðÿ¤£"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda."
# Reduce elongated words (letters repeated for emphasis) to their base form,
# then preview the result.
rev <- textclean::replace_word_elongation(rev)
head(rev, n = 6L)
## [1] "looks like african playing for indonesia, should have played all 11 african players"
## [2] "hugo samir bin jackson f tiago ðÿ’ªðÿ\u008f¿"
## [3] "masuk ngegolim, hugo samir✜"
## [4] "golnya keren & tidak disangka sangka ðÿ‘\u008dðÿ¤£"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda."
# Remove symbols and punctuation. The recorded output shows that digits are
# dropped as well ("all 11 african" becomes "all african").
rev <- textclean::strip(rev)
head(rev, n = 6L)
## [1] "looks like african playing for indonesia should have played all african players"
## [2] "hugo samir bin jackson f tiago ðÿªðÿ\u008f"
## [3] "masuk ngegolim hugo samir✜"
## [4] "golnya keren amp tidak disangka sangka ðÿ\u008dðÿ"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda"
# Stemming / lemmatizing = reducing words to their root form.
# Preview only: the stemmed text is printed but NOT assigned, so `rev` is left
# unchanged for the following steps.
# NOTE(review): the recorded output shows Indonesian words being clipped
# (e.g. "timnas" -> "timna"), so the stemmer looks English-oriented — confirm
# before using its result for analysis.
print(textstem::stem_strings(rev))
## [1] "look like african plai for indonesia should have plai all african player"
## [2] "hugo samir bin jackson f tiago ðÿªðÿ\u008f"
## [3] "masuk ngegolim hugo samir✜"
## [4] "golnya keren amp tidak disangka sangka ðÿ\u008dðÿ"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda"
## [7] "garuda bangkit lagii"
## [8] "yukk garuda bisa"
## [9] "mantap garuda muda menang di laga perdana asian game ðÿ\u008dðÿðÿ httpstcoaaryso ipz"
## [10] "cakep speed kencengðÿœ"
## [11] "ramai rumakiek dan hugo samir bawa timna indonesia u menang httpstcoz hiclqtsd"
## [12] "kelebihan para pemain indonesia adalah speed kenceng"
## [13] "jackson f thiago pasti bahagia"
## [14] "coach jacksen f tiago tersenyum simpul httpstcoadveosdcq"
## [15] "gila lariny cepet banget"
## [16] "pemain potensi cuma dia sere emosian tinggal atasi itu dan teru berkembang"
## [17] "itu kalo witan gak gol"
## [18] "sempet dag dig dug dan pesimi liat mainnya di babak pertama monoton bgt tpi alhamdulillah bisa berprogr dibabak keduaga truss garuda"
## [19] "laju banget akselerasinya"
## [20] "good timna ðÿðÿðÿðÿ\u008f good coach indra sjahfri biasa nya mreka klau timna menang sty yg di banggakan sampai muji gak ktulunganðÿ giliran coach lokal mreka mengakui para pmain timna ah mata sty anðÿ"
## [21] "punya prospek bagu dimasa depan asal jangan cepat pua banyak berlatih dan jaga emosi ingat dia pernah divoni tahun gak boleh main bola garagara emosian menendang wasit"
## [22] "hafidz quran nih anak keren respect"
## [23] "pemain mana hugo samir"
## [24] "hugo samir i supersub âšðÿðÿðÿðÿ"
## [25] "masa kejayaan sepak bola indonesiasegera dimulai waktunya bersatu dan do'akan vike jakarta"
## [26] "the next osimhen"
## [27] "gue bilang juga apa coach i ini luckyn gede bgt"
## [28] "dulu pa dipersi kalo kelakukannya ga jabang bayik udah jadi super star ni bocah wkwk ytta aja wkwk"
## [29] "nah bener ini kayak ginidapat umpanbawa sat set sat set gol gak usah nekuk dulugak perlu gocek dulu ini modelan ivar jener nihayo pemain timna biasakan kayak gitu kalau dapat bola enakeun teh"
## [30] "kek unexpect banget kirain mau dipegang sama kiperðÿ"
## [31] "ini siapa sih yg lg bakar jagung pa nonton bolaðÿž"
## [32] "egi suruh latihan lebih banyak lagi next hugo samir yang jadi starter ðÿ\u0081"
## [33] "cuma rada kasar dia mainnya haru ditatar dulu sama bapaknya ðÿ"
## [34] "kipernya terlalu cepat keluarðÿ"
## [35] "udah takut kirgistan mainnya sepadan sama uzbekistan eh ngak tahunya skillnya dibawah timna kl pelatihnya sty babak aja udah bisa bobol gol tuh kirgistan"
## [36] "keren bet oshimen"
## [37] "walaupun maennya ngantuk tapi stamina kita lebih bagu daripada kirjisgan"
## [38] "witan udah sere dapet kesempatan begini cm keberuntungannya setipi tissueðÿ"
## [39] "lanjutkan jaga emosi jangan samp kena hukuman komdi lagi"
## [40] "keren tempaan bapaknya sih kalem akuratgol"
## [41] "intst ðÿ "
## [42] "repli isinya witan ðÿðÿðÿ jadi inget lawan thailand di final tae"
## [43] "osimhen"
## [44] "ðÿðÿðÿðÿðÿðÿðÿ"
## [45] "jadi inget liverpool v citi"
## [46] "ronaldo ga ada ya"
## [47] "mantap httpstcolaqsecx e httpstco ilp ffyub"
## [48] "kenceng kieu larinya"
## [49] "jacksen banget"
## [50] "hugo samir naturalisasi ya"
## [51] "mantab timnasdai httpstcoo a vcpnz"
## [52] "kualita siarannya bolehlah"
## [53] "tidak disangka sangka"
## [54] "hugo calon bintang"
## [55] "untung b menangsecara permainan msh berantakantdk ada pengatur serangan"
## [56] "ðÿðÿðÿðÿðÿðÿðÿðÿðÿðÿðÿðÿ"
## [57] "gol pertamanya ada yg punya"
## [58] "hahahakipernya kena tipu"
## [59] "setelah nonton pertandingan ini ternyata kirikgistan ga sejago itutimna bikin ngantuk krn lucki aja iniðÿ"
## [60] "ngelihat selebrasinya ngerasain kalau dia bangga banget memakai jersei dengan lambang di dada"
## [61] "keren nii"
## [62] "mantab lanjutkan garuda muda"
## [63] "mantap garuda indonesiaaðÿª"
## [64] "tipcoineth tip"
## [65] "akmaliaaf"
## [66] "keren indonesia ku"
## [67] "kalo witan mah gak gol ini"
## [68] "keren cmn tolong nih lain kali jangan terlalu byk maen dibelakang kecolongan baru tau"
## [69] "mbapp versi lite"
## [70] "victor osimhen versi indonesia"
## [71] "ðÿðÿðÿ"
## [72] "beruntung aja wkwk"
## [73] "ga perlu gocak gocek cari posisi untuk eksekusi"
## [74] "kasih starter line up napa jgn egi mulu haha"
## [75] "titisan ronaldo sihðÿ"
## [76] "hubungi"
## [77] "top"
## [78] "jaga disiplin jangan lagi kau sepak itu wasit ya ðÿ\u0081"
## [79] "mainmu saiki ajur"
## [80] "mudryk kudu liat ini sih"
## [81] "itu blunder bek lawan bukan konsep serangan balik"
## [82] "belum kompak masih sere salah pass golnya karena skill individu rumakiek dan blunder lawan plu kecepatan dan kecerdasan hugo"
## [83] "hugo anak baik anak soleh masyaallah sejak kecil ikutan sholat mamanya udh gede hafidz quran lo"
## [84] "gara hugo samir indonesia kalah"
## [85] "hugo samir si wonderkid sepakbola indonesia ini ternyata penghaf alquran lho gak nyangka anak jacksen f thiago mantep juga yaa httpstcoegdqw kbd"
## [86] "ini liga tarkam kah gaenak banget diliat"
## [87] "all naturalisasi timna naturalisasi"
## [88] "semoga pemain timna kita teru berkembang lebih baik lagi ketika umurnya sudah dewasa nanti aamiin"
## [89] "masih ada bek yg posisinya sejajar itu kiper ngapain maju ðÿ "
## [90] "mantap"
## [91] "ahayðÿ"
## [92] "selebrasi dlu ygy sbelum bola masuk ke gawang ðÿðÿ\u008fðÿ\u008f"
## [93] "mainnya gk ngalir krn tdk ada playmak sprti beckham atau marselino alhamdulillah tetap b meraih kemenangan"
## [94] "siaranbolal alhamdulillah menang"
## [95] "bolanya mengelind beba keren"
## [96] "klo mainnya msh kyk gini ngeri gk sih nanti ktmu korut"
## [97] "umpan lambung ini yg aku rindukan wkwk"
## [98] "witan haru liat ini"
## [99] "ampun dah itu bek nya masih bisa recov tapi kipernyaa malah maju akwkaoakaokaoaka"
## [100] "selebrasi dluan sbelum gol"
## [101] "entah knp kok lht kyk permainan tarkam"
## [102] "kurang gregwt krn penonton nya sepibiasa rameklo main di negara sendiri jd seru"
## [103] "mun nu najongna dedikusnandar asup moalnya"
## [104] "hugo i the boss ðÿœ"
## [105] "jadi inget witan"
## [106] "mutiara hitam"
## [107] "bore liat permaenan timna"
## [108] "goal nya macam taiwoawoniyi"
## [109] "gokil mbapp"
## [110] "keren golnya sekela champion"
## [111] "skill"
## [112] "keep strong and win kitagaruda timnasdai garudamendunia tip"
## [113] "ya masa bapaknya gana anaknya ga bisa buat goal ðÿ\u0081"
# Preview only: print the lemmatized text without assigning it, so `rev` is
# left unchanged.
# NOTE(review): the recorded output shows Indonesian "ya" rewritten as "you",
# so the lemma dictionary looks English-oriented — confirm before relying on it.
print(textstem::lemmatize_strings(rev))
## [1] "look like african play for indonesia should have play all african player"
## [2] "hugo samir bin jackson f tiago ðÿªðÿ\u008f"
## [3] "masuk ngegolim hugo samir✜"
## [4] "golnya keren amp tidak disangka sangka ðÿ\u008dðÿ"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda"
## [7] "garuda bangkit lagii"
## [8] "yukk garuda bisa"
## [9] "mantap garuda muda menang di laga perdana asian game ðÿ\u008dðÿðÿ httpstcoaaryso ipz"
## [10] "cakep speed kencengðÿœ"
## [11] "ramai rumakiek dan hugo samir bawa timnas indonesia u menang httpstcoz hiclqtsd"
## [12] "kelebihan para pemain indonesia adalah speed kenceng"
## [13] "jackson f thiago pasti bahagia"
## [14] "coach jacksen f tiago tersenyum simpul httpstcoadveosdcq"
## [15] "gila larinye cepet banget"
## [16] "pemain potensial cuma dia sering emosian tinggal atasi itu dan terus berkembang"
## [17] "itu kalo witan gak gol"
## [18] "sempet dag dig dig dan pesimis liat mainnya di babak pertama monoton bgt tpi alhamdulillah bisa berprogres dibabak keduagas truss garuda"
## [19] "laju banget akselerasinya"
## [20] "good timnas ðÿðÿðÿðÿ\u008f good coach indra sjahfri biasa nya mreka klau timnas menang sty yg di banggakan sampai muji gak ktulunganðÿ giliran coach lokal mreka mengakui para pmain timnas ah mata sty anðÿ"
## [21] "punya prospek bagus dimasa depan asal jangan cepat puas banyak berlatih dan jaga emosi ingat dia pernah divonis tahun gak boleh main bola garagara emosian menendang wasit"
## [22] "hafidz quran nih anak keren respect"
## [23] "pemain mana hugo samir"
## [24] "hugo samir be supersub âšðÿðÿðÿðÿ"
## [25] "masa kejayaan sepak bola indonesiasegera dimulai waktunya bersatu dan do'akan viking jakarta"
## [26] "the next osimhen"
## [27] "gue bilang juga apa coach be ini luckyne gede bgt"
## [28] "dulu pa dipersis kalo kelakukannya ga jabang bayik udah jadi super star ni bocah wkwk ytta aja wkwk"
## [29] "nah bener ini kayak ginidapat umpanbawa sit set sit set gol gak usah nekuk dulugak perlu gocek dulu ini modelan ivar jener nihayo pemain timnas biasakan kayak gitu kalau dapat bola enakeun teh"
## [30] "kek unexpected banget kirain mau dipegang sama kiperðÿ"
## [31] "ini siapa sih yg lg bakar jagung pa nonton bolaðÿž"
## [32] "egi suruh latihan lebih banyak lagi next hugo samir yang jadi starter ðÿ\u0081"
## [33] "cuma rada kasar dia mainnya harus ditatar dulu sama bapaknya ðÿ"
## [34] "kipernya terlalu cepat keluarðÿ"
## [35] "udah takut kirgistan mainnya sepadan sama uzbekistan eh ngak tahunya skillnya dibawah timnas kl pelatihnya sty babak aja udah bisa bobol gol tuh kirgistan"
## [36] "keren bet oshimen"
## [37] "walaupun maennya ngantuk tapi stamen kita lebih bagus daripada kirjisgan"
## [38] "witan udah sering dapet kesempatan begini cm keberuntungannya setipis tissueðÿ"
## [39] "lanjutkan jaga emosi jangan sampe kena hukuman komdis lagi"
## [40] "keren tempaan bapaknya sih kalem akuratgol"
## [41] "intsting ðÿ "
## [42] "reply isinya witan ðÿðÿðÿ jadi inget lawan thailand di final tae"
## [43] "osimhen"
## [44] "ðÿðÿðÿðÿðÿðÿðÿ"
## [45] "jadi inget liverpool vs city"
## [46] "ronaldo ga ada you"
## [47] "mantap httpstcolaqsecx e httpstco ilp ffyub"
## [48] "kenceng kieu larinya"
## [49] "jacksen bangets"
## [50] "hugo samir naturalisasi you"
## [51] "mantab timnasday httpstcoo a vcpnz"
## [52] "kualitas siarannya bolehlah"
## [53] "tidak disangka sangka"
## [54] "hugo calon bintang"
## [55] "untung b menangsecara permainan msh berantakantdk ada pengatur serangan"
## [56] "ðÿðÿðÿðÿðÿðÿðÿðÿðÿðÿðÿðÿ"
## [57] "gol pertamanya ada yg punya"
## [58] "hahahakipernya kena tipu"
## [59] "setelah nonton pertandingan ini ternyata kirikgistan ga sejago itutimnas bikin ngantuk krn lucky aja iniðÿ"
## [60] "ngelihat selebrasinya ngerasain kalau dia bangga banget memakai jersey dengan lambang di dada"
## [61] "keren nii"
## [62] "mantab lanjutkan garuda muda"
## [63] "mantap garuda indonesiaaðÿª"
## [64] "tipcoineth tip"
## [65] "akmaliaaf"
## [66] "keren indonesia ku"
## [67] "kalo witan mah gak gol ini"
## [68] "keren cmn tolong nih lie kali jangan terlalu byk maen dibelakang kecolongan baru tau"
## [69] "mbappe versi lite"
## [70] "victor osimhen versi indonesia"
## [71] "ðÿðÿðÿ"
## [72] "beruntung aja wkwk"
## [73] "ga perlu gocak gocek cari posisi untuk eksekusi"
## [74] "kasih starter line up napa jgn egy mulu haha"
## [75] "titisan ronaldo sihðÿ"
## [76] "hubungi"
## [77] "top"
## [78] "jaga disiplin jangan lagi kau sepak itu wasit you ðÿ\u0081"
## [79] "mainmu saiki ajur"
## [80] "mudryk kudu liat ini sih"
## [81] "itu blunder bek lawan bukan konsep serangan balik"
## [82] "belum kompak masih sering salah pass golnya karena skill individu rumakiek dan blunder lawan plus kecepatan dan kecerdasan hugo"
## [83] "hugo anak baik anak soleh masyaallah sejak kecil ikutan sholat mamanya udh gede hafidz quran lo"
## [84] "gara hugo samir indonesia kalah"
## [85] "hugo samir si wonderkid sepakbola indonesia ini ternyata penghafal alquran lho gak nyangka anak jacksen f thiago mantep juga yaa httpstcoegdqw kbd"
## [86] "ini liga tarkam kah gaenak banget diliat"
## [87] "all naturalisasi timnas naturalisasi"
## [88] "semoga pemain timnas kita terus berkembang lebih baik lagi ketika umurnya sudah dewasa nanti aamiin"
## [89] "masih ada bek yg posisinya sejajar itu kiper ngapain maju ðÿ "
## [90] "mantap"
## [91] "ahayðÿ"
## [92] "selebrasi dlu ygy sbelum bola masuk ke gawang ðÿðÿ\u008fðÿ\u008f"
## [93] "mainnya gk ngalir krn tdk ada playmaker sprti beckham atau marselino alhamdulillah tetap b meraih kemenangan"
## [94] "siaranbolalive alhamdulillah menang"
## [95] "bolanya mengelinding bebas keren"
## [96] "klo mainnya msh kyk gini ngeri gk sih nanti ktmu korut"
## [97] "umpan lambung ini yg aku rindukan wkwk"
## [98] "witan harus liat ini"
## [99] "ampun dah itu bek nya masih bisa recover tapi kipernyaa malah maju akwkaoakaokaoaka"
## [100] "selebrasi dluan sbelum gol"
## [101] "entah knp kok lht kyk permainan tarkam"
## [102] "kurang gregwt krn penonton nya sepibiasa rameklo main di negara sendiri jd seru"
## [103] "mun nu najongna dedikusnandar asup moalnya"
## [104] "hugo be the boss ðÿœ"
## [105] "jadi inget witan"
## [106] "mutiara hitam"
## [107] "bore liat permaenan timnas"
## [108] "goal nya macam taiwoawoniyi"
## [109] "gokil mbappe"
## [110] "keren golnya sekelas champion"
## [111] "skill"
## [112] "keep strong and win kitagaruda timnasday garudamendunia tip"
## [113] "you masa bapaknya ganas anaknya ga bisa buat goal ðÿ\u0081"
# Small illustration of stemming versus lemmatizing on inflections of "drive".
sc <- c("driver", "drive", "drove", "driven", "drives", "driving")
print(textstem::stem_words(sc))
## [1] "driver" "drive" "drove" "driven" "drive" "drive"
print(textstem::lemmatize_words(sc))
## [1] "driver" "drive" "drive" "drive" "drive" "drive"
# Remove connective / filler words using a hand-written stop-word list.
# NOTE(review): several entries (e.g. "kpu", "kpuid", "refaabdi") look like
# they belong to a different corpus — confirm they are still wanted here.
rev <- tm::removeWords(
  rev,
  c(
    "di", "dan", "yang", "akan", "agar", "seperti", "yaitu", "kami",
    "mari", "pada", "jelang", "dimana", "dengan", "sudah", "ini", "seluruh",
    "diminta", "tak", "itu", "hai", "bisa", "wib", "oleh", "mai", "jam",
    "masa", "berikut", "kalau", "klik", "ibodwq", "terd", "httpstconvv",
    "httpstcoxu", "yzmrlyx", "tahapan", "refaabdi", "kota", "kpu", "kpuid",
    "rt", "hingga", "saat", "belum", "apa", "sih", "suara", "pesta",
    "dindap", "http", "httpstco", "asn", "bakal"
  )
)
# removeWords() deletes the matched word but keeps the blanks around it, which
# leaves doubled and leading/trailing spaces in the text. Collapse and trim
# them so the cleaned text that gets saved is tidy.
rev <- trimws(gsub("\\s+", " ", rev, perl = TRUE))
head(rev)
## [1] "looks like african playing for indonesia should have played all african players"
## [2] "hugo samir bin jackson f tiago ðÿªðÿ\u008f"
## [3] "masuk ngegolim hugo samir✜"
## [4] "golnya keren amp tidak disangka sangka ðÿ\u008dðÿ"
## [5] "alhamdulillah menang"
## [6] "mantap garuda muda"
# Save the cleaned text (a character vector, so the CSV has a single column).
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently switch row names back on.
write.csv(
  rev,
  file = "C:/Users/LENOVO/Documents/UTS_ST NURAVIAT AWAINA_E0221003/databersih.csv",
  row.names = FALSE
)
## Word cloud
# Turn the cleaned text into a term-document matrix, then total every term's
# count across all documents, most frequent first.
tdm <- tm::TermDocumentMatrix(rev)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
# Collect the totals into a word/frequency data frame for plotting.
d <- data.frame(word = names(v), freq = v)
wordcloud::wordcloud(
  words = d$word,
  freq = d$freq,
  max.words = 500,
  random.order = FALSE,
  colors = RColorBrewer::brewer.pal(n = 8, name = "Dark2")
)
############################
# Rebuild the term-document matrix with the word-length bounds set to
# c(1, Inf), then print its summary.
tdm <- tm::TermDocumentMatrix(
  rev,
  control = list(wordLengths = c(1, Inf))
)
print(tdm)
## <<TermDocumentMatrix (terms: 520, documents: 113)>>
## Non-/sparse entries: 761/57999
## Sparsity : 99%
## Maximal term length: 24
## Weighting : term frequency (tf)
# Terms whose frequency reaches the lower bound of 5; store and show them.
freq.terms <- tm::findFreqTerms(tdm, lowfreq = 5)
print(freq.terms)
## [1] "indonesia" "hugo" "samir" "keren" "menang" "garuda"
## [7] "mantap" "timnas" "pemain" "banget" "gak" "gol"
## [13] "witan" "mainnya" "yg" "ga" "jadi" "ada"
# Total count per term, restricted to terms counted at least 5 times.
term.freq <- rowSums(as.matrix(tdm))
term.freq <- term.freq[term.freq >= 5]
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of those terms.
ggplot(df, aes(x = term, y = freq)) +
  geom_col() +
  labs(x = "Terms", y = "Count") +
  coord_flip()
# Which terms are associated with "indonesia"? (correlation limit 0.25)
tm::findAssocs(tdm, terms = "indonesia", corlimit = 0.25)
## $indonesia
## african for have like looks
## 0.37 0.37 0.37 0.37 0.37
## played players playing should bawa
## 0.37 0.37 0.37 0.37 0.37
## hiclqtsd httpstcoz ramai u adalah
## 0.37 0.37 0.37 0.37 0.37
## kelebihan ku victor gara kalah
## 0.37 0.37 0.37 0.37 0.37
## alquran httpstcoegdqw kbd lho mantep
## 0.37 0.37 0.37 0.37 0.37
## nyangka penghafal sepakbola si wonderkid
## 0.37 0.37 0.37 0.37 0.37
## yaa samir hugo
## 0.37 0.36 0.25
# Which terms are associated with "gol"? (correlation limit 0.2)
tm::findAssocs(tdm, terms = "gol", corlimit = 0.2)
## $gol
## gak kalo bener biasakan dapat dulugak enakeun
## 0.47 0.45 0.40 0.40 0.40 0.40 0.40
## ginidapat gitu ivar jener kayak modelan nah
## 0.40 0.40 0.40 0.40 0.40 0.40 0.40
## nekuk nihayo sat set teh umpanbawa usah
## 0.40 0.40 0.40 0.40 0.40 0.40 0.40
## bobol dibawah eh kirgistan kl ngak pelatihnya
## 0.40 0.40 0.40 0.40 0.40 0.40 0.40
## sepadan skillnya tahunya takut tuh uzbekistan pertamanya
## 0.40 0.40 0.40 0.40 0.40 0.40 0.40
## mah dluan udah witan babak punya gocek
## 0.40 0.40 0.31 0.30 0.27 0.27 0.27
## perlu sbelum selebrasi dulu sama
## 0.27 0.27 0.27 0.21 0.21
# Hierarchical clustering of terms.
# Drop sparse terms first (sparsity threshold 0.95) so only frequent ones remain.
tdm2 <- removeSparseTerms(tdm, sparse = 0.95)
m2 <- as.matrix(tdm2)
# Standardise the columns, then compute distances between the rows (terms).
distMatrix <- dist(scale(m2))
# dist() returns plain (unsquared) Euclidean distances. With such input,
# "ward.D" does not implement Ward's clustering criterion; "ward.D2" squares
# the dissimilarities internally and does (see ?hclust).
fit <- hclust(distMatrix, method = "ward.D2")
plot(fit)
rect.hclust(fit, k = 5) # outline 5 clusters on the dendrogram
# k-means on documents: transpose so rows are documents (tweets) and columns
# are terms.
m3 <- t(m2)
k <- 5 # number of clusters
set.seed(122) # fixed random seed
kmeansResult <- kmeans(m3, centers = k)
# Cluster centres, rounded to three decimals.
round(kmeansResult$centers, 3)
## indonesia hugo samir keren garuda timnas gak gol witan
## 1 0.000 0 0.000 0 0.000 0.333 0.500 1 0.333
## 2 0.000 0 0.000 0 0.000 3.000 1.000 0 0.000
## 3 0.111 0 0.000 1 0.000 0.000 0.000 0 0.000
## 4 0.231 1 0.615 0 0.000 0.077 0.077 0 0.000
## 5 0.036 0 0.000 0 0.083 0.036 0.012 0 0.048
# For every k-means cluster, print the (up to) five terms with the largest
# centre values.
for (i in seq_len(k)) {
  cat(paste0("cluster ", i, ": "))
  s <- sort(kmeansResult$centers[i, ], decreasing = TRUE)
  # head() returns fewer names instead of padding with NA when there are
  # fewer than five terms.
  cat(head(names(s), 5), "\n")
  # print the tweets of every cluster
  # print(tweets[which(kmeansResult$cluster == i)])
}
## cluster 1: gol gak timnas witan indonesia
## cluster 2: timnas gak indonesia hugo samir
## cluster 3: keren indonesia hugo samir garuda
## cluster 4: hugo samir indonesia timnas gak
## cluster 5: garuda witan indonesia timnas gak