DATA MINING
Data mining adalah upaya mendapatkan informasi dari kumpulan data tertentu. Nantinya, informasi tersebut akan diolah dan digunakan sesuai dengan tujuan data mining yang ditentukan. Terutama bagi bisnis, upaya data mining penting untuk mengambil keputusan terbaik sesuai strategi bisnis yang akan dijalankan.
Panggil semua package yang dibutuhkan dengan fungsi library(nama_package)
library(wordcloud)
## Loading required package: RColorBrewer
library(tm)
## Loading required package: NLP
library(textclean)
library(tidytext)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library(parallel)
library(tokenizers)
library(tau)
library(NLP)
library(stringr)
library(devtools)
## Loading required package: usethis
library(quanteda)
## Package version: 3.3.1
## Unicode version: 13.0
## ICU version: 69.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:tm':
##
## stopwords
## The following objects are masked from 'package:NLP':
##
## meta, meta<-
library(kayadata)
library(syuzhet)
library(e1071)
library(sentimentr)
##
## Attaching package: 'sentimentr'
## The following object is masked from 'package:syuzhet':
##
## get_sentences
library(SentimentAnalysis)
##
## Attaching package: 'SentimentAnalysis'
## The following object is masked from 'package:base':
##
## write
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(pacman)
pacman::p_load(textstem, dplyr)
TENTANG DATASET
Dataset yang digunakan diperoleh dari hasil crawling data di Twitter. Data ini berisi lebih dari 160 tweet tentang wacana penghapusan skripsi oleh Menteri Pendidikan Nadiem Makarim.
Import data ke dalam R untuk dilakukan analisis
# Load the crawled tweets from a semicolon-separated CSV file.
# No setwd() call: the file is addressed by its full path, so changing the
# session's working directory is unnecessary global state.
# NOTE(review): the head() preview of this file shows emoji as mojibake
# ("ðŸ‘"), which indicates UTF-8 text read without a declared encoding.
# `encoding = "UTF-8"` marks the strings as UTF-8 - confirm against the file.
skripsi <- read.csv(
  "~/Tugas Kuliah/Komputasi Lanjut Semester V/pak-mentri-skripsi.csv",
  sep = ";",
  encoding = "UTF-8"
)

# Keep only the tweet text for the text-mining steps.
tweets <- skripsi$full_text

# Preview the first rows of the imported data.
head(skripsi)
## created_at id_str
## 1 Wed Sep 13 15:19:31 +0000 2023 1.701979e+18
## 2 Wed Sep 13 14:16:29 +0000 2023 1.701963e+18
## 3 Sat Sep 09 14:27:36 +0000 2023 1.700516e+18
## 4 Fri Sep 01 12:42:40 +0000 2023 1.697591e+18
## 5 Fri Sep 01 01:49:40 +0000 2023 1.697426e+18
## 6 Wed Aug 30 17:26:01 +0000 2023 1.696937e+18
## full_text
## 1 Bagus
## 2 Bagus. Sudah benar.
## 3 Unas uda ditiadakan, sekarang skripsi, lama2 sekolah ditiadakan sekalian aja
## 4 yup setuju jdi opsional ðŸ‘\215ðŸ‘\215
## 5 Kali ini ide mendikbud fresh banget. Tapi sayang, kalo keputusannya dari dah dulu, harusnya semester 5 th 2018 gw dah lulus wkwkwk but yg terjadi adalah semester 10 4 tahun 10 bulan
## 6 ganti jadi jurnal scopus aja pak. minimal sinta 2 deh ðŸ‘\215
## quote_count reply_count retweet_count favorite_count lang user_id_str
## 1 0 0 0 0 in 1.463881e+09
## 2 0 1 0 5 in 3.454436e+07
## 3 0 0 0 1 in 1.770778e+08
## 4 0 0 2 3 in 1.184692e+08
## 5 0 0 0 0 in 2.354459e+08
## 6 0 0 0 0 in 1.356924e+18
## conversation_id_str username
## 1 1.701979e+18 Raisuzaman
## 2 1.701963e+18 edbertgani
## 3 1.700516e+18 sanasini27
## 4 1.697591e+18 AlphaARachman
## 5 1.697426e+18 niasakinah_
## 6 1.696937e+18 haeemeung
## tweet_url
## 1 https://twitter.com/Raisuzaman/status/1701978923179868579
## 2 https://twitter.com/edbertgani/status/1701963062683570242
## 3 https://twitter.com/sanasini27/status/1700516308989989301
## 4 https://twitter.com/AlphaARachman/status/1697590799247221159
## 5 https://twitter.com/niasakinah_/status/1697426463774146868
## 6 https://twitter.com/haeemeung/status/1696937329825611821
# Remove duplicate tweets.
# De-duplication is done on the tweet text only: comparing whole rows would
# also compare id/url columns, and the later text-cleaning calls are applied
# to `tweets` as a character vector, not as a data frame.
tweets <- unique(skripsi$full_text)
tweets

# Number of tweets left after duplicates are removed.
length(tweets)
## NULL
# Clean the raw tweet text.
# Order matters: URLs, emoji, mentions and hashtags are handled while the
# characters that identify them ("@", "#", "://") are still in the text.
# strip() therefore runs last; running it earlier would delete those markers
# and leave user names and hashtag words behind as ordinary tokens.
tweets <- tweets %>%
  replace_html() %>%  # HTML markup / entities
  replace_url() %>%   # URLs
  replace_emoji() %>% # emoji
  replace_html() %>%  # second pass after emoji replacement, as in the original
  replace_tag(pattern = "@([A-Za-z0-9_]+)", replacement = "") %>%  # remove mentions
  replace_hash(pattern = "#([A-Za-z0-9_]+)", replacement = "")     # remove hashtags
tweets

# Strip remaining symbols.
tweets <- strip(tweets)
head(tweets)

# Stemming/lemmatizing (reduce words to their base form).
# The result is only displayed, not assigned back to `tweets`.
# NOTE(review): stem_strings() is presumably tuned for English; confirm it is
# appropriate before applying it to Indonesian text.
stem_strings(tweets)
# Remove conjunctions, slang and other non-standard tokens.
# The list also contains mis-encoded fragments (e.g. "ðÿ") observed in the
# data; the literals are kept exactly as they were so the same tokens match.
tweets <- removeWords(
  tweets,
  c("di","dan","yang","akan","agar","seperti","yaitu","kami","kami",
    "mari","pada","jelang","dimana","dengan","sudah","ini","seluruh",
    "diminta","tak","itu","hai","bisa","wib","oleh","mai","jam", "aug",
    "masa","berikut","kalau","klik","ibodwq","terd","httpstconvv","tue","wed",
    "httpstcoxu","yzmrlyx","tahapan","refaabdi","kota","kpu","kpuid","rt","hingga",
    "saat", "belum","apa","sih","suara","pesta","dindap","http","httpstco",
    "asn","bakal","wkwk","wkwkw","aug","iya","uu","i","ada","ngene","yang","bjir",
    "ðÿðÿ","un","anjir","tahi","tbtb","my","wios","sialan","wkwkwkwk","sip","omo",
    "like","plss","ket","e","after","ha","pakðÿ", "but","rill","cashback",
    "allah","and","o","ðÿ'^ðÿ","nya","ya","ðÿ","no","nuruk","ki","jir",
    "anjing","biar","kagak","sayang","mah","anjay","ngaruh","kalo","gua","thesis",
    "skripsiðÿ","duh","ih","ots","a","pft","plis","plan","ra","rabi","o",
    "skripshit","duit","sih","nih", "amp", "ï","tuh","tau","â","â","aaaa","deh","ðÿº",
    "coba","dll","iki","gue","kena","oon","pas","sad","up","wkwkwk","waleh","ajg",
    "ah","adaâ","alaala","alah","alamðÿ","allahâ","ayo","end","bu","biak","is")
)

# Preview the cleaned tweets (previously this previewed the base function
# `rev` instead of the data).
head(tweets)
##
## 1 function (x)
## 2 UseMethod("rev")
# Convert to lower case; the parentheses print the result after assignment.
(tweets <- tolower(tweets))

# Expand contracted words back to their full form.
(tweets <- replace_contraction(tweets))

# Collapse elongated words back to their normal spelling.
(tweets <- replace_word_elongation(tweets))
# Save the cleaned tweets. The original call passed `rev`, which is the base
# R function of that name, not the data; write the cleaned text instead.
write.csv(
  data.frame(full_text = tweets),
  file = "C:/Users/kjl/Documents/data-bersih3.csv",
  row.names = FALSE
)
# Build a term-document matrix from the cleaned tweets and convert it to a
# plain matrix.
tdm <- TermDocumentMatrix(tweets)
m <- tdm %>% as.matrix()

# Total count per term, most frequent first.
v <- m %>%
  rowSums() %>%
  sort(decreasing = TRUE)

# Turn the named frequency vector into a data frame (word, freq).
d <- data.frame(
  word = names(v),
  freq = v
)
Membuat Diagram Wordcloud
# Draw the word cloud: most frequent words placed first, at most 500 words,
# coloured with the 8-colour "Dark2" palette.
wordcloud(
  words = d$word,
  freq = d$freq,
  max.words = 500,
  random.order = FALSE,
  colors = brewer.pal(n = 8, name = "Dark2")
)
# Rebuild the term-document matrix with an explicit word-length range of
# 1 to Inf. `Inf` must be capitalised: R is case-sensitive and `inf` is not
# a defined object.
tdm <- TermDocumentMatrix(
  tweets,
  control = list(wordLengths = c(1, Inf))
)
tdm

# Terms that occur at least 14 times (parentheses print the result).
(freq.terms <- findFreqTerms(tdm, lowfreq = 14))
## [1] "aja" "skripsi" "dah" "dulu" "lulus" "pak"
## [7] "baru" "udah" "skripsian" "kenapa" "gak" "aku"
# Total occurrences of each term across all tweets.
term.freq <- tdm %>%
  as.matrix() %>%
  rowSums()

# Keep only terms seen at least 14 times.
term.freq <- term.freq[term.freq >= 14]

df <- data.frame(
  term = names(term.freq),
  freq = term.freq
)

# Horizontal bar chart of the frequent terms.
ggplot(data = df, mapping = aes(x = term, y = freq)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("Terms") +
  ylab("Count")
# Remove sparse (rarely occurring) terms using a sparsity threshold of 0.95.
tdm2 <- tdm %>% removeSparseTerms(sparse = 0.95)

# Dense matrix form of the reduced term-document matrix.
m2 <- tdm2 %>% as.matrix()
ANALISIS CLUSTER HIERARKI
# Hierarchical clustering of terms: distances are computed on the scaled
# term matrix.
distMatrix <- dist(scale(m2))

# "ward.D" is the current name of the method formerly called "ward";
# using it gives the same clustering without the rename warning.
fit <- hclust(distMatrix, method = "ward.D")

# Dendrogram with the tree cut into 4 clusters.
plot(fit)
rect.hclust(fit, k = 4)
ANALISIS CLUSTER K-MEANS
# Transpose so that rows are documents (tweets) and columns are terms;
# the parentheses print the matrix after assignment.
(m3 <- t(m2))
## Terms
## Docs aja skripsi dari dulu lulus tapi pak kampus baru sidang udah skripsian
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 1 1 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 1 1 1 1 0 0 0 0 0 0
## 6 1 0 0 0 0 0 1 0 0 0 0 0
## 7 0 0 0 0 0 0 0 0 0 0 0 0
## 8 0 0 0 0 0 0 0 0 0 0 0 0
## 9 1 1 0 0 0 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0 1 0 0 0 0
## 11 0 0 0 0 0 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0 1 0 0 0 0
## 13 0 0 0 0 0 0 0 0 1 1 1 0
## 14 0 1 0 0 2 1 0 0 0 0 0 0
## 15 0 0 0 0 0 0 0 0 0 0 1 1
## 16 0 0 0 0 0 0 0 0 1 0 0 0
## 17 0 1 1 0 1 0 0 0 0 0 0 0
## 18 0 0 0 0 0 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0 0 0 0 0 0
## 20 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 22 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 1 0 0 0 1 0 2 0 0 0 0
## 24 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 1 0 0 0 0 0 0 0 0 0 0
## 26 0 0 0 0 0 0 0 0 0 0 0 0
## 27 0 0 0 0 0 0 1 0 0 0 0 0
## 28 0 1 0 0 0 0 0 0 0 0 0 0
## 29 0 0 0 0 0 0 0 0 1 0 0 0
## 30 0 0 0 0 0 0 0 0 0 0 0 1
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## 32 0 0 0 0 0 0 0 0 0 1 0 0
## 33 0 0 0 0 0 0 1 0 0 0 0 0
## 34 0 0 0 0 0 0 0 0 0 0 0 0
## 35 0 1 0 1 0 1 0 0 0 0 0 0
## 36 0 0 0 0 0 0 0 0 0 0 0 0
## 37 0 0 1 0 0 0 0 0 1 0 0 0
## 38 0 1 0 0 0 1 0 0 0 0 0 1
## 39 0 0 0 0 0 0 1 0 0 0 0 0
## 40 0 0 0 0 0 0 0 0 0 0 0 0
## 41 0 0 0 1 0 0 0 0 0 0 0 0
## 42 0 0 0 0 0 0 0 0 0 0 0 0
## 43 1 0 1 0 0 0 2 0 0 0 0 0
## 44 0 1 0 0 1 0 0 0 0 0 0 0
## 45 0 1 0 1 0 0 0 0 0 0 0 0
## 46 0 0 0 0 0 0 0 0 0 0 0 0
## 47 0 0 0 0 0 0 0 0 0 0 0 0
## 48 0 0 0 0 0 0 0 0 0 0 0 0
## 49 0 0 0 0 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 1 0 0 0
## 51 0 0 0 0 0 0 0 0 0 0 0 0
## 52 0 0 0 0 0 0 0 0 0 0 0 0
## 53 0 0 0 0 0 0 0 0 0 0 0 0
## 54 0 0 0 0 0 0 0 0 0 0 0 0
## 55 0 0 0 0 0 0 0 0 0 0 0 0
## 56 1 0 0 0 0 0 0 0 1 0 0 0
## 57 0 0 0 0 0 0 0 0 0 0 0 0
## 58 0 0 0 0 0 0 0 0 0 0 0 0
## 59 0 0 0 0 1 0 0 0 1 0 1 0
## 60 0 0 1 0 0 1 0 0 0 0 0 0
## 61 0 0 0 0 0 0 0 0 0 0 0 0
## 62 0 0 0 0 0 0 0 0 0 0 0 0
## 63 0 0 0 0 0 0 0 0 0 0 0 0
## 64 0 0 0 0 0 0 0 0 0 0 0 0
## 65 0 0 0 0 0 0 0 0 0 0 0 0
## 66 0 0 0 1 0 0 0 1 0 0 0 0
## 67 0 1 0 0 0 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0 0 0 0 0 0
## 69 0 1 0 0 0 0 0 0 0 1 0 0
## 70 0 0 0 0 0 0 0 0 0 0 0 0
## 71 0 0 0 0 0 0 0 0 0 0 0 0
## 72 0 0 0 0 0 0 0 0 1 0 0 0
## 73 0 0 0 0 0 0 0 0 0 0 0 0
## 74 0 0 0 1 0 0 0 0 0 0 1 0
## 75 0 0 0 0 0 0 0 0 0 0 0 0
## 76 0 0 0 0 1 0 2 0 1 0 1 0
## 77 0 0 0 0 0 0 0 0 0 0 1 1
## 78 0 0 0 0 0 0 0 0 0 0 0 0
## 79 0 0 0 0 0 0 0 0 1 1 0 0
## 80 0 0 0 0 0 0 0 0 0 0 1 0
## 81 0 1 0 1 0 0 0 0 0 0 0 0
## 82 0 0 0 0 0 0 0 0 0 0 0 0
## 83 0 0 0 0 0 0 1 0 0 0 0 0
## 84 0 0 0 0 1 0 0 0 1 0 1 0
## 85 0 0 0 0 1 0 0 0 0 1 1 0
## 86 0 0 0 0 0 0 0 0 0 0 0 0
## 87 0 0 0 0 0 0 0 0 0 0 0 0
## 88 0 0 0 0 0 0 0 0 0 0 0 0
## 89 0 0 0 0 0 0 0 0 0 0 0 0
## 90 0 0 0 0 1 0 0 0 0 0 1 0
## 91 0 0 1 0 0 0 0 0 0 0 0 0
## 92 1 1 0 0 0 1 0 0 0 0 0 0
## 93 0 0 0 0 1 0 0 0 0 0 0 0
## 94 0 0 0 0 0 0 0 0 0 0 0 0
## 95 0 0 1 0 0 0 0 0 0 0 0 0
## 96 0 0 0 0 0 0 0 0 0 0 0 0
## 97 0 0 0 0 0 0 0 0 0 0 0 0
## 98 0 0 0 0 0 0 0 0 0 0 0 0
## 99 0 1 0 0 0 0 1 0 1 1 0 0
## 100 0 1 0 0 0 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0 0 0 0 0 0
## 102 0 0 0 0 0 0 0 0 0 0 0 0
## 103 0 0 0 0 0 0 0 0 0 0 1 0
## 104 0 0 0 0 0 0 0 0 1 0 0 0
## 105 0 0 0 0 0 0 0 0 0 0 0 0
## 106 0 0 0 1 0 0 0 0 1 0 0 0
## 107 0 0 0 0 0 0 0 0 0 0 0 0
## 108 0 0 0 0 0 0 0 0 0 0 0 0
## 109 1 0 0 0 0 0 0 0 1 0 0 0
## 110 0 0 0 0 0 0 1 0 0 0 0 0
## 111 0 0 0 0 0 0 0 0 0 0 0 0
## 112 0 0 0 0 0 0 0 0 0 0 0 0
## 113 0 0 0 0 0 0 0 0 0 1 0 0
## 114 0 0 0 0 0 0 0 0 0 0 0 0
## 115 1 0 0 1 1 0 0 1 0 0 0 0
## 116 0 0 0 0 0 0 0 0 0 0 0 0
## 117 0 1 0 0 1 0 0 0 0 0 0 0
## 118 0 0 0 0 0 0 0 0 0 0 0 0
## 119 0 0 0 0 0 0 0 0 0 0 0 0
## 120 2 2 0 0 0 1 1 0 0 0 0 0
## 121 0 0 0 0 1 0 0 0 0 0 0 0
## 122 0 0 0 0 0 0 0 0 0 0 0 0
## 123 0 0 0 0 0 0 0 1 1 0 0 0
## 124 0 0 0 0 0 0 0 0 0 0 0 0
## 125 0 0 0 0 0 0 0 0 0 0 0 0
## 126 0 0 0 0 0 0 0 0 0 0 0 1
## 127 0 0 0 0 0 0 0 0 0 0 0 0
## 128 0 0 0 0 0 0 0 0 0 0 0 0
## 129 0 0 0 0 0 0 0 0 0 0 0 0
## 130 0 1 0 0 0 0 0 0 1 0 0 0
## 131 0 0 0 0 0 0 0 0 0 0 0 1
## 132 0 0 0 0 0 0 0 0 0 0 0 0
## 133 0 0 0 0 0 0 0 0 0 0 0 0
## 134 0 0 0 0 0 0 0 0 0 0 0 0
## 135 0 0 0 0 0 0 0 0 0 0 0 0
## 136 0 0 0 0 1 0 1 0 0 0 1 1
## 137 0 0 0 0 0 0 1 0 0 0 0 0
## 138 0 1 0 0 0 0 0 0 0 0 0 0
## 139 0 0 0 0 1 0 0 0 0 0 0 0
## 140 0 0 0 0 0 0 0 0 0 0 0 1
## 141 0 0 0 0 0 0 0 0 0 0 0 0
## 142 0 0 0 0 0 0 0 0 0 0 0 0
## 143 0 1 0 0 0 0 0 0 1 1 0 0
## 144 0 0 0 0 0 0 0 0 0 0 0 0
## 145 0 0 0 0 1 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0 0 0 0 0 0
## 147 1 0 0 0 0 0 1 1 0 0 0 0
## 148 0 0 0 0 0 0 0 0 0 0 1 0
## 149 0 1 0 1 0 0 0 0 0 0 0 0
## 150 0 1 0 0 0 0 0 0 1 1 0 0
## 151 0 0 0 0 0 1 0 0 0 0 0 0
## 152 0 0 0 0 0 0 0 0 0 0 0 0
## 153 0 0 0 0 0 0 0 0 0 0 0 0
## 154 0 0 0 0 0 0 1 0 0 0 1 0
## 155 0 0 0 0 0 0 0 0 0 0 0 0
## 156 0 0 0 0 0 0 0 0 0 0 0 0
## 157 0 0 0 0 0 0 0 0 0 0 0 0
## 158 0 0 0 0 0 0 0 0 0 0 0 0
## 159 0 0 0 0 0 0 0 0 0 0 0 0
## 160 0 0 0 0 0 1 0 0 0 0 0 0
## 161 0 0 0 0 0 0 1 0 0 0 0 0
## 162 0 0 0 0 0 0 0 0 0 0 0 0
## 163 0 1 0 0 0 1 0 1 0 0 0 0
## 164 0 0 1 1 0 0 0 0 0 0 0 0
## 165 0 0 0 0 0 0 0 0 0 0 0 0
## 166 0 0 0 0 0 0 0 0 0 0 0 0
## 167 0 0 0 0 0 0 0 0 0 0 0 0
## 168 0 0 0 0 0 0 0 0 0 0 0 1
## 169 9 12 5 4 10 1 9 2 9 3 5 5
## Terms
## Docs kenapa telat gak aku
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## 11 0 0 0 0
## 12 0 0 0 0
## 13 0 0 0 0
## 14 0 0 0 0
## 15 0 0 0 0
## 16 1 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 0
## 22 0 0 0 0
## 23 0 0 0 0
## 24 0 0 0 0
## 25 0 0 0 0
## 26 0 1 0 0
## 27 0 0 0 0
## 28 0 0 1 0
## 29 1 0 0 0
## 30 0 0 0 0
## 31 0 0 0 0
## 32 0 0 0 0
## 33 0 0 0 0
## 34 0 0 0 0
## 35 0 0 0 2
## 36 0 0 0 0
## 37 0 0 0 0
## 38 0 0 0 1
## 39 0 0 1 1
## 40 0 0 0 0
## 41 0 0 0 0
## 42 0 1 0 0
## 43 1 1 1 0
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 1 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
## 50 0 0 0 0
## 51 0 0 0 0
## 52 0 0 0 0
## 53 0 1 0 0
## 54 0 0 0 0
## 55 0 0 0 0
## 56 0 0 0 1
## 57 0 0 0 0
## 58 0 0 0 0
## 59 0 0 0 0
## 60 0 0 0 0
## 61 0 0 0 1
## 62 0 0 0 0
## 63 0 0 0 0
## 64 0 0 0 0
## 65 0 1 0 0
## 66 0 0 1 0
## 67 0 0 0 0
## 68 0 0 0 0
## 69 0 0 0 0
## 70 0 0 0 0
## 71 0 0 0 0
## 72 0 0 0 0
## 73 0 0 0 0
## 74 0 0 0 0
## 75 0 0 0 0
## 76 0 0 0 0
## 77 0 0 0 0
## 78 0 0 0 0
## 79 0 0 0 0
## 80 0 0 0 0
## 81 1 0 0 0
## 82 0 0 0 0
## 83 0 0 0 0
## 84 0 0 0 0
## 85 0 0 0 0
## 86 0 0 0 0
## 87 0 0 0 0
## 88 0 1 0 0
## 89 0 0 0 0
## 90 0 0 0 0
## 91 0 0 0 0
## 92 0 0 0 0
## 93 0 0 0 0
## 94 0 0 0 0
## 95 0 0 0 0
## 96 0 0 0 0
## 97 0 0 0 0
## 98 0 1 0 0
## 99 0 0 0 0
## 100 0 0 3 0
## 101 0 0 0 0
## 102 0 0 0 0
## 103 0 0 0 0
## 104 0 0 0 0
## 105 0 0 0 0
## 106 1 0 0 0
## 107 0 0 0 0
## 108 0 0 0 0
## 109 0 0 0 0
## 110 0 0 0 1
## 111 0 0 0 0
## 112 0 0 0 1
## 113 0 0 0 0
## 114 0 0 0 0
## 115 0 0 0 0
## 116 0 0 0 0
## 117 0 0 0 0
## 118 0 0 0 0
## 119 0 0 0 0
## 120 0 0 1 0
## 121 0 0 0 0
## 122 0 0 0 0
## 123 0 0 0 0
## 124 0 0 0 0
## 125 0 0 0 0
## 126 1 0 0 0
## 127 0 0 0 0
## 128 0 0 0 1
## 129 0 0 0 0
## 130 0 0 0 0
## 131 0 0 1 0
## 132 0 0 0 0
## 133 0 0 0 0
## 134 0 0 0 0
## 135 0 0 0 0
## 136 0 1 1 0
## 137 0 0 0 0
## 138 0 0 0 0
## 139 0 0 0 0
## 140 0 0 1 0
## 141 0 0 0 0
## 142 0 0 0 1
## 143 0 0 0 0
## 144 0 0 0 0
## 145 1 0 0 0
## 146 0 0 0 0
## 147 0 0 1 0
## 148 0 0 0 0
## 149 0 0 0 0
## 150 1 0 0 0
## 151 0 0 0 0
## 152 0 0 0 0
## 153 0 0 0 0
## 154 0 1 0 0
## 155 0 0 0 0
## 156 0 0 0 0
## 157 0 0 0 0
## 158 0 0 0 0
## 159 0 0 0 0
## 160 0 0 0 0
## 161 0 0 0 0
## 162 0 0 0 0
## 163 0 0 0 0
## 164 1 0 0 0
## 165 0 0 0 0
## 166 0 0 0 0
## 167 0 0 0 0
## 168 0 0 0 0
## 169 8 1 2 7
# Fix the random seed so the k-means result is reproducible.
set.seed(122)

# Number of clusters.
k <- 3
kmeansResult <- kmeans(m3, centers = k)

# Cluster centres, rounded to three decimals.
round(kmeansResult$centers, 3)
## aja skripsi dari dulu lulus tapi pak kampus baru sidang udah
## 1 0.038 0.139 0.044 0.07 0.095 0.063 0.038 0.051 0.108 0.051 0.07
## 2 9.000 12.000 5.000 4.00 10.000 1.000 9.000 2.000 9.000 3.000 5.00
## 3 0.500 0.400 0.100 0.00 0.200 0.100 1.100 0.100 0.200 0.100 0.30
## skripsian kenapa telat gak aku
## 1 0.051 0.051 0.038 0.032 0.057
## 2 5.000 8.000 1.000 2.000 7.000
## 3 0.100 0.100 0.300 0.800 0.100
# For each cluster, print the (up to) five terms with the largest centre
# values.
for (i in seq_len(k)) {
  cat(paste0("cluster ", i, ": "))
  s <- sort(kmeansResult$centers[i, ], decreasing = TRUE)
  # head() avoids NA entries when there are fewer than five terms.
  cat(head(names(s), 5), "\n")
  # print the tweets of every cluster
  # print(tweets[which(kmeansResult$cluster == i)])
}
## cluster 1: skripsi baru lulus dulu udah
## cluster 2: skripsi lulus aja pak baru
## cluster 3: pak gak aja skripsi udah