DATA MINING

Data mining adalah upaya mendapatkan informasi dari kumpulan data tertentu. Nantinya, informasi tersebut akan diolah dan digunakan sesuai dengan tujuan data mining yang ditentukan. Terutama bagi bisnis, upaya data mining penting untuk mengambil keputusan terbaik sesuai strategi bisnis yang akan dijalankan.

Panggil semua packages dengan fungsi library (nama_packages)

library(wordcloud)
## Loading required package: RColorBrewer
library(tm)
## Loading required package: NLP
library(textclean)
library(tidytext)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
library(parallel)
library(tokenizers)
library(tau)
library(NLP)
library(stringr)
library(devtools)
## Loading required package: usethis
library(quanteda)
## Package version: 3.3.1
## Unicode version: 13.0
## ICU version: 69.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:tm':
## 
##     stopwords
## The following objects are masked from 'package:NLP':
## 
##     meta, meta<-
library(kayadata)
library(syuzhet)
library(e1071)
library(sentimentr)
## 
## Attaching package: 'sentimentr'
## The following object is masked from 'package:syuzhet':
## 
##     get_sentences
library(SentimentAnalysis)
## 
## Attaching package: 'SentimentAnalysis'
## The following object is masked from 'package:base':
## 
##     write
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(pacman)
pacman::p_load(textstem, dplyr)

TENTANG DATASET

Dataset yang digunakan adalah data yang diperoleh dari hasil crowling data di twitter. data ini berisi 160 lebih tweets tentang wacana penghapusan skripsi oleh mentri pendidikan NADIEM MAKARIM.

Import data ke dalam R untuk dilakukan analisis

setwd("C:/Users/kjl/Documents/Tugas Kuliah/Komputasi Lanjut Semester V")
skripsi <- read.csv("~/Tugas Kuliah/Komputasi Lanjut Semester V/pak-mentri-skripsi.csv", sep=";")
tweets<-skripsi$full_text
head(skripsi)
##                       created_at       id_str
## 1 Wed Sep 13 15:19:31 +0000 2023 1.701979e+18
## 2 Wed Sep 13 14:16:29 +0000 2023 1.701963e+18
## 3 Sat Sep 09 14:27:36 +0000 2023 1.700516e+18
## 4 Fri Sep 01 12:42:40 +0000 2023 1.697591e+18
## 5 Fri Sep 01 01:49:40 +0000 2023 1.697426e+18
## 6 Wed Aug 30 17:26:01 +0000 2023 1.696937e+18
##                                                                                                                                                                               full_text
## 1                                                                                                                                                                                 Bagus
## 2                                                                                                                                                                   Bagus. Sudah benar.
## 3                                                                                                          Unas uda ditiadakan, sekarang skripsi, lama2 sekolah ditiadakan sekalian aja
## 4                                                                                                                                                      yup setuju jdi opsional ðŸ‘\215ðŸ‘\215
## 5 Kali ini ide mendikbud fresh banget. Tapi sayang, kalo keputusannya dari dah dulu, harusnya semester 5 th 2018 gw dah lulus wkwkwk but yg terjadi adalah semester 10 4 tahun 10 bulan
## 6                                                                                                                            ganti jadi jurnal scopus aja pak. minimal sinta 2 deh ðŸ‘\215
##   quote_count reply_count retweet_count favorite_count lang  user_id_str
## 1           0           0             0              0   in 1.463881e+09
## 2           0           1             0              5   in 3.454436e+07
## 3           0           0             0              1   in 1.770778e+08
## 4           0           0             2              3   in 1.184692e+08
## 5           0           0             0              0   in 2.354459e+08
## 6           0           0             0              0   in 1.356924e+18
##   conversation_id_str      username
## 1        1.701979e+18    Raisuzaman
## 2        1.701963e+18    edbertgani
## 3        1.700516e+18    sanasini27
## 4        1.697591e+18 AlphaARachman
## 5        1.697426e+18   niasakinah_
## 6        1.696937e+18     haeemeung
##                                                      tweet_url
## 1    https://twitter.com/Raisuzaman/status/1701978923179868579
## 2    https://twitter.com/edbertgani/status/1701963062683570242
## 3    https://twitter.com/sanasini27/status/1700516308989989301
## 4 https://twitter.com/AlphaARachman/status/1697590799247221159
## 5   https://twitter.com/niasakinah_/status/1697426463774146868
## 6     https://twitter.com/haeemeung/status/1696937329825611821

##duplikat

#duplicate
tweets <- skripsi%>% 
  as.data.frame() %>% 
  distinct()
tweets

##jumlah baris tweet setelah duplikat dihapus

nrow(tweets)
## NULL

##hapus url

tweets <- tweets %>% 
  replace_html() %>%   
  replace_url()
tweets

tweets <- strip(tweets)
head(tweets)

##stemming/lemmatizing = kata dasar

#stemming/lemmatizing = kata dasar
stem_strings(tweets)

##cetak tweet dengan html yang dikonversi di index

replace_html(replace_emoji(tweets))

melakukan tugas penggantian seluruh variabel teks

tweets <- tweets %>% 
  replace_emoji(.) %>% 
  replace_html(.)

hapus mentions

tweets <- tweets %>% 
  replace_tag(tweets, pattern = "@([A-Za-z0-9_]+)",replacement="") %>%  # remove mentions
  replace_hash(tweets, pattern = "#([A-Za-z0-9_]+)",replacement="")      # remove hashtags
tweets

##strip simbol

tweets <- strip(tweets)

##menghapus kata penghubung atau kata yang tidak baku

tweets <-removeWords(tweets, c("di","dan","yang","akan","agar","seperti","yaitu","kami","kami",
                         "mari","pada","jelang","dimana","dengan","sudah","ini","seluruh",
                         "diminta","tak","itu","hai","bisa","wib","oleh","mai","jam", "aug",
                         "masa","berikut","kalau","klik","ibodwq","terd","httpstconvv","tue","wed",
                         "httpstcoxu","yzmrlyx","tahapan","refaabdi","kota","kpu","kpuid","rt","hingga",
                         "saat", "belum","apa","sih","suara","pesta","dindap","http","httpstco",
                         "asn","bakal","wkwk","wkwkw","aug","iya","uu","i","ada","ngene","yang","bjir",
                         "ðÿðÿ","un","anjir","tahi","tbtb","my","wios","sialan","wkwkwkwk","sip","omo",
                         "like","plss","ket","e","after","ha","pakðÿ", "but","rill","cashback",
                         "allah","and","o","ðÿ'^ðÿ","nya","ya","ðÿ","no","nuruk","ki","jir",
                         "anjing","biar","kagak","sayang","mah","anjay","ngaruh","kalo","gua","thesis",
                         "skripsiðÿ","duh","ih","ots","a","pft","plis","plan","ra","rabi","o", 
                         "skripshit","duit","sih","nih", "amp", "ï","tuh","tau","â","â","aaaa","deh","ðÿº",
                         "coba","dll","iki","gue","kena","oon","pas","sad","up","wkwkwk","waleh","ajg",
                         "ah","adaâ","alaala","alah","alamðÿ","allahâ","ayo","end","bu","biak","is"))
head(rev)
##                   
## 1 function (x)    
## 2 UseMethod("rev")

lower case = mengubah huruf kapital menjadi huruf kecil

tweets <- tolower(tweets)
tweets

##Mengembalikan Kata yang disingkat Menjadi Kata Aslinya

tweets <- replace_contraction(tweets)
tweets

###Mengembalikan Kata yang Mengalami Perpanjangan Menjadi Kata Aslinya

tweets <- replace_word_elongation(tweets)
tweets

Menyimpang data yang sudah dibersihkan

write.csv(rev,file = "C:/Users/kjl/Documents/data-bersih3.csv", row.names = F) 

Mengubah Data Frame Menjadi Data Faktor

tdm <- TermDocumentMatrix(tweets)
m <- as.matrix(tdm)
v <- sort(rowSums(m),decreasing = TRUE)

##Mengubah Data Faktor Menjadi Data Frame

d <- data.frame(word = names(v), freq = v)

Membuast Diagram Worcloud

wordcloud(d$word, d$freq,
          random.order = FALSE,
          max.words = 500,
          colors = brewer.pal(name="Dark2",8))

tdm <-TermDocumentMatrix (tweets,
                        control = list(wordLengths= c (1, inf)))
tdm

periksa kata-kata yang sering muncul

(freq.terms <- findFreqTerms(tdm, lowfreq = 14))
##  [1] "aja"       "skripsi"   "dah"       "dulu"      "lulus"     "pak"      
##  [7] "baru"      "udah"      "skripsian" "kenapa"    "gak"       "aku"
term.freq <- rowSums(as.matrix(tdm))
term.freq <- subset(term.freq, term.freq >= 14)
df <- data.frame(term = names(term.freq), freq = term.freq)
ggplot(df, aes(x = term, y = freq)) + geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip()

##Menghapus istilah-istilah yang jarang?

tdm2 <- removeSparseTerms(tdm, sparse = 0.95)
m2 <- as.matrix(tdm2)

ANALISIS CLUSTER HIERARKI

distMatrix <- dist(scale (m2))
fit <- hclust (distMatrix, method = "ward")
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"
plot(fit) 
rect.hclust(fit, k = 4)

ANALISIS CLUSTER K-MEANS

m3 <- t(m2) # transpose the matrix to cluster documents (tweets)
m3
##      Terms
## Docs  aja skripsi dari dulu lulus tapi pak kampus baru sidang udah skripsian
##   1     0       0    0    0     0    0   0      0    0      0    0         0
##   2     0       0    0    0     0    0   0      0    0      0    0         0
##   3     1       1    0    0     0    0   0      0    0      0    0         0
##   4     0       0    0    0     0    0   0      0    0      0    0         0
##   5     0       0    1    1     1    1   0      0    0      0    0         0
##   6     1       0    0    0     0    0   1      0    0      0    0         0
##   7     0       0    0    0     0    0   0      0    0      0    0         0
##   8     0       0    0    0     0    0   0      0    0      0    0         0
##   9     1       1    0    0     0    0   0      0    0      0    0         0
##   10    0       0    0    0     0    0   0      1    0      0    0         0
##   11    0       0    0    0     0    0   0      0    0      0    0         0
##   12    0       0    0    0     0    0   0      1    0      0    0         0
##   13    0       0    0    0     0    0   0      0    1      1    1         0
##   14    0       1    0    0     2    1   0      0    0      0    0         0
##   15    0       0    0    0     0    0   0      0    0      0    1         1
##   16    0       0    0    0     0    0   0      0    1      0    0         0
##   17    0       1    1    0     1    0   0      0    0      0    0         0
##   18    0       0    0    0     0    0   0      0    0      0    0         0
##   19    0       0    0    0     0    0   0      0    0      0    0         0
##   20    0       0    0    0     0    0   0      0    0      0    0         0
##   21    0       0    0    0     0    0   0      0    0      0    0         0
##   22    0       0    0    0     0    0   0      0    0      0    0         0
##   23    0       1    0    0     0    1   0      2    0      0    0         0
##   24    0       0    0    0     0    0   0      0    0      0    0         0
##   25    0       1    0    0     0    0   0      0    0      0    0         0
##   26    0       0    0    0     0    0   0      0    0      0    0         0
##   27    0       0    0    0     0    0   1      0    0      0    0         0
##   28    0       1    0    0     0    0   0      0    0      0    0         0
##   29    0       0    0    0     0    0   0      0    1      0    0         0
##   30    0       0    0    0     0    0   0      0    0      0    0         1
##   31    0       0    0    0     0    0   0      0    0      0    0         0
##   32    0       0    0    0     0    0   0      0    0      1    0         0
##   33    0       0    0    0     0    0   1      0    0      0    0         0
##   34    0       0    0    0     0    0   0      0    0      0    0         0
##   35    0       1    0    1     0    1   0      0    0      0    0         0
##   36    0       0    0    0     0    0   0      0    0      0    0         0
##   37    0       0    1    0     0    0   0      0    1      0    0         0
##   38    0       1    0    0     0    1   0      0    0      0    0         1
##   39    0       0    0    0     0    0   1      0    0      0    0         0
##   40    0       0    0    0     0    0   0      0    0      0    0         0
##   41    0       0    0    1     0    0   0      0    0      0    0         0
##   42    0       0    0    0     0    0   0      0    0      0    0         0
##   43    1       0    1    0     0    0   2      0    0      0    0         0
##   44    0       1    0    0     1    0   0      0    0      0    0         0
##   45    0       1    0    1     0    0   0      0    0      0    0         0
##   46    0       0    0    0     0    0   0      0    0      0    0         0
##   47    0       0    0    0     0    0   0      0    0      0    0         0
##   48    0       0    0    0     0    0   0      0    0      0    0         0
##   49    0       0    0    0     0    0   0      0    0      0    0         0
##   50    0       0    0    0     0    0   0      0    1      0    0         0
##   51    0       0    0    0     0    0   0      0    0      0    0         0
##   52    0       0    0    0     0    0   0      0    0      0    0         0
##   53    0       0    0    0     0    0   0      0    0      0    0         0
##   54    0       0    0    0     0    0   0      0    0      0    0         0
##   55    0       0    0    0     0    0   0      0    0      0    0         0
##   56    1       0    0    0     0    0   0      0    1      0    0         0
##   57    0       0    0    0     0    0   0      0    0      0    0         0
##   58    0       0    0    0     0    0   0      0    0      0    0         0
##   59    0       0    0    0     1    0   0      0    1      0    1         0
##   60    0       0    1    0     0    1   0      0    0      0    0         0
##   61    0       0    0    0     0    0   0      0    0      0    0         0
##   62    0       0    0    0     0    0   0      0    0      0    0         0
##   63    0       0    0    0     0    0   0      0    0      0    0         0
##   64    0       0    0    0     0    0   0      0    0      0    0         0
##   65    0       0    0    0     0    0   0      0    0      0    0         0
##   66    0       0    0    1     0    0   0      1    0      0    0         0
##   67    0       1    0    0     0    0   0      0    0      0    0         0
##   68    0       0    0    0     0    0   0      0    0      0    0         0
##   69    0       1    0    0     0    0   0      0    0      1    0         0
##   70    0       0    0    0     0    0   0      0    0      0    0         0
##   71    0       0    0    0     0    0   0      0    0      0    0         0
##   72    0       0    0    0     0    0   0      0    1      0    0         0
##   73    0       0    0    0     0    0   0      0    0      0    0         0
##   74    0       0    0    1     0    0   0      0    0      0    1         0
##   75    0       0    0    0     0    0   0      0    0      0    0         0
##   76    0       0    0    0     1    0   2      0    1      0    1         0
##   77    0       0    0    0     0    0   0      0    0      0    1         1
##   78    0       0    0    0     0    0   0      0    0      0    0         0
##   79    0       0    0    0     0    0   0      0    1      1    0         0
##   80    0       0    0    0     0    0   0      0    0      0    1         0
##   81    0       1    0    1     0    0   0      0    0      0    0         0
##   82    0       0    0    0     0    0   0      0    0      0    0         0
##   83    0       0    0    0     0    0   1      0    0      0    0         0
##   84    0       0    0    0     1    0   0      0    1      0    1         0
##   85    0       0    0    0     1    0   0      0    0      1    1         0
##   86    0       0    0    0     0    0   0      0    0      0    0         0
##   87    0       0    0    0     0    0   0      0    0      0    0         0
##   88    0       0    0    0     0    0   0      0    0      0    0         0
##   89    0       0    0    0     0    0   0      0    0      0    0         0
##   90    0       0    0    0     1    0   0      0    0      0    1         0
##   91    0       0    1    0     0    0   0      0    0      0    0         0
##   92    1       1    0    0     0    1   0      0    0      0    0         0
##   93    0       0    0    0     1    0   0      0    0      0    0         0
##   94    0       0    0    0     0    0   0      0    0      0    0         0
##   95    0       0    1    0     0    0   0      0    0      0    0         0
##   96    0       0    0    0     0    0   0      0    0      0    0         0
##   97    0       0    0    0     0    0   0      0    0      0    0         0
##   98    0       0    0    0     0    0   0      0    0      0    0         0
##   99    0       1    0    0     0    0   1      0    1      1    0         0
##   100   0       1    0    0     0    0   0      0    0      0    0         0
##   101   0       0    0    0     0    0   0      0    0      0    0         0
##   102   0       0    0    0     0    0   0      0    0      0    0         0
##   103   0       0    0    0     0    0   0      0    0      0    1         0
##   104   0       0    0    0     0    0   0      0    1      0    0         0
##   105   0       0    0    0     0    0   0      0    0      0    0         0
##   106   0       0    0    1     0    0   0      0    1      0    0         0
##   107   0       0    0    0     0    0   0      0    0      0    0         0
##   108   0       0    0    0     0    0   0      0    0      0    0         0
##   109   1       0    0    0     0    0   0      0    1      0    0         0
##   110   0       0    0    0     0    0   1      0    0      0    0         0
##   111   0       0    0    0     0    0   0      0    0      0    0         0
##   112   0       0    0    0     0    0   0      0    0      0    0         0
##   113   0       0    0    0     0    0   0      0    0      1    0         0
##   114   0       0    0    0     0    0   0      0    0      0    0         0
##   115   1       0    0    1     1    0   0      1    0      0    0         0
##   116   0       0    0    0     0    0   0      0    0      0    0         0
##   117   0       1    0    0     1    0   0      0    0      0    0         0
##   118   0       0    0    0     0    0   0      0    0      0    0         0
##   119   0       0    0    0     0    0   0      0    0      0    0         0
##   120   2       2    0    0     0    1   1      0    0      0    0         0
##   121   0       0    0    0     1    0   0      0    0      0    0         0
##   122   0       0    0    0     0    0   0      0    0      0    0         0
##   123   0       0    0    0     0    0   0      1    1      0    0         0
##   124   0       0    0    0     0    0   0      0    0      0    0         0
##   125   0       0    0    0     0    0   0      0    0      0    0         0
##   126   0       0    0    0     0    0   0      0    0      0    0         1
##   127   0       0    0    0     0    0   0      0    0      0    0         0
##   128   0       0    0    0     0    0   0      0    0      0    0         0
##   129   0       0    0    0     0    0   0      0    0      0    0         0
##   130   0       1    0    0     0    0   0      0    1      0    0         0
##   131   0       0    0    0     0    0   0      0    0      0    0         1
##   132   0       0    0    0     0    0   0      0    0      0    0         0
##   133   0       0    0    0     0    0   0      0    0      0    0         0
##   134   0       0    0    0     0    0   0      0    0      0    0         0
##   135   0       0    0    0     0    0   0      0    0      0    0         0
##   136   0       0    0    0     1    0   1      0    0      0    1         1
##   137   0       0    0    0     0    0   1      0    0      0    0         0
##   138   0       1    0    0     0    0   0      0    0      0    0         0
##   139   0       0    0    0     1    0   0      0    0      0    0         0
##   140   0       0    0    0     0    0   0      0    0      0    0         1
##   141   0       0    0    0     0    0   0      0    0      0    0         0
##   142   0       0    0    0     0    0   0      0    0      0    0         0
##   143   0       1    0    0     0    0   0      0    1      1    0         0
##   144   0       0    0    0     0    0   0      0    0      0    0         0
##   145   0       0    0    0     1    0   0      0    0      0    0         0
##   146   0       0    0    0     0    0   0      0    0      0    0         0
##   147   1       0    0    0     0    0   1      1    0      0    0         0
##   148   0       0    0    0     0    0   0      0    0      0    1         0
##   149   0       1    0    1     0    0   0      0    0      0    0         0
##   150   0       1    0    0     0    0   0      0    1      1    0         0
##   151   0       0    0    0     0    1   0      0    0      0    0         0
##   152   0       0    0    0     0    0   0      0    0      0    0         0
##   153   0       0    0    0     0    0   0      0    0      0    0         0
##   154   0       0    0    0     0    0   1      0    0      0    1         0
##   155   0       0    0    0     0    0   0      0    0      0    0         0
##   156   0       0    0    0     0    0   0      0    0      0    0         0
##   157   0       0    0    0     0    0   0      0    0      0    0         0
##   158   0       0    0    0     0    0   0      0    0      0    0         0
##   159   0       0    0    0     0    0   0      0    0      0    0         0
##   160   0       0    0    0     0    1   0      0    0      0    0         0
##   161   0       0    0    0     0    0   1      0    0      0    0         0
##   162   0       0    0    0     0    0   0      0    0      0    0         0
##   163   0       1    0    0     0    1   0      1    0      0    0         0
##   164   0       0    1    1     0    0   0      0    0      0    0         0
##   165   0       0    0    0     0    0   0      0    0      0    0         0
##   166   0       0    0    0     0    0   0      0    0      0    0         0
##   167   0       0    0    0     0    0   0      0    0      0    0         0
##   168   0       0    0    0     0    0   0      0    0      0    0         1
##   169   9      12    5    4    10    1   9      2    9      3    5         5
##      Terms
## Docs  kenapa telat gak aku
##   1        0     0   0   0
##   2        0     0   0   0
##   3        0     0   0   0
##   4        0     0   0   0
##   5        0     0   0   0
##   6        0     0   0   0
##   7        0     0   0   0
##   8        0     0   0   0
##   9        0     0   0   0
##   10       0     0   0   0
##   11       0     0   0   0
##   12       0     0   0   0
##   13       0     0   0   0
##   14       0     0   0   0
##   15       0     0   0   0
##   16       1     0   0   0
##   17       0     0   0   0
##   18       0     0   0   0
##   19       0     0   0   0
##   20       0     0   0   0
##   21       0     0   0   0
##   22       0     0   0   0
##   23       0     0   0   0
##   24       0     0   0   0
##   25       0     0   0   0
##   26       0     1   0   0
##   27       0     0   0   0
##   28       0     0   1   0
##   29       1     0   0   0
##   30       0     0   0   0
##   31       0     0   0   0
##   32       0     0   0   0
##   33       0     0   0   0
##   34       0     0   0   0
##   35       0     0   0   2
##   36       0     0   0   0
##   37       0     0   0   0
##   38       0     0   0   1
##   39       0     0   1   1
##   40       0     0   0   0
##   41       0     0   0   0
##   42       0     1   0   0
##   43       1     1   1   0
##   44       0     0   0   0
##   45       0     0   0   0
##   46       0     0   1   0
##   47       0     0   0   0
##   48       0     0   0   0
##   49       0     0   0   0
##   50       0     0   0   0
##   51       0     0   0   0
##   52       0     0   0   0
##   53       0     1   0   0
##   54       0     0   0   0
##   55       0     0   0   0
##   56       0     0   0   1
##   57       0     0   0   0
##   58       0     0   0   0
##   59       0     0   0   0
##   60       0     0   0   0
##   61       0     0   0   1
##   62       0     0   0   0
##   63       0     0   0   0
##   64       0     0   0   0
##   65       0     1   0   0
##   66       0     0   1   0
##   67       0     0   0   0
##   68       0     0   0   0
##   69       0     0   0   0
##   70       0     0   0   0
##   71       0     0   0   0
##   72       0     0   0   0
##   73       0     0   0   0
##   74       0     0   0   0
##   75       0     0   0   0
##   76       0     0   0   0
##   77       0     0   0   0
##   78       0     0   0   0
##   79       0     0   0   0
##   80       0     0   0   0
##   81       1     0   0   0
##   82       0     0   0   0
##   83       0     0   0   0
##   84       0     0   0   0
##   85       0     0   0   0
##   86       0     0   0   0
##   87       0     0   0   0
##   88       0     1   0   0
##   89       0     0   0   0
##   90       0     0   0   0
##   91       0     0   0   0
##   92       0     0   0   0
##   93       0     0   0   0
##   94       0     0   0   0
##   95       0     0   0   0
##   96       0     0   0   0
##   97       0     0   0   0
##   98       0     1   0   0
##   99       0     0   0   0
##   100      0     0   3   0
##   101      0     0   0   0
##   102      0     0   0   0
##   103      0     0   0   0
##   104      0     0   0   0
##   105      0     0   0   0
##   106      1     0   0   0
##   107      0     0   0   0
##   108      0     0   0   0
##   109      0     0   0   0
##   110      0     0   0   1
##   111      0     0   0   0
##   112      0     0   0   1
##   113      0     0   0   0
##   114      0     0   0   0
##   115      0     0   0   0
##   116      0     0   0   0
##   117      0     0   0   0
##   118      0     0   0   0
##   119      0     0   0   0
##   120      0     0   1   0
##   121      0     0   0   0
##   122      0     0   0   0
##   123      0     0   0   0
##   124      0     0   0   0
##   125      0     0   0   0
##   126      1     0   0   0
##   127      0     0   0   0
##   128      0     0   0   1
##   129      0     0   0   0
##   130      0     0   0   0
##   131      0     0   1   0
##   132      0     0   0   0
##   133      0     0   0   0
##   134      0     0   0   0
##   135      0     0   0   0
##   136      0     1   1   0
##   137      0     0   0   0
##   138      0     0   0   0
##   139      0     0   0   0
##   140      0     0   1   0
##   141      0     0   0   0
##   142      0     0   0   1
##   143      0     0   0   0
##   144      0     0   0   0
##   145      1     0   0   0
##   146      0     0   0   0
##   147      0     0   1   0
##   148      0     0   0   0
##   149      0     0   0   0
##   150      1     0   0   0
##   151      0     0   0   0
##   152      0     0   0   0
##   153      0     0   0   0
##   154      0     1   0   0
##   155      0     0   0   0
##   156      0     0   0   0
##   157      0     0   0   0
##   158      0     0   0   0
##   159      0     0   0   0
##   160      0     0   0   0
##   161      0     0   0   0
##   162      0     0   0   0
##   163      0     0   0   0
##   164      1     0   0   0
##   165      0     0   0   0
##   166      0     0   0   0
##   167      0     0   0   0
##   168      0     0   0   0
##   169      8     1   2   7
set.seed(122)
k<- 3
kmeansResult<-kmeans(m3, k)
round(kmeansResult$centers, digits=3)
##     aja skripsi  dari dulu  lulus  tapi   pak kampus  baru sidang udah
## 1 0.038   0.139 0.044 0.07  0.095 0.063 0.038  0.051 0.108  0.051 0.07
## 2 9.000  12.000 5.000 4.00 10.000 1.000 9.000  2.000 9.000  3.000 5.00
## 3 0.500   0.400 0.100 0.00  0.200 0.100 1.100  0.100 0.200  0.100 0.30
##   skripsian kenapa telat   gak   aku
## 1     0.051  0.051 0.038 0.032 0.057
## 2     5.000  8.000 1.000 2.000 7.000
## 3     0.100  0.100 0.300 0.800 0.100
for (i in 1:k) {
  cat(paste("cluster ", i, ": ", sep = ""))
  s <- sort(kmeansResult$centers[i, ], decreasing = T)
  cat(names(s)[1:5], "\n")
  # print the tweets of every cluster
  # print(tweets[which(kmeansResult£cluster==i)])
}
## cluster 1: skripsi baru lulus dulu udah 
## cluster 2: skripsi lulus aja pak baru 
## cluster 3: pak gak aja skripsi udah