You can find the files, named googleplaystore.csv and wordcloud, at:
https://github.com/empireisme/master/tree/master/app
1. Please remember to install the following packages:
library(readr)
require(jiebaR)
## Loading required package: jiebaR
## Warning: package 'jiebaR' was built under R version 3.6.3
## Loading required package: jiebaRD
## Warning: package 'jiebaRD' was built under R version 3.6.3
require(wordcloud)
## Loading required package: wordcloud
## Warning: package 'wordcloud' was built under R version 3.6.3
## Loading required package: RColorBrewer
2. Please put this file (.Rmd) and googleplaystore.csv in the same folder.
# Print the current working directory; the relative path given to read_csv()
# for googleplaystore.csv is resolved against it.
getwd()
## [1] "C:/Users/User/Documents/GitHub/master/app"
# Load the Google Play listing; the path is relative to the working directory.
data <- readr::read_csv(file = "googleplaystore.csv")
## Parsed with column specification:
## cols(
## App = col_character(),
## Category = col_character(),
## Rating = col_double(),
## Reviews = col_double(),
## Size = col_character(),
## Installs = col_character(),
## Type = col_character(),
## Price = col_character(),
## `Content Rating` = col_character(),
## Genres = col_character(),
## `Last Updated` = col_character(),
## `Current Ver` = col_character(),
## `Android Ver` = col_character()
## )
# Segment every Android app name into tokens with a default jiebaR worker,
# then tabulate how often each token occurs.
datause <- data
mixseg <- worker()
seg <- mixseg[datause[["App"]]]
# Frequency table over all tokens.
segA <- data.frame(table(seg))
# Frequency table restricted to tokens longer than one character.
segC <- data.frame(
  table(seg[nchar(seg) > 1])
)
# Keep the 50 most frequent tokens, highest count first.
segC_top50 <- head(
  segC[order(segC$Freq, decreasing = TRUE), ],
  n = 50
)
# Stop words to drop from the top-50 table before plotting.
word_remove <- c("for", "The", "and", "of", "the", "by", "with", "to", "in", "For")
# Row positions of the stop words inside segC_top50 (kept for inspection).
index <- which(segC_top50$Var1 %in% word_remove)
# Filter with a logical mask rather than `segC_top50[-index, ]`: when no stop
# word is present `index` is integer(0), and negative indexing with an empty
# vector selects zero rows instead of keeping all of them.
android <- segC_top50[!(segC_top50$Var1 %in% word_remove), ]
library(wordcloud)
# "Heiti TC Light" is presumably a macOS font (verify on the target machine):
# requesting it on Windows only yields "font family not found in Windows font
# database" warnings. Select it on macOS alone and keep the graphics device's
# default family everywhere else.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  par(family = "Heiti TC Light")
}
# Word cloud of the most frequent tokens in Android app names.
wordcloud(
  words = android[, 1],            # first column holds the tokens (Var1)
  freq = android$Freq,
  scale = c(4, 0.1),               # range of text sizes (largest, smallest)
  random.order = FALSE,            # plot words in decreasing frequency
  ordered.colors = FALSE,          # do not assign colors in word order
  rot.per = 0,                     # proportion of rotated words: none
  min.freq = 7,                    # words below this frequency are not drawn
  colors = brewer.pal(8, "Dark2")
)
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
# Load the Apple App Store listing.
# NOTE(review): this is an absolute path into one user's Desktop, unlike the
# relative path used for googleplaystore.csv -- confirm where the file lives.
ios <- readr::read_csv(file = "C:/Users/User/Desktop/AppleStore.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## id = col_double(),
## track_name = col_character(),
## size_bytes = col_double(),
## currency = col_character(),
## price = col_double(),
## rating_count_tot = col_double(),
## rating_count_ver = col_double(),
## user_rating = col_double(),
## user_rating_ver = col_double(),
## ver = col_character(),
## cont_rating = col_character(),
## prime_genre = col_character(),
## sup_devices.num = col_double(),
## ipadSc_urls.num = col_double(),
## lang.num = col_double(),
## vpp_lic = col_double()
## )
# Segment every iOS app name into tokens with a default jiebaR worker,
# then tabulate how often each token occurs.
datause <- ios
mixseg <- worker()
seg <- mixseg[datause[["track_name"]]]
# Frequency table over all tokens.
segA <- data.frame(table(seg))
# Frequency table restricted to tokens longer than one character.
segC <- data.frame(
  table(seg[nchar(seg) > 1])
)
# Keep the 50 most frequent tokens, highest count first.
segC_top50 <- head(
  segC[order(segC$Freq, decreasing = TRUE), ],
  n = 50
)
Take a look at segC_top50; it is a frequency table.
# Auto-print the 50 most frequent tokens (columns Var1 and Freq).
segC_top50
## Var1 Freq
## 2972 for 451
## 7367 The 343
## 683 and 296
## 3123 Game 296
## 5197 of 224
## 7366 the 219
## 3454 HD 194
## 5833 Pro 142
## 3079 Full 140
## 1392 by 121
## 6629 Simulator 121
## 3029 Free 116
## 3504 Hidden 110
## 3132 Games 100
## 4957 My 95
## 8176 with 94
## 5170 Object 93
## 741 App 90
## 7901 Video 90
## 8217 World 90
## 4976 Mystery 89
## 7484 to 89
## 5526 Photo 82
## 3725 in 75
## 541 Adventure 74
## 4939 Music 70
## 2472 Edition 69
## 3832 iPad 66
## 4309 Live 62
## 4736 Minecraft 58
## 5972 Racing 58
## 5914 Puzzle 55
## 2475 Editor 54
## 4798 Mobile 54
## 5686 Pocket 54
## 7568 Tracker 54
## 7969 VR 53
## 1657 City 52
## 6305 Run 52
## 8301 Your 52
## 4484 Maker 50
## 5656 Play 50
## 425 2016 49
## 4049 Kids 49
## 6037 Real 49
## 1460 Car 47
## 237 <U+8131>出 46
## 1425 Camera 44
## 6290 RPG 43
## 8065 Weather 43
Put the words you want to remove into the word_remove vector.
# Stop words to drop from the top-50 table before plotting.
word_remove <- c("for", "The", "and", "of", "the", "by", "with", "to", "in", "For")
# Row positions of the stop words inside segC_top50 (kept for inspection).
index <- which(segC_top50$Var1 %in% word_remove)
# Filter with a logical mask rather than `segC_top50[-index, ]`: when no stop
# word is present `index` is integer(0), and negative indexing with an empty
# vector selects zero rows instead of keeping all of them.
ios1 <- segC_top50[!(segC_top50$Var1 %in% word_remove), ]
library(wordcloud)
# "Heiti TC Light" is presumably a macOS font (verify on the target machine):
# requesting it on Windows only yields "font family not found in Windows font
# database" warnings. Select it on macOS alone and keep the graphics device's
# default family everywhere else.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  par(family = "Heiti TC Light")
}
# Word cloud of the most frequent tokens in iOS app names.
wordcloud(
  words = ios1[, 1],               # first column holds the tokens (Var1)
  freq = ios1$Freq,
  scale = c(4, 0.1),               # range of text sizes (largest, smallest)
  random.order = FALSE,            # plot words in decreasing frequency
  ordered.colors = FALSE,          # do not assign colors in word order
  rot.per = 0,                     # proportion of rotated words: none
  min.freq = 7,                    # words below this frequency are not drawn
  colors = brewer.pal(8, "Dark2")
)
Put the two word clouds on the same page.
# Split the plotting region into one row and two columns so the next two
# plots are drawn side by side.
par(mfrow = c(1, 2))
library(wordcloud)
# "Heiti TC Light" is presumably a macOS font (verify on the target machine):
# requesting it on Windows only yields "font family not found in Windows font
# database" warnings. Select it on macOS alone and keep the graphics device's
# default family everywhere else.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  par(family = "Heiti TC Light")
}
# First panel: Android word cloud, with a smaller maximum text size than the
# standalone plot.
wordcloud(
  words = android[, 1],            # first column holds the tokens (Var1)
  freq = android$Freq,
  scale = c(3, 0.1),               # range of text sizes (largest, smallest)
  random.order = FALSE,            # plot words in decreasing frequency
  ordered.colors = FALSE,          # do not assign colors in word order
  rot.per = 0,                     # proportion of rotated words: none
  min.freq = 7,                    # words below this frequency are not drawn
  colors = brewer.pal(8, "Dark2")
)
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
library(wordcloud)
# "Heiti TC Light" is presumably a macOS font (verify on the target machine):
# requesting it on Windows only yields "font family not found in Windows font
# database" warnings. Select it on macOS alone and keep the graphics device's
# default family everywhere else.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  par(family = "Heiti TC Light")
}
# iOS word cloud, with a smaller maximum text size than the standalone plot.
wordcloud(
  words = ios1[, 1],               # first column holds the tokens (Var1)
  freq = ios1$Freq,
  scale = c(3, 0.1),               # range of text sizes (largest, smallest)
  random.order = FALSE,            # plot words in decreasing frequency
  ordered.colors = FALSE,          # do not assign colors in word order
  rot.per = 0,                     # proportion of rotated words: none
  min.freq = 7,                    # words below this frequency are not drawn
  colors = brewer.pal(8, "Dark2")
)
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database
## Warning in strwidth(words[i], cex = size[i], ...): font family not found in
## Windows font database