This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Read the object stored in yelp.RDS, then pull out the two columns the
# analysis below works with: `stars` and `text`.
yelp <- readRDS(file = "C:\\Users\\Xiayang Xiao\\Desktop\\yelp.RDS")
stars <- yelp$stars
yelptexts <- yelp$text
# Disabled pre-processing step, kept for reference (would drop non-ASCII bytes):
#   iconv(x, "latin1", "ASCII", sub="")
# Normalise raw text before dictionary matching.
#
# Args:
#   x: character vector of texts.
# Returns:
#   Character vector of the same length as `x`, lower-cased, with stand-alone
#   retweet markers, @mentions, punctuation, digits and URLs removed, blank
#   runs collapsed to a single space, and leading/trailing spaces trimmed.
clean.text <- function(x) {
  # Remove the retweet marker only when it is a whole word. A bare "rt"
  # pattern also deletes the letters inside words such as "dessert" or "party".
  x <- gsub("\\brt\\b", "", x, ignore.case = TRUE)
  x <- gsub("@\\w+", "", x)
  x <- gsub("[[:punct:]]", "", x)
  x <- gsub("[[:digit:]]", "", x)
  # Punctuation is already stripped, so a URL is now a single "http..." token.
  x <- gsub("http\\w+", "", x)
  # Collapse runs of spaces/tabs to ONE space; replacing them with nothing
  # would glue the neighbouring words together.
  x <- gsub("[ \t]+", " ", x)
  # Trim every leading/trailing space, not just the first one.
  x <- gsub("^ +| +$", "", x)
  tolower(x)
}
# Run the text clean-up over every review text.
# NOTE(review): this rebinds `yelp` (until now the object read from yelp.RDS)
# to the cleaned texts, while the score.topic() calls further down are given
# the raw `yelptexts` rather than this result -- confirm that is intended.
yelp <- clean.text(x = yelptexts)
# Read the service word list as character tokens; ';' starts a comment in the file.
services <- scan(file = 'C:\\Users\\Xiayang Xiao\\Desktop\\service.txt', what = 'character', comment.char = ';')
Read 328 items
# Read the price word list as character tokens; ';' starts a comment in the file.
price <- scan(file = 'C:\\Users\\Xiayang Xiao\\Desktop\\price.txt', what = 'character', comment.char = ';')
Read 577 items
# Read the food word list as character tokens; ';' starts a comment in the file.
food <- scan(file = 'C:\\Users\\Xiayang Xiao\\Desktop\\food.txt', what = 'character', comment.char = ';')
Read 363 items
# Count, for each sentence, how many of its words appear in a topic dictionary.
#
# Args:
#   sentences: character vector of texts to score.
#   dict:      character vector of topic words. Matching is done against
#              lower-cased text, so entries should be lower-case.
#   stars:     ratings, one per sentence; copied into the result unchanged.
#   .progress: 'none' (default) scores with base R only. Any other value is
#              forwarded to plyr::laply() as its progress-bar name, which
#              requires the plyr package to be installed.
# Returns:
#   data.frame with columns `score` (number of dictionary hits), `text`
#   (the original sentences) and `star` (the supplied ratings).
score.topic <- function(sentences, dict, stars, .progress='none')
{
  # gsub()/tolower() are vectorised, so clean all sentences in one pass.
  cleaned <- gsub('[[:punct:]]', '', sentences)
  # Control characters include newlines and tabs. Replace them with a space
  # rather than deleting them, otherwise words on adjacent lines are fused
  # ("food\nservice" -> "foodservice") and no longer match the dictionary.
  cleaned <- gsub('[[:cntrl:]]', ' ', cleaned)
  cleaned <- gsub('\\d+', '', cleaned)
  cleaned <- tolower(cleaned)

  word.lists <- strsplit(cleaned, '\\s+')
  count.hits <- function(words) sum(words %in% dict)

  if (identical(.progress, 'none')) {
    scores <- vapply(word.lists, count.hits, integer(1), USE.NAMES = FALSE)
  } else {
    # plyr is only needed for its progress bars; fail loudly if it is missing
    # instead of continuing the way require() would.
    if (!requireNamespace('plyr', quietly = TRUE)) {
      stop("package 'plyr' is required when .progress is not 'none'", call. = FALSE)
    }
    scores <- plyr::laply(word.lists, count.hits, .progress = .progress)
  }

  data.frame(score=scores, text=sentences, star=stars)
}
# Score the raw review texts against each of the three topic dictionaries.
topic.scoresprice <- score.topic(sentences = yelptexts, dict = price, stars = stars, .progress = 'none')
topic.scorefood <- score.topic(sentences = yelptexts, dict = food, stars = stars, .progress = 'none')
topic.services <- score.topic(sentences = yelptexts, dict = services, stars = stars, .progress = 'none')

# Keep only the per-review hit counts from each result.
score.price <- topic.scoresprice[["score"]]
score.food <- topic.scorefood[["score"]]
score.services <- topic.services[["score"]]

# One row per review: the three topic counts, the text and the rating.
score <- cbind.data.frame(score.price, score.food, score.services, yelptexts, stars)

# Tally the 4-star reviews: all of them, those with at least one hit per
# topic, and those with no hit in any topic. Rows whose condition is NA are
# not counted.
N <- with(score, sum(stars == 4, na.rm = TRUE))
scores.services <- with(score, sum(score.services != 0 & stars == 4, na.rm = TRUE))
scores.food <- with(score, sum(score.food != 0 & stars == 4, na.rm = TRUE))
scores.price <- with(score, sum(score.price != 0 & stars == 4, na.rm = TRUE))
nomentioned <- with(
  score,
  sum(score.price == 0 & score.food == 0 & score.services == 0 & stars == 4, na.rm = TRUE)
)

# Table that feeds the bar chart.
dftemp <- data.frame(
  topic = c("price", "food", "services", "nomentioned"),
  number = c(scores.price, scores.food, scores.services, nomentioned)
)
library(plotly)
package ‘plotly’ was built under R version 3.4.3
Loading required package: ggplot2
package ‘ggplot2’ was built under R version 3.4.4
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following objects are masked from ‘package:plyr’:
arrange, mutate, rename, summarise
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Bar chart of `dftemp`: one bar per topic, bar height taken from `number`.
p <- plot_ly(data = dftemp, x = ~topic, y = ~number, type = "bar")
# Evaluating the object at top level displays the chart.
p
package ‘bindrcpp’ was built under R version 3.4.4
# Pie chart: how the 5-star reviews split across topic combinations.
#
# Build the table with cbind.data.frame(), as is done for the bar chart.
# Plain cbind() on numeric and character vectors yields a character matrix,
# and subset() on a matrix does not look up column names, so the filters
# below would only work through the same-named global vectors.
score <- cbind.data.frame(score.price, score.food, score.services, yelptexts, stars)

# Exactly one topic mentioned.
score.services.only = nrow(subset(score, score.services != 0 & score.food == 0 & score.price == 0 & stars == 5))
score.food.only = nrow(subset(score, score.food != 0 & score.price == 0 & score.services == 0 & stars == 5))
score.price.only = nrow(subset(score, score.price != 0 & score.services == 0 & score.food == 0 & stars == 5))
# Exactly two topics mentioned.
score.price.food = nrow(subset(score, score.food != 0 & score.price != 0 & score.services == 0 & stars == 5))
score.price.services = nrow(subset(score, score.price != 0 & score.services != 0 & score.food == 0 & stars == 5))
score.food.services = nrow(subset(score, score.price == 0 & score.services != 0 & score.food != 0 & stars == 5))
# All three topics mentioned, and none mentioned.
score.Trine = nrow(subset(score, score.services != 0 & score.food != 0 & score.price != 0 & stars == 5))
nomentioned = nrow(subset(score, score.price == 0 & score.food == 0 & score.services == 0 & stars == 5))

# Table that feeds the pie chart; labels and counts are listed in the same order.
dftemp = data.frame(
  topic = c("price only", "food only", "services only", " price and food",
            "price and services", "food and services", "Trine", "nomentioned"),
  number = c(score.price.only, score.food.only, score.services.only, score.price.food,
             score.price.services, score.food.services, score.Trine, nomentioned)
)
library(plotly)
# Grid lines, zero lines and tick labels are switched off on both axes.
p <- plot_ly(data = dftemp, labels = ~topic, values = ~number, type = 'pie') %>%
  layout(title = 'Pie Chart of Yelp',
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
p
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.