This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Load the saved Yelp object from a hard-coded absolute Windows path.
# NOTE(review): this path only resolves on the original author's machine;
# confirm the location before re-running.
yelp  <- readRDS("C:\\Users\\Xiayang Xiao\\Desktop\\yelp.RDS")
# `text` element of the loaded object (presumably the review bodies); this is
# the vector handed to clean.text() and score.topic() further down.
yelptexts <- yelp$text
# `stars` element of the loaded object; compared against 4 and 5 further down.
stars <- yelp$stars
# Disabled step: would convert latin1 text to ASCII, dropping any character
# that cannot be converted (sub = "").
#iconv(x, "latin1", "ASCII", sub="")
# Normalise raw text before dictionary matching.
#
# Steps, in order: drop a standalone "RT" marker, drop @mentions, drop URLs,
# strip punctuation and digits, collapse runs of spaces/tabs to one space,
# trim the ends, and lower-case the result.
#
# Args:
#   x: character vector of raw texts (may be empty).
# Returns:
#   Character vector of the same length as `x` holding the cleaned texts.
clean.text <- function(x)
{
  # Remove the retweet marker only when it is a whole token. A bare "rt"
  # pattern would also eat the letters inside ordinary words such as
  # "dessert" or "party".
  x <- gsub("\\brt\\b", "", x, ignore.case = TRUE)

  # Remove @mentions.
  x <- gsub("@\\w+", "", x)

  # Remove URLs while their punctuation is still present, so the whole
  # address is recognised as one token.
  x <- gsub("http\\S+", "", x)

  x <- gsub("[[:punct:]]", "", x)
  x <- gsub("[[:digit:]]", "", x)

  # Collapse whitespace runs to a single space. Replacing them with ""
  # would fuse the neighbouring words ("paid 20 dollars" -> "paiddollars").
  x <- gsub("[ \t]+", " ", x)

  # Trim what is left at either end.
  x <- gsub("^ | $", "", x)

  tolower(x)
}
# Run the cleaning helper over the raw texts.
# NOTE(review): this rebinds `yelp` from the loaded object to the cleaned
# character vector, and the scoring calls further down are passed `yelptexts`,
# so the cleaned result is not what gets scored. Confirm whether that is
# intended.
yelp = clean.text(yelptexts)
# Topic dictionaries. Each file is scanned into a character vector of
# whitespace-separated tokens; anything after ';' on a line is skipped as a
# comment. The "Read N items" lines are the console output captured from scan().
services = scan('C:\\Users\\Xiayang Xiao\\Desktop\\service.txt', what='character', comment.char=';')
Read 328 items
price = scan('C:\\Users\\Xiayang Xiao\\Desktop\\price.txt', what='character', comment.char=';')
Read 577 items
food = scan('C:\\Users\\Xiayang Xiao\\Desktop\\food.txt', what='character', comment.char=';')
Read 363 items
# Count, for every sentence, how many of its words appear in a dictionary.
#
# Each sentence is stripped of punctuation and digits, lower-cased and split
# on whitespace; the score is the number of resulting words found in `dict`
# (a word that occurs twice counts twice).
#
# Args:
#   sentences: character vector of texts to score (may be empty).
#   dict:      character vector of dictionary words.
#   stars:     vector of ratings, one per sentence; copied to the output
#              unchanged.
#   .progress: name of a plyr progress bar. The default 'none' uses base R
#              only; any other value requires the plyr package.
# Returns:
#   A data.frame with columns `score`, `text` (the original sentences) and
#   `star`.
score.topic <- function(sentences, dict, stars, .progress='none')
{
  # gsub()/tolower() are vectorised, so normalise all sentences in one pass.
  cleaned <- gsub('[[:punct:]]', '', sentences)
  # Control characters include newlines and tabs: turn them into spaces
  # rather than deleting them, otherwise words on adjacent lines get fused.
  cleaned <- gsub('[[:cntrl:]]', ' ', cleaned)
  cleaned <- gsub('\\d+', '', cleaned)
  cleaned <- tolower(cleaned)

  tokens <- strsplit(cleaned, '\\s+')

  # Number of words of one sentence that are dictionary entries.
  count.hits <- function(words) sum(words %in% dict)

  if (identical(.progress, 'none')) {
    scores <- vapply(tokens, count.hits, integer(1), USE.NAMES = FALSE)
  } else {
    # plyr is only needed for the progress bar; fail loudly if it is missing
    # instead of carrying on after a silent require() == FALSE.
    if (!requireNamespace("plyr", quietly = TRUE)) {
      stop("score.topic(): package 'plyr' is required when .progress is not 'none'",
           call. = FALSE)
    }
    scores <- plyr::laply(tokens, count.hits, .progress = .progress)
  }

  data.frame(score = scores, text = sentences, star = stars)
}
# Score every review against each of the three topic dictionaries.
topic.scoresprice <- score.topic(yelptexts, price, stars, .progress = 'none')
topic.scorefood <- score.topic(yelptexts, food, stars, .progress = 'none')
topic.services <- score.topic(yelptexts, services, stars, .progress = 'none')

# Keep only the per-review hit counts.
score.price <- topic.scoresprice$score
score.food <- topic.scorefood$score
score.services <- topic.services$score

# One row per review: the three topic scores, the text and the rating.
score <- cbind.data.frame(score.price, score.food, score.services, yelptexts, stars)

# Tallies over the 4-star reviews. na.rm = TRUE leaves rows whose condition
# is NA out of the count.
N <- with(score, sum(stars == 4, na.rm = TRUE))
scores.services <- with(score, sum(score.services != 0 & stars == 4, na.rm = TRUE))
scores.food <- with(score, sum(score.food != 0 & stars == 4, na.rm = TRUE))
scores.price <- with(score, sum(score.price != 0 & stars == 4, na.rm = TRUE))
nomentioned <- with(
  score,
  sum(score.price == 0 & score.food == 0 & score.services == 0 & stars == 4,
      na.rm = TRUE)
)

# Table fed to the bar chart: one row per topic plus the "no topic" bucket.
dftemp <- data.frame(
  topic = c("price", "food", "services", "nomentioned"),
  number = c(scores.price, scores.food, scores.services, nomentioned)
)
library (plotly)
程辑包‘plotly’是用R版本3.4.3 来建造的载入需要的程辑包：ggplot2
程辑包‘ggplot2’是用R版本3.4.4 来建造的
载入程辑包：‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following objects are masked from ‘package:plyr’:

    arrange, mutate, rename, summarise

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
# Bar chart of the counts in `dftemp`: one bar per topic.
p <- plot_ly(data = dftemp, x = ~topic, y = ~number, type = "bar")
p
程辑包‘bindrcpp’是用R版本3.4.4 来建造的

# Pie chart: how the 5-star reviews split across combinations of topics.

# Build a data frame, as the 4-star tally does. A plain cbind() of numeric and
# character vectors yields a character matrix, in which case the subset()
# conditions below are no longer evaluated against the table's own columns.
score <- cbind.data.frame(score.price, score.food, score.services, yelptexts, stars)

# Exactly one topic mentioned.
score.services.only <- nrow(subset(score, score.services != 0 & score.food == 0 & score.price == 0 & stars == 5))
score.food.only <- nrow(subset(score, score.food != 0 & score.price == 0 & score.services == 0 & stars == 5))
score.price.only <- nrow(subset(score, score.price != 0 & score.services == 0 & score.food == 0 & stars == 5))

# Exactly two topics mentioned.
score.price.food <- nrow(subset(score, score.food != 0 & score.price != 0 & score.services == 0 & stars == 5))
score.price.services <- nrow(subset(score, score.price != 0 & score.services != 0 & score.food == 0 & stars == 5))
score.food.services <- nrow(subset(score, score.price == 0 & score.services != 0 & score.food != 0 & stars == 5))

# "Trine": all three topics mentioned.
score.Trine <- nrow(subset(score, score.services != 0 & score.food != 0 & score.price != 0 & stars == 5))

# No topic mentioned at all.
nomentioned <- nrow(subset(score, score.price == 0 & score.food == 0 & score.services == 0 & stars == 5))

# The eight buckets above are mutually exclusive; labels and counts are listed
# in the same order.
dftemp <- data.frame(
  topic = c("price only", "food only", "services only", " price and food",
            "price and services", "food and services", "Trine", "nomentioned"),
  number = c(score.price.only, score.food.only, score.services.only,
             score.price.food, score.price.services, score.food.services,
             score.Trine, nomentioned)
)

library(plotly)
# Grid lines, zero lines and tick labels are switched off on both axes.
p <- plot_ly(data = dftemp, labels = ~topic, values = ~number, type = 'pie') %>%
  layout(title = 'Pie Chart of Yelp',
         xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
         yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
p

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQp5ZWxwICA8LSByZWFkUkRTKCJDOlxcVXNlcnNcXFhpYXlhbmcgWGlhb1xcRGVza3RvcFxceWVscC5SRFMiKQ0KeWVscHRleHRzIDwtIHllbHAkdGV4dA0Kc3RhcnMgPC0geWVscCRzdGFycw0KI2ljb252KHgsICJsYXRpbjEiLCAiQVNDSUkiLCBzdWI9IiIpDQpjbGVhbi50ZXh0ID0gZnVuY3Rpb24oeCkNCnsNCiAgDQoNCiANCiAgeCA9IGdzdWIoInJ0IiwgIiIsIHgpDQogIA0KICB4ID0gZ3N1YigiQFxcdysiLCAiIiwgeCkNCiANCiAgeCA9IGdzdWIoIltbOnB1bmN0Ol1dIiwgIiIsIHgpDQoNCiAgeCA9IGdzdWIoIltbOmRpZ2l0Ol1dIiwgIiIsIHgpDQoNCiAgeCA9IGdzdWIoImh0dHBcXHcrIiwgIiIsIHgpDQoNCiAgeCA9IGdzdWIoIlsgfFx0XXsyLH0iLCAiIiwgeCkNCg0KICB4ID0gZ3N1YigiXiAiLCAiIiwgeCkNCg0KICB4ID0gZ3N1YigiICQiLCAiIiwgeCkNCiAgDQogIHggPSB0b2xvd2VyKHgpDQogIHJldHVybih4KQ0KfQ0KeWVscCA9IGNsZWFuLnRleHQoeWVscHRleHRzKQ0Kc2VydmljZXMgPSBzY2FuKCdDOlxcVXNlcnNcXFhpYXlhbmcgWGlhb1xcRGVza3RvcFxcc2VydmljZS50eHQnLCB3aGF0PSdjaGFyYWN0ZXInLCBjb21tZW50LmNoYXI9JzsnKQ0KcHJpY2UgPSBzY2FuKCdDOlxcVXNlcnNcXFhpYXlhbmcgWGlhb1xcRGVza3RvcFxccHJpY2UudHh0Jywgd2hhdD0nY2hhcmFjdGVyJywgY29tbWVudC5jaGFyPSc7JykNCmZvb2QgPSBzY2FuKCdDOlxcVXNlcnNcXFhpYXlhbmcgWGlhb1xcRGVza3RvcFxcZm9vZC50eHQnLCB3aGF0PSdjaGFyYWN0ZXInLCBjb21tZW50LmNoYXI9JzsnKQ0KDQpzY29yZS50b3BpYyA9IGZ1bmN0aW9uKHNlbnRlbmNlcywgZGljdCwgc3RhcnMsIC5wcm9ncmVzcz0nbm9uZScpDQp7DQogIHJlcXVpcmUocGx5cikNCiAgcmVxdWlyZShzdHJpbmdyKQ0KICByZXF1aXJlKHN0cmluZ2kpDQogIA0KICANCiAgc2NvcmVzID0gbGFwbHkoc2VudGVuY2VzLCBmdW5jdGlvbihzZW50ZW5jZSwgZGljdCwgc3Rhcikgew0KICAgIA0KICAgIA0Kc2VudGVuY2UgPSBnc3ViKCdbWzpwdW5jdDpdXScsICcnLCBzZW50ZW5jZSkNCiAgICBzZW50ZW5jZSA9IGdzdWIoJ1tbOmNudHJsOl1dJywgJycsIHNlbnRlbmNlKQ0KICAgIHNlbnRlbmNlID0gZ3N1YignXFxkKycsICcnLCBzZW50ZW5j
ZSkNCiAgICBzZW50ZW5jZSA9IHRvbG93ZXIoc2VudGVuY2UpICAgIA0KICAgDQogICAgd29yZC5saXN0ID0gc3RyX3NwbGl0KHNlbnRlbmNlLCAnXFxzKycpDQogICANCiAgICB3b3JkcyA9IHVubGlzdCh3b3JkLmxpc3QpDQogICAgDQogICAgc3RhciA9IHVubGlzdChzdGFyKQ0KICAgIA0KICAgIA0KICAgIHRvcGljLm1hdGNoZXMgPSBtYXRjaCh3b3JkcywgZGljdCkNCiAgICANCiAgICANCiAgICB0b3BpYy5tYXRjaGVzID0gIWlzLm5hKHRvcGljLm1hdGNoZXMpDQogICANCiAgICBzY29yZSA9IHN1bSh0b3BpYy5tYXRjaGVzKQ0KICAgIA0KICAgIHJldHVybihzY29yZSkNCiAgfSwgZGljdCwgc3RhcnMsIC5wcm9ncmVzcz0ucHJvZ3Jlc3MgKQ0KICANCiAgdG9waWNzY29yZXMuZGYgPSBkYXRhLmZyYW1lKHNjb3JlPXNjb3JlcywgdGV4dD1zZW50ZW5jZXMsIHN0YXI9c3RhcnMpDQogIHJldHVybih0b3BpY3Njb3Jlcy5kZikNCn0NCg0KdG9waWMuc2NvcmVzcHJpY2U9IHNjb3JlLnRvcGljKHllbHB0ZXh0cywgcHJpY2UsIHN0YXJzLCAucHJvZ3Jlc3M9J25vbmUnKQ0KDQp0b3BpYy5zY29yZWZvb2QgPSBzY29yZS50b3BpYyh5ZWxwdGV4dHMsIGZvb2QsIHN0YXJzLCAucHJvZ3Jlc3M9J25vbmUnKQ0KDQp0b3BpYy5zZXJ2aWNlcyA9IHNjb3JlLnRvcGljKHllbHB0ZXh0cywgc2VydmljZXMsIHN0YXJzLCAucHJvZ3Jlc3M9J25vbmUnKQ0KDQpzY29yZS5wcmljZSA8LSB0b3BpYy5zY29yZXNwcmljZSRzY29yZQ0Kc2NvcmUuZm9vZCA8LSB0b3BpYy5zY29yZWZvb2Qkc2NvcmUNCnNjb3JlLnNlcnZpY2VzIDwtIHRvcGljLnNlcnZpY2VzJHNjb3JlDQoNCg0KDQpzY29yZSA8LSBjYmluZC5kYXRhLmZyYW1lKHNjb3JlLnByaWNlLCBzY29yZS5mb29kLCBzY29yZS5zZXJ2aWNlcyx5ZWxwdGV4dHMsc3RhcnMpDQoNCk49IG5yb3coc3Vic2V0KHNjb3JlLCBzdGFycz09NCkpDQoNCnNjb3Jlcy5zZXJ2aWNlcyA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5zZXJ2aWNlcyAhPSAwICYgc3RhcnMgPT0gNCkpDQpzY29yZXMuZm9vZCA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5mb29kICE9IDAgJiBzdGFycyA9PSA0KSkNCnNjb3Jlcy5wcmljZSA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5wcmljZSAhPSAwICYgc3RhcnMgPT0gNCkpDQpub21lbnRpb25lZCA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5wcmljZSA9PSAwICYgc2NvcmUuZm9vZCA9PTAgJiBzY29yZS5zZXJ2aWNlcyA9PTAgJiBzdGFycyA9PSA0KSkNCg0KZGZ0ZW1wPWRhdGEuZnJhbWUodG9waWM9YygicHJpY2UiLCAiZm9vZCIsInNlcnZpY2VzIiwibm9tZW50aW9uZWQiKSwgDQogICAgICAgICAgICAgICAgICBudW1iZXI9YyhzY29yZXMucHJpY2Usc2NvcmVzLmZvb2Qsc2NvcmVzLnNlcnZpY2VzLCBub21lbnRpb25lZCkpDQpsaWJyYXJ5IChwbG90bHkpDQpwIDwtIHBsb3RfbHkoZGZ0ZW1wLCB4ID0gfnRvcGljLCB5ID0gfm51bWJlciwgIHR5cGUgPSAiYmFyIikNCg0K
cCAgICANCg0KDQoNCiNwaWUgY2hhdA0KDQpzY29yZSA8LSBjYmluZChzY29yZS5wcmljZSwgc2NvcmUuZm9vZCwgc2NvcmUuc2VydmljZXMseWVscHRleHRzLHN0YXJzKSANCnNjb3JlLnNlcnZpY2VzLm9ubHkgPSBucm93KHN1YnNldChzY29yZSwgc2NvcmUuc2VydmljZXMgIT0gMCAmIHNjb3JlLmZvb2QgPT0gMCAmIHNjb3JlLnByaWNlID09MCAmIHN0YXJzID09IDUpKQ0Kc2NvcmUuZm9vZC5vbmx5ID0gbnJvdyhzdWJzZXQoc2NvcmUsIHNjb3JlLmZvb2QgIT0gMCAmIHNjb3JlLnByaWNlID09MCAmIHNjb3JlLnNlcnZpY2VzID09MCAmIHN0YXJzID09IDUpKQ0Kc2NvcmUucHJpY2Uub25seSA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5wcmljZSAhPSAwICYgc2NvcmUuc2VydmljZXMgPT0wICYgc2NvcmUuZm9vZCA9PTAgJiBzdGFycyA9PSA1KSkNCg0KDQpzY29yZS5wcmljZS5mb29kID0gbnJvdyhzdWJzZXQoc2NvcmUsIHNjb3JlLmZvb2QgIT0gMCAmIHNjb3JlLnByaWNlICE9MCAmIHNjb3JlLnNlcnZpY2VzID09MCAmIHN0YXJzID09IDUpKQ0Kc2NvcmUucHJpY2Uuc2VydmljZXMgPSBucm93KHN1YnNldChzY29yZSwgc2NvcmUucHJpY2UgIT0gMCAmIHNjb3JlLnNlcnZpY2VzICE9MCAmIHNjb3JlLmZvb2QgPT0gMCAmIHN0YXJzID09IDUpKQ0Kc2NvcmUuZm9vZC5zZXJ2aWNlcyA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5wcmljZSA9PSAwICYgc2NvcmUuc2VydmljZXMgIT0wICYgc2NvcmUuZm9vZCAhPTAgJiBzdGFycyA9PSA1KSkNCg0Kc2NvcmUuVHJpbmUgPSBucm93KHN1YnNldChzY29yZSwgc2NvcmUuc2VydmljZXMgIT0gMCAmIHNjb3JlLmZvb2QgIT0gMCAmIHNjb3JlLnByaWNlICE9MCAmIHN0YXJzID09IDUpKQ0KDQpub21lbnRpb25lZCA9IG5yb3coc3Vic2V0KHNjb3JlLCBzY29yZS5wcmljZSA9PSAwICYgc2NvcmUuZm9vZCA9PTAgJiBzY29yZS5zZXJ2aWNlcyA9PTAgJiBzdGFycyA9PSA1KSkNCg0KZGZ0ZW1wPWRhdGEuZnJhbWUodG9waWM9YygicHJpY2Ugb25seSIsICJmb29kIG9ubHkiLCJzZXJ2aWNlcyBvbmx5IiwgIiBwcmljZSBhbmQgZm9vZCIsInByaWNlIGFuZCBzZXJ2aWNlcyIsImZvb2QgYW5kIHNlcnZpY2VzIiAsIlRyaW5lIiAsICJub21lbnRpb25lZCIpLCANCiAgICAgICAgICAgICAgICAgIG51bWJlcj1jKHNjb3JlLnByaWNlLm9ubHksc2NvcmUuZm9vZC5vbmx5LHNjb3JlLnNlcnZpY2VzLm9ubHksc2NvcmUucHJpY2UuZm9vZCxzY29yZS5wcmljZS5zZXJ2aWNlcyxzY29yZS5mb29kLnNlcnZpY2VzLHNjb3JlLlRyaW5lLCBub21lbnRpb25lZCkpDQoNCmxpYnJhcnkocGxvdGx5KQ0KcCA8LSBwbG90X2x5KGRhdGE9ZGZ0ZW1wLCBsYWJlbHMgPSB+dG9waWMsIHZhbHVlcyA9IH5udW1iZXIsIHR5cGUgPSAncGllJykgJT4lDQogIGxheW91dCh0aXRsZSA9ICdQaWUgQ2hhcnQgb2YgWWVscCcsDQogICAgICAgICB4YXhpcyA9IGxpc3Qoc2hvd2dyaWQgPSBGQUxTRSwgemVyb2xpbmUgPSBGQUxT
RSwgc2hvd3RpY2tsYWJlbHMgPSBGQUxTRSksDQogICAgICAgICB5YXhpcyA9IGxpc3Qoc2hvd2dyaWQgPSBGQUxTRSwgemVyb2xpbmUgPSBGQUxTRSwgc2hvd3RpY2tsYWJlbHMgPSBGQUxTRSkpDQpwDQoNCg0KYGBgDQoNCkFkZCBhIG5ldyBjaHVuayBieSBjbGlja2luZyB0aGUgKkluc2VydCBDaHVuayogYnV0dG9uIG9uIHRoZSB0b29sYmFyIG9yIGJ5IHByZXNzaW5nICpDdHJsK0FsdCtJKi4NCg0KV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDdHJsK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuDQoNClRoZSBwcmV2aWV3IHNob3dzIHlvdSBhIHJlbmRlcmVkIEhUTUwgY29weSBvZiB0aGUgY29udGVudHMgb2YgdGhlIGVkaXRvci4gQ29uc2VxdWVudGx5LCB1bmxpa2UgKktuaXQqLCAqUHJldmlldyogZG9lcyBub3QgcnVuIGFueSBSIGNvZGUgY2h1bmtzLiBJbnN0ZWFkLCB0aGUgb3V0cHV0IG9mIHRoZSBjaHVuayB3aGVuIGl0IHdhcyBsYXN0IHJ1biBpbiB0aGUgZWRpdG9yIGlzIGRpc3BsYXllZC4NCg==