# install.packages("googleVis")
# install.packages("Rtsne")
# install.packages("d3heatmap")
# install.packages("wordcloud")
rm(list=ls(all=T)); gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 495661 26.5     940480 50.3   940480 50.3
## Vcells 937659  7.2    1650153 12.6  1158393  8.9
library(knitr); opts_chunk$set(comment = NA)
library(Matrix)
library(dplyr)
library(googleVis)
library(doParallel)
library(Rtsne)
library(wordcloud)
library(RColorBrewer)
library(randomcoloR)
library(d3heatmap)
library(morpheus)
library(FactoMineR)
library(factoextra)
library(highcharter)
library(tm)
library(slam)
library(MASS)
# options(width=100, gvis.plot.tag='chart', digits=4, scipen=60)
# options(width=100, digits=4, scipen=60)
Load Data
# 讀取已經透過YelpExplore處理好的資料
prep2 = "prep2/"
load(paste0(prep2,"businesses.rdata"))
load(paste0(prep2,"reviews.rdata"))
Merge sentiment score into rev
# 使用cbind()將評論摘要與情緒評分整併
rev = cbind(rev, senti[,-1])
range(rev$date)
[1] "2004-07-22" "2017-07-26"
# 以7月作為分界點,將資料按照所屬範圍之年份進行切割
# rev$year =  as.integer(format(rev$date, "%Y"))
breaks = as.Date(c("2004-07-22", paste0(2005:2017, "-07-27")))
rev$year = as.integer(cut(rev$date, breaks)) + 2004

Contents

  1. 評論數量最多的100個商店
  2. 商業類別的動態比較
  3. Cafe、Breakfast & Brunch、Desserts商業類別
  4. 平台取出評論內容:txt
  5. 聲量、星等、互動、情緒
  6. 商店與話題
  7. 商店的情緒分佈
  8. 文字分析與字雲

(1) 評論數量最多的100個商店

1.1 Top100 bid
# 計算各間商店(bid)數量後,再藉由降冪排列、找出前100間商店,並取得該商店之bid
b100 = rev %>% count(bid) %>% arrange(desc(n)) %>% head(100) %>% .$bid
1.2 Summarise reviews by bid and year
# 計算前100間商店
df = rev %>% filter(bid %in% b100) %>%  
  # 必須符合前100間的bid 
  group_by(bid, year) %>%               
  # 以bid與年份作為群組依據
  summarise(                            
  # 產生摘要資料,將前100間商店的正向、負向、傷心情緒做平均計算
    positive = mean(positive),
    negative = mean(negative),
    sad = mean(sadness),                
    n = n()
    )
1.3 Biz that have review every year after 2009
# 先篩選出2009年後的評論資料、計算單一店家評論數量,再取出bid
y09 = df %>% filter(year >= 2009) %>% count(bid) %>% filter(nn == 9) %>% .$bid
1.4 Google Motion Chart
# 使用Google Motion Chart查看在2010年到2016年之間,使用者評論這些商店表現的狀況如何
df %>% filter(year >= 2010 & year <= 2016 & bid %in% y09) %>% 
  data.frame %>% 
  gvisMotionChart("bid", "year") %>% plot
starting httpd help server ... done


(2) 商業類別的動態比較

2.1 依商業類別彙總評論資料
# 取得商業類別資料,共822筆
load(paste0(prep2, "cat822.rdata"))
2.2 比較討論聲量相似的商業類別
df = cat822 %>% 
  filter(year >= 2010 & year <= 2016) %>%
  # 篩選出2010到2016之間的資料
  filter(category %in% colnames(X)[1:100]) %>%
  # 篩選出1~100的類別資料
  dplyr::select(category, year, negative, stars, useful, n,
  # 選取類別、年份及評論情緒
         positive, joy, anticipation, trust, surprise,
         sadness, anger, disgust, fear, cool, funny
         )  %>% 
  group_by(year) %>%                  # scale scores by years so they become...
  mutate(n = 0.1 + n - min(n) ) %>%   # aviod negative bubble size
  data.frame 

p = gvisMotionChart(df, "category", "year")
plot(p)

依照泡泡圖所示,可以看出在數量與星等的設定下,所有類別走向皆朝右上方移動,可以明顯地看到數量最多的為餐廳還有food等,表示yelp的使用者,對於飲食的重視程度高,yelp中屬於食物相關類別與店家也較多,且現代人對於健康飲食、健身風潮的流行,皆顯示現代人飲食習慣的改變,於是我們決定選擇輕飲食系列作為分析的主要目標。

選擇網美族作為目標對象

選擇網美族作為目標對象



(3) Cafe、Breakfast & Brunch、Desserts商業類別

3.1 設定商店評論選擇條件

選擇在

  • 2011年以後每年都有評論
  • 2011年以後評論的總數數量大於100、小於300
  • 商業類別包含咖啡、早午餐、甜點

的商店,將其bid放在CafesBreakfast & BrunchDesserts裡面

# 只要資料包含Cafes、Breakfast & Brunch、Desserts其中一項以上,即做為計算資料
buty =rowSums(X[,c("Cafes", "Breakfast & Brunch", "Desserts")]) > 0
sum(buty)
[1] 8386
cf = rev %>% 
  filter(bid %in% B$bid[buty]) %>%    
  # 找出評論中屬於這些商店的評論內容
  filter(year >= 2011) %>%            
  # 資料時間在2010年以後
  group_by(bid) %>% 
  summarise(                      
    n_year = n_distinct(year),        # 年份總長
    n_rev = n()                       # 評論數量
    ) %>% 
  filter(
    n_year == 7,                      # 具有七年的資料
    n_rev > 100 & n_rev < 300         # 評論數量在100到300之間
    ) %>% 
  dplyr::select(bid) %>% 
  left_join(B)                        # 整合商店基本資料
Joining, by = "bid"
nrow(cf)
[1] 206
3.2 選出評論

選出這一些商店在2011年之後的所有評論,放在資料框R裡面

R = rev %>% filter(bid %in% cf$bid & year >= 2011)               
# 找出這些商店在2011年以後的評論
par(mar=c(3,4,3,2), cex=0.8)
R %>% count(bid) %>% .$n %>% hist(8, main="No. Reviews per Biz") 

# 以直方圖呈現評論資料


(4) 從平台取出評論內容:txt

# 取出評論內容的資料(需透過hdfs方式快速存取)
library(dplyr)
library(sparklyr)
Sys.setenv(SPARK_HOME="/usr/local/spark/spark-2.1.0-bin-hadoop2.7/")
config <- spark_config()
config$spark.ui.port = "4044"
config$spark.executor.memory = "4G"
config$spark.driver.memory = "4G"
config$spark.yarn.executor.memoryOverhead = "4096"
sc <- spark_connect(master = "spark://hnamenode:7077", config = config)

yelp = "hdfs://192.168.1.100:9000/home/tonychuo/yelp10/pq01/"
Text = spark_read_parquet(sc, "Text", paste0(yelp, "Text"))
txt = Text %>% filter(rid %in% R$rid) %>% collect

spark_disconnect(sc)

總共有11572篇評論

nrow(txt) # 計算評論總數量
[1] 35003

從平台取出的評論次序和R$rid是不同的,所以需要排序一下

c(setequal(R$rid, txt$rid), identical(R$rid, txt$rid))
[1]  TRUE FALSE
txt = left_join(R[,"rid",F], txt) 
Joining, by = "rid"
identical(R$rid, txt$rid)
[1] TRUE
4.1 從主機取出話題評分
ypath = "./prep2/" 
load(paste0(ypath, "empath.rdata"))
empath = left_join(R[,"rid",F], empath)
Joining, by = "rid"
identical(R$rid, empath$rid)
[1] TRUE
gc()                                # release memory
            used  (Mb) gc trigger   (Mb)   max used   (Mb)
Ncells   2678675 143.1    4703850  251.3    3886542  207.6
Vcells 111448601 850.3  830526033 6336.5 1035268734 7898.5
table(rev$year) %>% barplot

par(mar=c(3,4,3,2), cex=0.8)
R$year %>% table %>% barplot(main="No. Review by Year") # 呈現評論數量之直方圖



(5) 聲量、星等、互動、情緒

5.1 商店間的動態比較
df = R %>% group_by(bid, year) %>% 
  mutate(engage = useful + cool + funny) %>%       
  # 增加由useful、cool、funny加總而成的engage
  summarise_at(
    c("engage", "stars", "positive", "negative"),  
    # 將這四個欄位進行平均
    mean) %>% 
  left_join(count(R, bid, year)) %>% 
  data.frame
Joining, by = c("bid", "year")
p = gvisMotionChart(df, "bid", "year")            
# 動態表現商店在聲量、星等、互動、情緒四者的表現
plot(p)
5.2 加入店名、去除重複值
df = left_join(df, B[,c("bid","name")])    # merge in biz name
Joining, by = "bid"
df = cbind(df[,-c(1:2)], df[,1:2,F]) %>%   # move bid to the last column
  filter(! bid %in% c(4606, 114755, 121795, 25483, 19411, 58860, 84942, 77112, 58467, 154230, 130544, 111963, 97552, 154526)) %>% 
  # Starbucks Tim Hortons,去除掉重複的bid
  data.frame
p = gvisMotionChart(df, "name", "year")   # 以動態呈現聲量、星等、互動、情緒表現
plot(p)
5.3 星等上升、下降最快的商店

使用線性回歸找出星等上升、下降最快的商店

library(stringr)
bx = sapply(split(df, df$bid), function(x) {
  lm(stars ~ year, x) %>% coef %>% `[`("year") }) %>%  # take lm coef.
  .[ abs(.) > 0.15  ] %>%                              # 斜率需大於0.15
  names %>% str_extract("[0-9]*") %>% 
  as.integer
p = df %>% filter(bid %in% bx) %>% gvisMotionChart("name", "year")  
# 以動態呈現星等表現較為極端的商店
plot(p)


(6) 商店與話題

6.1 商店話題矩陣與熱圖

Make biz-theme matrix - mx

# biz-theme matrix,產生商業話題矩陣
library(d3heatmap)
mx = sapply(split(empath[,-1], R$bid), colMeans) %>%  t 
bnames = B$name[match(rownames(mx), B$bid)]
rownames(mx) = bnames

Select themes with medium-high weights

# 計算位於0.60~0.95之間資料的欄位加總
rx = colSums(mx) %>% quantile(c(0.60, 0.95))               
mx = mx[, colSums(mx) > rx[1] & colSums(mx) < rx[2]]  

Check the range of weights

# 查看範圍分布(直方圖)
par(cex=0.8)
hist(log(mx+1e-4))   

6.2 D3heatmap
# 產生商店話題之熱圖
mx %>% {log(.+1e-4)} %>% t %>% d3heatmap(color=rev(cm.colors(9)))  
# 實體商店分析
# topics = c("attractive","beauty","affection","leisure","feminine","cleaning","healing","hygiene","tra#veling","fun","vacation")
# tmx = sapply(topics, function (k) {
# rownames(mx)[ order(-mx[,k]) ] %>% head(10)
# } )
# write.csv(tmx,file="tmx.csv")
# table(tmx) %>% sort
6.3 熱圖 + 互動式集群分析

這個工具不能投射在網頁上,只能直接在Rstudio裡面做

library(morpheus)
mx %>% {log(.+1e-4)} %>% morpheus


(7) 商店的情緒分佈

將商店投射到尺度縮減之後的情緒平面上

7.1 建立商店情緒矩陣
pcx = sapply(split(R[,10:19], R$bid), colMeans) %>% t
rownames(pcx) = bnames
7.2 尺度縮減
library(FactoMineR)
library(factoextra)
pcx = pcx %>% scale %>% PCA(ncp=10, graph=F) 

前三個主成分已經涵蓋了90%的變異

# 產生累積差異直方圖
par(cex=0.8)
barplot(pcx$eig[1:10,3],names=1:10,main="Accumulated Variance",
        xlab="No. Components", ylab="% of Variance")
abline(h=seq(0,100,10),col='lightgray')              

7.3 情緒平面上的商店
# 三個類別中,所有商店呈現的情緒分布
source("bipcx.R")
N = n_distinct(R$bid)
bipcx(pcx,1,2,10,N,t1="Strength",t2="Valence",     
      obs='Business', main="Strength & Valence of Sentiment")
bipcx(pcx,3,2,10,N,t1="Arosual",t2="Valence",     
      obs='Business', main="Strength & Valence of Sentiment")


(8) 文字分析與字雲

8.1 建立字頻表 (文件字詞矩陣)
library(tm)
dtm = txt$text %>% 
  iconv(to = "utf-8", sub="") %>% 
  VectorSource %>% Corpus %>% 
  tm_map(content_transformer(tolower)) %>%         # 轉換成小寫
  tm_map(removePunctuation) %>%                    # 刪除標點符號
  tm_map(removeWords, stopwords("english")) %>%    # 刪除贅字
  tm_map(stemDocument) %>%                         # 還原字根
  DocumentTermMatrix %>%                           # 轉換成文字字詞矩陣
  removeSparseTerms(0.998)                         # 篩選掉最後0.2%的資料
dtm  # (documents: 10925, terms: 2520)
8.2 使用TF-IDF篩選字詞
library(slam)
tfidf = tapply(dtm$v/row_sums(dtm)[dtm$i], dtm$j, mean) *
  log2(nrow(dtm)/col_sums(dtm > 0))
summary(tfidf)
dtm = dtm[, tfidf > 0.14811 ]
dtm = dtm[,order(-col_sums(dtm))]
dim(dtm)
8.3 使用tSNE做尺度縮減
library(Rtsne)
n = 800
tsne = dtm[, 1:n] %>% as.data.frame.matrix %>% 
  scale %>% t %>% Rtsne(
    check_duplicates = FALSE, theta=0.0, max_iter=3200)
8.4 層級式集群分析
Y = tsne$Y                        # tSNE coordinates
d = dist(Y)                       # distance matrix
hc = hclust(d)                    # hi-clustering
K = 100                           # number of clusters 
g = cutree(hc,K)                  # cut into K clusters
table(g) %>% as.vector %>% sort   # sizes of clusters
8.5 文字雲
library(randomcoloR)
library(wordcloud)
wc = col_sums(dtm[,1:n])
colors = distinctColorPalette(K)
fsize = log(wc)/5                               # 控制字體大小
range(fsize)
png("fig/cafe.png", width=3200, height=1800)    # 產生png擋
textplot(
  Y[,1], Y[,2], colnames(dtm)[1:n], show=F, 
  col=colors[g],
  cex= fsize,
  font=2)
dev.off()
文字雲圖

文字雲圖



(9) 商務屬性 Business Attributes

9.1 bizatt 資料框
summary(bizatt)
      bid         AcceptsInsurance  AgesAllowed              Alcohol      
 Min.   :     1   Mode :logical    18plus :    86   beer_and_wine:  6444  
 1st Qu.: 39150   FALSE:2149       19plus :    63   full_bar     : 18767  
 Median : 78310   TRUE :6426       21plus :   211   none         : 19022  
 Mean   : 78311   NA's :147684     allages:    35   NA's         :112026  
 3rd Qu.:117462                    NA's   :155864                         
 Max.   :156639                                                           
                                                                          
    BYOB              BYOBCorkage     BikeParking    
 Mode :logical   no         :   752   Mode :logical  
 FALSE:857       yes_corkage:   157   FALSE:16419    
 TRUE :48        yes_free   :   491   TRUE :57064    
 NA's :155354    NA's       :154859   NA's :82776    
                                                     
                                                     
                                                     
 BusinessAcceptsBitcoin BusinessAcceptsCreditCards ByAppointmentOnly
 Mode :logical          Mode :logical              Mode :logical    
 FALSE:8663             FALSE:7579                 FALSE:18059      
 TRUE :211              TRUE :113145               TRUE :16714      
 NA's :147385           NA's :35535                NA's :121486     
                                                                    
                                                                    
                                                                    
   Caters        CoatCheck        Corkage        DogsAllowed    
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:17221     FALSE:6887      FALSE:512       FALSE:7642     
 TRUE :17306     TRUE :1089      TRUE :140       TRUE :3568     
 NA's :121732    NA's :148283    NA's :155607    NA's :145049   
                                                                
                                                                
                                                                
 DriveThru       GoodForDancing  GoodForKids     HappyHour      
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:3686      FALSE:6470      FALSE:11114     FALSE:2478     
 TRUE :2352      TRUE :2062      TRUE :47232     TRUE :5937     
 NA's :150221    NA's :147727    NA's :97913     NA's :147844   
                                                                
                                                                
                                                                
   HasTV             NoiseLevel     Open24Hours     OutdoorSeating 
 Mode :logical   average  : 26119   Mode :logical   Mode :logical  
 FALSE:21116     loud     :  3908   FALSE:318       FALSE:29680    
 TRUE :22601     quiet    :  8947   TRUE :29        TRUE :20619    
 NA's :112542    very_loud:  1670   NA's :155912    NA's :105960   
                 NA's     :115615                                  
                                                                   
                                                                   
 RestaurantsAttire RestaurantsCounterService RestaurantsDelivery
 casual: 43277     Mode :logical             Mode :logical      
 dressy:  1397     FALSE:150                 FALSE:35526        
 formal:   125     TRUE :246                 TRUE :11264        
 NA's  :111460     NA's :155863              NA's :109469       
                                                                
                                                                
                                                                
 RestaurantsGoodForGroups RestaurantsPriceRange2 RestaurantsReservations
 Mode :logical            Min.   :1.00           Mode :logical          
 FALSE:6223               1st Qu.:1.00           FALSE:28014            
 TRUE :43391              Median :2.00           TRUE :18496            
 NA's :106645             Mean   :1.82           NA's :109749           
                          3rd Qu.:2.00                                  
                          Max.   :4.00                                  
                          NA's   :59929                                 
 RestaurantsTableService RestaurantsTakeOut    Smoking      
 Mode :logical           Mode :logical      no     :  3019  
 FALSE:14318             FALSE:5355         outdoor:  3513  
 TRUE :25636             TRUE :49277        yes    :  1140  
 NA's :116305            NA's :101627       NA's   :148587  
                                                            
                                                            
                                                            
 WheelchairAccessible   WiFi          casual          classy       
 Mode :logical        free: 20924   Mode :logical   Mode :logical  
 FALSE:5815           no  : 21954   FALSE:23947     FALSE:41968    
 TRUE :38202          paid:   543   TRUE :18942     TRUE :921      
 NA's :112242         NA's:112838   NA's :113370    NA's :113370   
                                                                   
                                                                   
                                                                   
   divey          hipster         intimate        romantic      
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:26060     FALSE:41903     FALSE:42299     FALSE:42367    
 TRUE :1111      TRUE :963       TRUE :590       TRUE :522      
 NA's :129088    NA's :113393    NA's :113370    NA's :113370   
                                                                
                                                                
                                                                
  touristy         trendy         upscale          friday       
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:42658     FALSE:40921     FALSE:42492     FALSE:1857     
 TRUE :231       TRUE :1968      TRUE :397       TRUE :4534     
 NA's :113370    NA's :113370    NA's :113370    NA's :149868   
                                                                
                                                                
                                                                
   monday         saturday         sunday         thursday      
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:5698      FALSE:1726      FALSE:5113      FALSE:3841     
 TRUE :693       TRUE :4665      TRUE :1278      TRUE :2550     
 NA's :149868    NA's :149868    NA's :149868    NA's :149868   
                                                                
                                                                
                                                                
  tuesday        wednesday         garage           lot         
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:5738      FALSE:5247      FALSE:84470     FALSE:54451    
 TRUE :653       TRUE :1144      TRUE :4950      TRUE :34969    
 NA's :149868    NA's :149868    NA's :66839     NA's :66839    
                                                                
                                                                
                                                                
   street          valet         validated       dairy-free     
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:72745     FALSE:87761     FALSE:86762     FALSE:231      
 TRUE :16675     TRUE :1659      TRUE :440       TRUE :18       
 NA's :66839     NA's :66839     NA's :69057     NA's :156010   
                                                                
                                                                
                                                                
 gluten-free       halal           kosher         soy-free      
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:225       FALSE:243       FALSE:245       FALSE:243      
 TRUE :24        TRUE :6         TRUE :4         TRUE :6        
 NA's :156010    NA's :156010    NA's :156010    NA's :156010   
                                                                
                                                                
                                                                
   vegan         vegetarian      breakfast         brunch       
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:128       FALSE:177       FALSE:39214     FALSE:39388    
 TRUE :121       TRUE :72        TRUE :3726      TRUE :3552     
 NA's :156010    NA's :156010    NA's :113319    NA's :113319   
                                                                
                                                                
                                                                
  dessert          dinner        latenight         lunch        
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:41418     FALSE:27638     FALSE:40462     FALSE:25109    
 TRUE :1522      TRUE :15302     TRUE :2478      TRUE :17831    
 NA's :113319    NA's :113319    NA's :113319    NA's :113319   
                                                                
                                                                
                                                                
 africanamerican   asian          coloring         curly        
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:751       FALSE:598       FALSE:279       FALSE:399      
 TRUE :423       TRUE :576       TRUE :1126      TRUE :1006     
 NA's :155085    NA's :155085    NA's :154854    NA's :154854   
                                                                
                                                                
                                                                
 extensions         kids           perms         straightperms  
 Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:570       FALSE:693       FALSE:745       FALSE:672      
 TRUE :835       TRUE :712       TRUE :660       TRUE :502      
 NA's :154854    NA's :154854    NA's :154854    NA's :155085   
                                                                
                                                                
                                                                
 background_music     dj           jukebox         karaoke       
 Mode :logical    Mode :logical   Mode :logical   Mode :logical  
 FALSE:6193       FALSE:6312      FALSE:6895      FALSE:7785     
 TRUE :1833       TRUE :1714      TRUE :1131      TRUE :241      
 NA's :148233     NA's :148233    NA's :148233    NA's :148233   
                                                                 
                                                                 
                                                                 
    live          no_music         video        
 Mode :logical   Mode :logical   Mode :logical  
 FALSE:6621      FALSE:8026      FALSE:7819     
 TRUE :1405      NA's :148233    TRUE :207      
 NA's :148233                    NA's :148233   
                                                
                                                
                                                
bizatt$RestaurantsPriceRange2[ X[,"Restaurants"] ] %>% table(useNA="ifany")
.
    1     2     3     4  <NA> 
19013 24917  2845   537  4299 
grep("mex|ita",colnames(X),T, value=T)
 [1] "Italian"                "Mexican"               
 [3] "Tex-Mex"                "Hospitals"             
 [5] "Vitamins & Supplements" "Rehabilitation Center" 
 [7] "Guitar Stores"          "Meditation Centers"    
 [9] "Emergency Pet Hospital" "New Mexican Cuisine"   
italian = X[,"Italian"] & X[,"Restaurants"]
mexican = X[,"Mexican"] & X[,"Restaurants"]
c(sum(italian), sum(mexican), sum(italian & mexican))
[1] 4410 3913   25
bizatt$RestaurantsPriceRange2[ italian ] %>% table(useNA="ifany")
.
   1    2    3    4 <NA> 
 788 2698  519   70  335 
bizatt$RestaurantsPriceRange2[ mexican ] %>% table(useNA="ifany")
.
   1    2    3    4 <NA> 
2286 1388   41    5  193 


(10) 結論

從分析流程說明:

  1. 評論數量最多的100個商店
  2. 商業類別的動態比較
  3. Cafe、Breakfast & Brunch、Desserts商業類別
  4. 平台取出評論內容:txt
  5. 聲量、星等、互動、情緒
  6. 商店與話題
  7. 商店的情緒分佈
  8. 文字分析與字雲

從(1)、(2)、(3)可以發現,這是一個決策過程中最關鍵的部分,同時我們也有嘗試有系統地、有邏輯的確認問題(從泡泡圖的趨勢),但純粹的好奇心或過往的經驗依舊主導著問題的選擇,於是加入了主觀意識(飲食習慣改變)型塑出我們的類別。
從我們所選的類別(咖啡廳、早午餐、甜點)都是屬於輕食,所以我們將他們合在一起做比較。而從(4)中的2017年評論比2016來的少,也是值得注意的,往年都是逐年遞增,為何2017反而減少(房市?景氣?),這部分也是我們通過大量的數據來描述發生了什麼事。
接著我們嘗試透過聲量、星等、互動、情緒視覺化圖表來分析,(6)、(7)則更進一步的探討,並希望可以從而剝離問題的根本原因,透過圖表診斷為什麼會發生。例如我們在做完選出星等上升、下降最快的商店後,發現Copper Whisk Café的星等與評論數在2016年時急遽下降,這樣的分析即可幫助我們去搜尋資料,探究其原因,也許是換人經營等等原因。
從此分析中可得知,咖啡廳、早午餐、甜點評論持續增加(除了2017),店家千百種,也可從中找出店家優等生成為標竿,分析其成為優等生之原因,為其他店家做參考與指標。