AS12 YelpDemo - 資料分析示範 (2)

rm(list=ls(all=T)); gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 490275 26.2     940480 50.3   592000 31.7
## Vcells 916122  7.0    1650153 12.6  1081161  8.3

library(knitr); opts_chunk$set(comment = NA)
library(Matrix)
library(dplyr)
library(googleVis)
library(doParallel)
library(Rtsne)
library(wordcloud)
library(RColorBrewer)
library(randomcoloR)

library(d3heatmap)
library(morpheus)
library(FactoMineR)
library(factoextra)
library(highcharter)
library(tm)
library(slam)
library(MASS)

options(width=100, gvis.plot.tag='chart', digits=4, scipen=60)
# options(width=100, digits=4, scipen=60)

Load Data

prep2 = "/home/tonychuo/_S2018/yelp10/prep2/"
load(paste0(prep2,"businesses.rdata"))
load(paste0(prep2,"reviews.rdata"))

Merge sentiment score into `rev`

rev = cbind(rev, senti[,-1])
range(rev$date)

[1] "2004-07-22" "2017-07-26"

我改用用年中來切年份，這樣可以避免最後一年評論數下降，提升圖表的連續性，也比較容易觀察趨勢，大家可以在2.2的動態圖表裡面，清楚的看到整個目標族群在變數空間裡面移動的效果。

#rev$year =  as.integer(format(rev$date, "%Y"))
breaks = as.Date(c("2004-07-22", paste0(2005:2017, "-07-27")))
rev$year = as.integer(cut(rev$date, breaks)) + 2004

(1) 評論數量最多的100個商店

1.1 Top100 bid

b100 = rev %>% count(bid) %>% arrange(desc(n)) %>% head(100) %>% .$bid

1.2 Summarise reviews by bid and year

df = rev %>% filter(bid %in% b100) %>% 
  group_by(bid, year) %>% 
  summarise(
    positive = mean(positive),
    negative = mean(negative),
    sad = mean(sadness),
    n = n()
    )

1.3 Biz that have review every year after 2009

y09 = df %>% filter(year >= 2009) %>% count(bid) %>% filter(nn == 9) %>% .$bid

1.4 Google Motion Chart

df %>% filter(year >= 2010 & bid %in% y09) %>% 
  data.frame %>% 
  gvisMotionChart("bid", "year") %>% plot

(2) 商業類別的動態比較

2.1 依商業類別彙總評論資料

LOAD = TRUE
if(LOAD) { 
  load(paste0(prep2, "cat822.rdata"))
} else {

library(doParallel)
detectCores()
cores <- makeCluster(16)
registerDoParallel(cores)
getDoParWorkers()
t0 = Sys.time()
cat822 = foreach(i=1:822, .combine=rbind, .packages="Matrix") %dopar% {
  rx = rev[rev$bid %in% B$bid[X[,i]],]
  cbind(
    category = colnames(X)[i],
    n = as.integer( table(rx$year) ),
    aggregate(. ~ year, rx[,c(1,3:5,7,10:20)], mean)
    ) }
Sys.time() - t0 # 58.0772 secs
stopCluster(cores)

}

2.2 比較討論聲量相似的商業類別

df = cat822 %>% 
  filter(year >= 2010) %>%
  filter(category %in% colnames(X)[20:50]) %>%
  dplyr::select(category, year, negative, stars, useful, n,
         positive, joy, anticipation, trust, surprise,
         sadness, anger, disgust, fear, cool, funny
         )  %>% 
  #group_by(year) %>%                  # scale scores by years so they
  #mutate_at(c(3:17), scale) %>%       #   are contemporarily relative  
  #mutate(n = 0.1 + n - min(n) ) %>%   # aviod negative bubble size
  data.frame 

p = gvisMotionChart(df, "category", "year")
plot(p)

(3) Japanese 商業類別

3.1 設定商店評論選擇條件

選擇在

2011年以後每年都有評論
2011年以後評論的總數數量大於100、小於300
商業類別包含日本

的商店，將其bid放在japs裡面

japs = rev %>% 
  filter(bid %in% B$bid[X[,"Japanese"]]) %>% 
  filter(year >= 2011) %>% 
  group_by(bid) %>% 
  summarise(                      
    n_year = n_distinct(year),   # 
    n_rev = n()                  # 
    ) %>% 
  filter(
    n_year == 7,
    n_rev > 100 & n_rev < 300
    ) %>% 
  dplyr::select(bid) %>% 
  left_join(B)

Joining, by = "bid"

nrow(japs)

[1] 65

3.2 選出評論

選出這一些商店在2011年之後的所有評論，放在資料框R裡面

R = rev %>% filter(bid %in% japs$bid & year >= 2011)
par(mar=c(3,4,3,2), cex=0.8)
R %>% count(bid) %>% .$n %>% hist(8, main="No. Reviews per Biz")

(4) 從平台取出評論內容：`txt`

#########################################################
library(dplyr) 
library(sparklyr)
Sys.setenv(SPARK_HOME="/usr/local/spark/spark-2.1.0-bin-hadoop2.7/")
config <- spark_config()
# config$spark.ui.port = "4044"
config$spark.executor.memory = "4G"
config$spark.driver.memory = "4G" 
config$spark.yarn.executor.memoryOverhead = "4096"
sc <- spark_connect(master = "spark://hnamenode:7077", config = config)

yelp = "hdfs://192.168.1.100:9000/home/tonychuo/yelp10/pq01/"
Text = spark_read_parquet(sc, "Text", paste0(yelp, "Text"))
txt = Text %>% filter(rid %in% R$rid) %>% collect

spark_disconnect(sc)

nrow(txt)

[1] 10866

總共有10,925篇評論

c(setequal(R$rid, txt$rid), identical(R$rid, txt$rid))

[1]  TRUE FALSE

從平台取出的評論次序和R$rid是不同的，所以需要排序一下

txt = left_join(R[,"rid",F], txt)

Joining, by = "rid"

identical(R$rid, txt$rid)

[1] TRUE

從主機取出話題評分

ypath = "/home/tonychuo/_S2018/yelp10/data/" 
load(paste0(ypath, "empath.rdata"))
empath = left_join(R[,"rid",F], empath)

Joining, by = "rid"

identical(R$rid, empath$rid)

[1] TRUE

gc()                                # release memory

            used  (Mb) gc trigger   (Mb)   max used   (Mb)
Ncells   2654681 141.8    4703850  251.3    3886542  207.6
Vcells 104317347 795.9  891850480 6804.3 1028137623 7844.1

par(mar=c(3,4,3,2), cex=0.8)
R$year %>% table %>% barplot(main="No. Review by Year")

(5) 聲量、星等、互動、情緒

5.1 商店間的動態比較

df = R %>% group_by(bid, year) %>% 
  mutate(engage = useful + cool + funny) %>% 
  summarise_at(
    c("engage", "stars", "positive", "negative"), 
    mean) %>% 
  left_join(count(R, bid, year)) %>% 
  data.frame

Joining, by = c("bid", "year")

p = gvisMotionChart(df, "bid", "year")
plot(p)

5.2 加入店名、去除極端值

df = left_join(df, B[,c("bid","name")])    # merge in biz name

Joining, by = "bid"

df = cbind(df[,-c(1:2)], df[,1:2,F]) %>%   # move bid to the last column
  filter(! bid %in% c(6115, 3406, 30751)) %>%
  data.frame

p = gvisMotionChart(df, "name", "year") 
plot(p)

5.3 星等上升、下降最快的商店

使用線性回歸找出星等上升、下降最快的商店

library(stringr)
bx = sapply(split(df, df$bid), function(x) {
  lm(stars ~ year, x) %>% coef %>% `[`("year") }) %>%  # take lm coef.
  .[ abs(.) > 0.15  ] %>%                              # 
  names %>% str_extract("[0-9]*") %>% 
  as.integer

p = df %>% filter(bid %in% bx) %>% gvisMotionChart("name", "year")
plot(p)

(6) 商店與話題

6.1 商店話題矩陣與熱圖

Make biz-theme matrix - mx

library(d3heatmap)
mx = sapply(split(empath[,-1], R$bid), colMeans) %>%  t    # biz-theme matrix

bnames = B$name[match(rownames(mx), B$bid)]
rownames(mx) = bnames

Select themes with medium-high weights

rx = colSums(mx) %>% quantile(c(0.60, 0.95))          #     
mx = mx[, colSums(mx) > rx[1] & colSums(mx) < rx[2]]  #

Check the range of weights

par(cex=0.8)
hist(log(mx+1e-4))

6.2 D3heatmap

mx %>% {log(.+1e-4)} %>% t %>% d3heatmap(color=rev(cm.colors(9)))

6.3 熱圖 + 互動式集群分析

這個工具不能投射在網頁上，只能直接在Rstudio裡面做

library(morpheus)
mx %>% {log(.+1e-4)} %>% morpheus

(7) 商店的情緒分佈

將商店投射到尺度縮減之後的情緒平面上

7.1 建立商店情緒矩陣

pcx = sapply(split(R[,10:19], R$bid), colMeans) %>% t
rownames(pcx) = bnames

7.2 尺度縮減

library(FactoMineR)
library(factoextra)
pcx = pcx %>% scale %>% PCA(ncp=10, graph=F)

par(cex=0.8)
barplot(pcx$eig[1:10,3],names=1:10,main="Accumulated Variance",
        xlab="No. Components", ylab="% of Variance")
abline(h=seq(0,100,10),col='lightgray')

前三個主成分已經涵蓋了90%的變異

7.3 情緒平面上的商店

source("bipcx.R")

N = n_distinct(R$bid)
bipcx(pcx,1,2,10,N,t1="Strength",t2="Valence",
      obs='Business', main="Strength & Valence of Sentiment")

bipcx(pcx,3,2,10,N,t1="Arosual",t2="Valence",
      obs='Business', main="Strength & Valence of Sentiment")

(8) 文字分析與字雲

8.1 建立字頻表 (文件字詞矩陣)

library(tm)
dtm = txt$text %>% 
  iconv(to = "utf-8", sub="") %>% 
  VectorSource %>% Corpus %>% 
  tm_map(content_transformer(tolower)) %>% 
  tm_map(removePunctuation) %>% 
  tm_map(removeWords, stopwords("english")) %>% 
  tm_map(stemDocument) %>% 
  DocumentTermMatrix %>% 
  removeSparseTerms(0.998)
dtm  # (documents: 10925, terms: 2520)

<<DocumentTermMatrix (documents: 10866, terms: 2535)>>
Non-/sparse entries: 442136/27103174
Sparsity           : 98%
Maximal term length: 13
Weighting          : term frequency (tf)

8.2 使用`TF-IDF`篩選字詞

library(slam)
tfidf = tapply(dtm$v/row_sums(dtm)[dtm$i], dtm$j, mean) *
  log2(nrow(dtm)/col_sums(dtm > 0))
summary(tfidf)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0505  0.1189  0.1466  0.1616  0.1834  1.3136

dtm = dtm[, tfidf > 0.1471 ]
dtm = dtm[,order(-col_sums(dtm))]
dim(dtm)

[1] 10866  1255

8.3 使用`tSNE`做尺度縮減

library(Rtsne)
n = 800
tsne = dtm[, 1:n] %>% as.data.frame.matrix %>% 
  scale %>% t %>% Rtsne(
    check_duplicates = FALSE, theta=0.0, max_iter=3200)

8.4 層級式集群分析

Y = tsne$Y              # tSNE coordinates
d = dist(Y)             # distance matrix
hc = hclust(d)          # hi-clustering
K = 100                 # number of clusters 
g = cutree(hc,K)        # cut into K clusters
table(g) %>% as.vector %>% sort         # sizes of clusters

  [1]  2  2  2  2  3  3  3  3  3  4  4  4  4  4  4  4  4  4  4  4  5  5  5  5  5  5  5  5  5  5  5
 [32]  5  5  6  6  6  6  6  6  6  6  6  7  7  7  7  7  7  7  7  7  7  8  8  8  8  8  8  8  8  8  8
 [63]  8  8  8  9  9  9 10 10 10 10 11 11 11 11 11 11 11 11 11 12 12 12 12 12 13 13 13 14 14 14 15
 [94] 15 15 15 16 17 18 22

8.5 文字雲

library(randomcoloR)
library(wordcloud)

wc = col_sums(dtm[,1:n])
colors = distinctColorPalette(K)

png("fig/japs2.png", width=3200, height=1800)
textplot(
  Y[,1], Y[,2], colnames(dtm)[1:n], show=F, 
  col=colors[g],
  cex= 0.2 + 1.25 * sqrt(wc/mean(wc)),
  font=2)
dev.off()

png 
  2

(9) 商務屬性 Business Attributes

9.1 `bizatt` 資料框

summary(bizatt)

      bid         AcceptsInsurance  AgesAllowed              Alcohol          BYOB        
 Min.   :     1   Mode :logical    18plus :    86   beer_and_wine:  6444   Mode :logical  
 1st Qu.: 39150   FALSE:2149       19plus :    63   full_bar     : 18767   FALSE:857      
 Median : 78310   TRUE :6426       21plus :   211   none         : 19022   TRUE :48       
 Mean   : 78311   NA's :147684     allages:    35   NA's         :112026   NA's :155354   
 3rd Qu.:117462                    NA's   :155864                                         
 Max.   :156639                                                                           
                                                                                          
      BYOBCorkage     BikeParking     BusinessAcceptsBitcoin BusinessAcceptsCreditCards
 no         :   752   Mode :logical   Mode :logical          Mode :logical             
 yes_corkage:   157   FALSE:16419     FALSE:8663             FALSE:7579                
 yes_free   :   491   TRUE :57064     TRUE :211              TRUE :113145              
 NA's       :154859   NA's :82776     NA's :147385           NA's :35535               
                                                                                       
                                                                                       
                                                                                       
 ByAppointmentOnly   Caters        CoatCheck        Corkage        DogsAllowed     DriveThru      
 Mode :logical     Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:18059       FALSE:17221     FALSE:6887      FALSE:512       FALSE:7642      FALSE:3686     
 TRUE :16714       TRUE :17306     TRUE :1089      TRUE :140       TRUE :3568      TRUE :2352     
 NA's :121486      NA's :121732    NA's :148283    NA's :155607    NA's :145049    NA's :150221   
                                                                                                  
                                                                                                  
                                                                                                  
 GoodForDancing  GoodForKids     HappyHour         HasTV             NoiseLevel     Open24Hours    
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   average  : 26119   Mode :logical  
 FALSE:6470      FALSE:11114     FALSE:2478      FALSE:21116     loud     :  3908   FALSE:318      
 TRUE :2062      TRUE :47232     TRUE :5937      TRUE :22601     quiet    :  8947   TRUE :29       
 NA's :147727    NA's :97913     NA's :147844    NA's :112542    very_loud:  1670   NA's :155912   
                                                                 NA's     :115615                  
                                                                                                   
                                                                                                   
 OutdoorSeating  RestaurantsAttire RestaurantsCounterService RestaurantsDelivery
 Mode :logical   casual: 43277     Mode :logical             Mode :logical      
 FALSE:29680     dressy:  1397     FALSE:150                 FALSE:35526        
 TRUE :20619     formal:   125     TRUE :246                 TRUE :11264        
 NA's :105960    NA's  :111460     NA's :155863              NA's :109469       
                                                                                
                                                                                
                                                                                
 RestaurantsGoodForGroups RestaurantsPriceRange2 RestaurantsReservations RestaurantsTableService
 Mode :logical            Min.   :1              Mode :logical           Mode :logical          
 FALSE:6223               1st Qu.:1              FALSE:28014             FALSE:14318            
 TRUE :43391              Median :2              TRUE :18496             TRUE :25636            
 NA's :106645             Mean   :2              NA's :109749            NA's :116305           
                          3rd Qu.:2                                                             
                          Max.   :4                                                             
                          NA's   :59929                                                         
 RestaurantsTakeOut    Smoking       WheelchairAccessible   WiFi          casual       
 Mode :logical      no     :  3019   Mode :logical        free: 20924   Mode :logical  
 FALSE:5355         outdoor:  3513   FALSE:5815           no  : 21954   FALSE:23947    
 TRUE :49277        yes    :  1140   TRUE :38202          paid:   543   TRUE :18942    
 NA's :101627       NA's   :148587   NA's :112242         NA's:112838   NA's :113370   
                                                                                       
                                                                                       
                                                                                       
   classy          divey          hipster         intimate        romantic        touristy      
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:41968     FALSE:26060     FALSE:41903     FALSE:42299     FALSE:42367     FALSE:42658    
 TRUE :921       TRUE :1111      TRUE :963       TRUE :590       TRUE :522       TRUE :231      
 NA's :113370    NA's :129088    NA's :113393    NA's :113370    NA's :113370    NA's :113370   
                                                                                                
                                                                                                
                                                                                                
   trendy         upscale          friday          monday         saturday         sunday       
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:40921     FALSE:42492     FALSE:1857      FALSE:5698      FALSE:1726      FALSE:5113     
 TRUE :1968      TRUE :397       TRUE :4534      TRUE :693       TRUE :4665      TRUE :1278     
 NA's :113370    NA's :113370    NA's :149868    NA's :149868    NA's :149868    NA's :149868   
                                                                                                
                                                                                                
                                                                                                
  thursday        tuesday        wednesday         garage           lot            street       
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:3841      FALSE:5738      FALSE:5247      FALSE:84470     FALSE:54451     FALSE:72745    
 TRUE :2550      TRUE :653       TRUE :1144      TRUE :4950      TRUE :34969     TRUE :16675    
 NA's :149868    NA's :149868    NA's :149868    NA's :66839     NA's :66839     NA's :66839    
                                                                                                
                                                                                                
                                                                                                
   valet         validated       dairy-free      gluten-free       halal           kosher       
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:87761     FALSE:86762     FALSE:231       FALSE:225       FALSE:243       FALSE:245      
 TRUE :1659      TRUE :440       TRUE :18        TRUE :24        TRUE :6         TRUE :4        
 NA's :66839     NA's :69057     NA's :156010    NA's :156010    NA's :156010    NA's :156010   
                                                                                                
                                                                                                
                                                                                                
  soy-free         vegan         vegetarian      breakfast         brunch         dessert       
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:243       FALSE:128       FALSE:177       FALSE:39214     FALSE:39388     FALSE:41418    
 TRUE :6         TRUE :121       TRUE :72        TRUE :3726      TRUE :3552      TRUE :1522     
 NA's :156010    NA's :156010    NA's :156010    NA's :113319    NA's :113319    NA's :113319   
                                                                                                
                                                                                                
                                                                                                
   dinner        latenight         lunch         africanamerican   asian          coloring      
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:27638     FALSE:40462     FALSE:25109     FALSE:751       FALSE:598       FALSE:279      
 TRUE :15302     TRUE :2478      TRUE :17831     TRUE :423       TRUE :576       TRUE :1126     
 NA's :113319    NA's :113319    NA's :113319    NA's :155085    NA's :155085    NA's :154854   
                                                                                                
                                                                                                
                                                                                                
   curly         extensions         kids           perms         straightperms   background_music
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   
 FALSE:399       FALSE:570       FALSE:693       FALSE:745       FALSE:672       FALSE:6193      
 TRUE :1006      TRUE :835       TRUE :712       TRUE :660       TRUE :502       TRUE :1833      
 NA's :154854    NA's :154854    NA's :154854    NA's :154854    NA's :155085    NA's :148233    
                                                                                                 
                                                                                                 
                                                                                                 
     dj           jukebox         karaoke           live          no_music         video        
 Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
 FALSE:6312      FALSE:6895      FALSE:7785      FALSE:6621      FALSE:8026      FALSE:7819     
 TRUE :1714      TRUE :1131      TRUE :241       TRUE :1405      NA's :148233    TRUE :207      
 NA's :148233    NA's :148233    NA's :148233    NA's :148233                    NA's :148233

bizatt$RestaurantsPriceRange2[ X[,"Restaurants"] ] %>% table(useNA="ifany")

.
    1     2     3     4  <NA> 
19013 24917  2845   537  4299

grep("mex|ita",colnames(X),T, value=T)

 [1] "Italian"                "Mexican"                "Tex-Mex"               
 [4] "Hospitals"              "Vitamins & Supplements" "Rehabilitation Center" 
 [7] "Guitar Stores"          "Meditation Centers"     "Emergency Pet Hospital"
[10] "New Mexican Cuisine"

italian = X[,"Italian"] & X[,"Restaurants"]
mexican = X[,"Mexican"] & X[,"Restaurants"]
c(sum(italian), sum(mexican), sum(italian & mexican))

[1] 4410 3913   25

bizatt$RestaurantsPriceRange2[ italian ] %>% table(useNA="ifany")

.
   1    2    3    4 <NA> 
 788 2698  519   70  335

bizatt$RestaurantsPriceRange2[ mexican ] %>% table(useNA="ifany")

.
   1    2    3    4 <NA> 
2286 1388   41    5  193

AS12 YelpDemo - 資料分析示範 (2)

卓雍然 D994010001

2018-08-30 10:58:45

Load Data

Merge sentiment score into `rev`

(1) 評論數量最多的100個商店

1.1 Top100 bid

1.2 Summarise reviews by bid and year

1.3 Biz that have review every year after 2009

1.4 Google Motion Chart

(2) 商業類別的動態比較

2.1 依商業類別彙總評論資料

2.2 比較討論聲量相似的商業類別

(3) Japanese 商業類別

3.1 設定商店評論選擇條件

3.2 選出評論

(4) 從平台取出評論內容：`txt`

從主機取出話題評分

(5) 聲量、星等、互動、情緒

5.1 商店間的動態比較

5.2 加入店名、去除極端值

5.3 星等上升、下降最快的商店

(6) 商店與話題

6.1 商店話題矩陣與熱圖

6.2 D3heatmap

6.3 熱圖 + 互動式集群分析

(7) 商店的情緒分佈

7.1 建立商店情緒矩陣

7.2 尺度縮減

7.3 情緒平面上的商店

(8) 文字分析與字雲

8.1 建立字頻表 (文件字詞矩陣)

8.2 使用`TF-IDF`篩選字詞

8.3 使用`tSNE`做尺度縮減

8.4 層級式集群分析

8.5 文字雲

(9) 商務屬性 Business Attributes

9.1 `bizatt` 資料框

AS12 YelpDemo - 資料分析示範 (2)

卓雍然 D994010001

2018-08-30 10:58:45

Load Data

Merge sentiment score into rev

(1) 評論數量最多的100個商店

1.1 Top100 bid

1.2 Summarise reviews by bid and year

1.3 Biz that have review every year after 2009

1.4 Google Motion Chart

(2) 商業類別的動態比較

2.1 依商業類別彙總評論資料

2.2 比較討論聲量相似的商業類別

(3) Japanese 商業類別

3.1 設定商店評論選擇條件

3.2 選出評論

(4) 從平台取出評論內容：txt

從主機取出話題評分

(5) 聲量、星等、互動、情緒

5.1 商店間的動態比較

5.2 加入店名、去除極端值

5.3 星等上升、下降最快的商店

(6) 商店與話題

6.1 商店話題矩陣與熱圖

6.2 D3heatmap

6.3 熱圖 + 互動式集群分析

(7) 商店的情緒分佈

7.1 建立商店情緒矩陣

7.2 尺度縮減

7.3 情緒平面上的商店

(8) 文字分析與字雲

8.1 建立字頻表 (文件字詞矩陣)

8.2 使用TF-IDF篩選字詞

8.3 使用tSNE做尺度縮減

8.4 層級式集群分析

8.5 文字雲

(9) 商務屬性 Business Attributes

9.1 bizatt 資料框

Merge sentiment score into `rev`

(4) 從平台取出評論內容：`txt`

8.2 使用`TF-IDF`篩選字詞

8.3 使用`tSNE`做尺度縮減

9.1 `bizatt` 資料框