library(knitr); opts_chunk$set(comment = NA)

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(d3heatmap)
## Warning: package 'd3heatmap' was built under R version 3.5.3
library(googleVis)
## Warning: package 'googleVis' was built under R version 3.5.3
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
## 
## Welcome to googleVis version 0.6.3
## 
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
## 
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
## 
## See the googleVis package vignettes for more details,
## or visit https://github.com/mages/googleVis.
## 
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
options(gvis.plot.tag='chart', digits=4, scipen=60)
#options(width=100, digits=4, scipen=60)
#op <- options(gvis.plot.tag='chart')

載入資料

# Customer = read.csv("olist_customers_dataset.csv")
# Geo = read.csv("olist_geolocation_dataset.csv")
# Item = read.csv("olist_order_items_dataset.csv") #每筆訂單可以有多個商品
# Pay = read.csv("olist_order_payments_dataset.csv")
# Reviews = read.csv("olist_order_reviews_dataset.csv")
# Order = read.csv("olist_orders_dataset.csv")
# Product = read.csv("olist_products_dataset.csv")
# Seller = read.csv("olist_sellers_dataset.csv")
# CatName = read.csv("product_category_name_translation.csv")
# MQL = read.csv("olist_marketing_qualified_leads_dataset.csv")
# Deal = read.csv("olist_closed_deals_dataset.csv")
load("origin.RData") #load 原始資料
Order = merge(Order,Pay) # 一筆是一個訂單
Order = merge(Order,Customer)  # 一筆是一個訂單

Order$order_purchase_timestamp = as.POSIXct(Order$order_purchase_timestamp, format = "%Y-%m-%d %H:%M:%S")

min(Order$order_purchase_timestamp)
[1] "2016-09-04 21:15:19 CST"
max(Order$order_purchase_timestamp)
[1] "2018-10-17 17:30:18 CST"
hist(Order$order_purchase_timestamp, "month", las = 2,freq = T,xlab = "")

table(format(Order$order_purchase_timestamp, "%u"), format(Order$order_purchase_timestamp, "%H")) %>%
  as.data.frame.matrix %>%
  d3heatmap(F,F,col=colorRamp(c('seagreen','lightyellow','red')))

產品銷售資訊

names(CatName)[1] = "product_category_name"
Product = merge(Product,CatName)
ProductSell = merge(Order,Item) #一筆 是一個產品的銷售紀錄
ProductSell = merge(ProductSell,Product) #一筆 是一個產品的銷售紀錄

商品種類的實際需求數量

ProductSell %>% 
  group_by(product_category_name_english) %>% 
  summarise(
    n = n()
  ) %>% arrange(desc(n))
ProductSell = left_join(Seller,ProductSell) #一筆是一個賣家的產品銷售紀錄
Joining, by = "seller_id"
#用left join 保留Seller資料  因為可能有Seller沒有產品銷售紀錄

沒有銷售紀錄的商家

sum(is.na(ProductSell$product_id))
[1] 62
#有62家

subset(ProductSell,is.na(product_id)) 
length(unique(ProductSell$seller_id))
[1] 3095
#在銷售紀錄中 共有3095個不同的賣家
#但Deal資料集中 只有842位賣家的資料(看起來是透過網頁註冊的賣家才會放在這個資料集)
D = merge(ProductSell,Pay)

Reviews資料集 有點奇怪 一則評論是針對一筆訂單 而不是一個產品 這樣就沒辦法看哪種產品的review score高 所以先不使用Review

table(Item$order_item_id)

    1     2     3     4     5     6     7     8     9    10    11    12 
98666  9803  2287   965   460   256    58    36    28    25    17    13 
   13    14    15    16    17    18    19    20    21 
    8     7     5     3     3     3     3     3     1 
#所有訂單中 訂購的商品數量
#只有購買一個商品的訂單
One_product = Item %>%
        group_by(order_id) %>% 
        summarise(product_num = n_distinct(order_item_id)) %>% 
        filter(product_num == 1)

先算出每種商品種類的review_score

X = ProductSell %>%
    filter(order_id %in% One_product$order_id) %>%  #篩選出只有購買一個商品的訂單
    group_by(product_category_name_english) %>% 
    summarise(
      avg_log_price = log(mean(price),10),
      total_log_price = log(sum(price),10),
      total_sold_num = n(),
      total_seller_num = n_distinct(seller_id)
    ) %>% mutate()
vis = D %>%
  filter(order_id %in% One_product$order_id) %>%
  merge(Reviews) %>% 
  group_by(product_category_name_english) %>%
  summarise(
    total_review_num = n(), #評論數量
    avg_review_score = mean(review_score)) %>% 
  merge(X) %>% 
  mutate(dummy = 2018)
vistmp = gvisMotionChart(
  vis, "product_category_name_english", "dummy",
  options=list(width=800, height=600, title="Product category") )

vistmp
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "https://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="https://www.w3.org/1999/xhtml">
<head>
<title>MotionChartID147c79bc69a8</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8" />
<style type="text/css">
body {
  color: #444444;
  font-family: Arial,Helvetica,sans-serif;
  font-size: 75%;
  }
  a {
  color: #4D87C7;
  text-decoration: none;
}
</style>
</head>
<body>
 <!-- MotionChart generated in R 3.5.1 by googleVis 0.6.3 package -->
<!-- Tue Apr 16 15:43:29 2019 -->


<!-- jsHeader -->
<script type="text/javascript">
 
// jsData 
function gvisDataMotionChartID147c79bc69a8 () {
var data = new google.visualization.DataTable();
var datajson =
[
 [
"agro_industry_and_commerce",
2018,
182,
4.104395604,
2.533301748,
4.793373136,
182,
33
],
[
"air_conditioning",
2018,
219,
4.03196347,
2.249722022,
4.590166136,
219,
48
],
[
"art",
2018,
202,
4.059405941,
2.069230923,
4.374582293,
202,
34
],
[
"arts_and_craftmanship",
2018,
20,
4.35,
1.935335933,
3.236365928,
20,
11
],
[
"audio",
2018,
348,
3.844827586,
2.157236621,
4.697566096,
347,
35
],
[
"auto",
2018,
3756,
4.113951012,
2.158829743,
5.731933526,
3742,
364
],
[
"baby",
2018,
2792,
4.076647564,
2.162169461,
5.606838692,
2784,
229
],
[
"bed_bath_table",
2018,
8569,
4.026490839,
1.991772805,
5.91945107,
8466,
184
],
[
"books_general_interest",
2018,
488,
4.467213115,
1.950815381,
4.639235203,
488,
54
],
[
"books_imported",
2018,
45,
4.288888889,
1.897138159,
3.550350672,
45,
5
],
[
"books_technical",
2018,
254,
4.437007874,
1.854357379,
4.25575792,
252,
26
],
[
"cds_dvds_musicals",
2018,
10,
4.7,
1.740362689,
2.740362689,
10,
1
],
[
"christmas_supplies",
2018,
116,
4.120689655,
1.789888139,
3.854346129,
116,
16
],
[
"cine_photo",
2018,
61,
4.37704918,
2.02260156,
3.80075281,
60,
13
],
[
"computers",
2018,
180,
4.183333333,
3.07055438,
5.325826885,
180,
8
],
[
"computers_accessories",
2018,
6142,
4.095897102,
2.069422819,
5.854610239,
6098,
277
],
[
"consoles_games",
2018,
1042,
4.080614203,
2.169403374,
5.186854103,
1041,
80
],
[
"construction_tools_construction",
2018,
645,
4.125581395,
2.254680661,
5.064240376,
645,
118
],
[
"construction_tools_lights",
2018,
201,
4.28358209,
2.212495764,
4.51352576,
200,
31
],
[
"construction_tools_safety",
2018,
152,
3.861842105,
2.357215189,
4.539058777,
152,
33
],
[
"cool_stuff",
2018,
3631,
4.197190857,
2.236049619,
5.794878144,
3621,
255
],
[
"costruction_tools_garden",
2018,
157,
4.452229299,
2.055128901,
4.245460599,
155,
27
],
[
"costruction_tools_tools",
2018,
93,
4.35483871,
2.150829039,
4.119311988,
93,
23
],
[
"diapers_and_hygiene",
2018,
23,
4.043478261,
1.69578232,
3.057510156,
23,
8
],
[
"drinks",
2018,
252,
4.285714286,
1.775583,
4.176983541,
252,
30
],
[
"dvds_blu_ray",
2018,
60,
4.066666667,
2.012477272,
3.790628522,
60,
9
],
[
"electronics",
2018,
2442,
4.133906634,
1.767356933,
5.154746759,
2440,
141
],
[
"fashio_female_clothing",
2018,
36,
3.75,
1.818584875,
3.350063792,
34,
10
],
[
"fashion_bags_accessories",
2018,
1825,
4.209863014,
1.893447916,
5.15184572,
1813,
98
],
[
"fashion_childrens_clothes",
2018,
6,
4.333333333,
1.911929593,
2.690080844,
6,
4
],
[
"fashion_male_clothing",
2018,
105,
3.695238095,
1.907735272,
3.928924571,
105,
13
],
[
"fashion_shoes",
2018,
230,
4.165217391,
1.991435643,
4.345544082,
226,
14
],
[
"fashion_sport",
2018,
25,
4.44,
1.88342956,
3.263640802,
24,
9
],
[
"fashion_underwear_beach",
2018,
127,
3.913385827,
1.878891195,
3.982694916,
127,
9
],
[
"fixed_telephony",
2018,
197,
3.944162437,
2.179343058,
4.47159913,
196,
45
],
[
"flowers",
2018,
23,
4.52173913,
1.501083088,
2.862810924,
23,
3
],
[
"food",
2018,
418,
4.303827751,
1.781888619,
4.4030649,
418,
52
],
[
"food_drink",
2018,
203,
4.389162562,
1.811935507,
4.117286877,
202,
32
],
[
"furniture_bedroom",
2018,
84,
4.238095238,
2.280595921,
4.199674014,
83,
21
],
[
"furniture_decor",
2018,
5397,
4.128589957,
2.005443179,
5.733390888,
5345,
330
],
[
"furniture_living_room",
2018,
379,
4.073878628,
2.165038528,
4.741379878,
377,
51
],
[
"furniture_mattress_and_upholstery",
2018,
41,
3.804878049,
2.071691659,
3.684475516,
41,
5
],
[
"garden_tools",
2018,
3101,
4.1889713,
2.126761941,
5.616860946,
3091,
220
],
[
"health_beauty",
2018,
8467,
4.191449156,
2.143770139,
6.068721028,
8413,
470
],
[
"home_appliances",
2018,
812,
4.151477833,
2.024901364,
4.913642325,
774,
47
],
[
"home_appliances_2",
2018,
262,
4.198473282,
2.675363362,
5.088663126,
259,
45
],
[
"home_comfort_2",
2018,
18,
4.222222222,
1.526820124,
2.782092629,
18,
4
],
[
"home_confort",
2018,
364,
4.065934066,
2.172361332,
4.732267957,
363,
18
],
[
"home_construction",
2018,
422,
4.139810427,
2.214582226,
4.838864322,
421,
61
],
[
"housewares",
2018,
5424,
4.192109145,
2.000020002,
5.732735343,
5404,
443
],
[
"industry_commerce_and_business",
2018,
207,
4.260869565,
2.228715606,
4.542582826,
206,
29
],
[
"kitchen_dining_laundry_garden_furniture",
2018,
220,
4.086363636,
2.237989014,
4.578433129,
219,
46
],
[
"la_cuisine",
2018,
12,
4.916666667,
2.191026947,
3.270208193,
12,
2
],
[
"luggage_accessories",
2018,
1023,
4.358748778,
2.121354751,
5.131230384,
1023,
70
],
[
"market_place",
2018,
272,
4.091911765,
1.970089521,
4.404658425,
272,
56
],
[
"music",
2018,
38,
4.368421053,
2.211228068,
3.791011665,
38,
18
],
[
"musical_instruments",
2018,
620,
4.187096774,
2.507403395,
5.299795084,
620,
64
],
[
"office_furniture",
2018,
1077,
3.74001857,
2.239990453,
5.270590175,
1073,
31
],
[
"party_supplies",
2018,
37,
4.351351351,
2.062640923,
3.630842647,
37,
11
],
[
"perfumery",
2018,
3091,
4.195082498,
2.090765268,
5.577195747,
3065,
166
],
[
"pet_shop",
2018,
1585,
4.30977918,
2.075128897,
5.274609812,
1583,
130
],
[
"security_and_services",
2018,
2,
2.5,
2.151201249,
2.452231244,
2,
2
],
[
"signaling_and_security",
2018,
107,
4.130841121,
2.052747146,
4.082130923,
107,
35
],
[
"small_appliances",
2018,
608,
4.189144737,
2.4912577,
5.272294639,
604,
105
],
[
"small_appliances_home_oven_and_coffee",
2018,
76,
4.302631579,
2.800352632,
4.681166225,
76,
15
],
[
"sports_leisure",
2018,
7246,
4.212945073,
2.089523132,
5.946795306,
7199,
469
],
[
"stationery",
2018,
2202,
4.288828338,
1.989827476,
5.331459812,
2196,
166
],
[
"tablets_printing_image",
2018,
76,
4.105263158,
1.956510275,
3.837323867,
76,
6
],
[
"telephony",
2018,
4066,
4.047958682,
1.869179011,
5.477812001,
4061,
145
],
[
"toys",
2018,
3795,
4.211330698,
2.086916651,
5.665211956,
3787,
246
],
[
"watches_gifts",
2018,
5531,
4.08967637,
2.332394304,
6.074333381,
5520,
101
] 
];
data.addColumn('string','product_category_name_english');
data.addColumn('number','dummy');
data.addColumn('number','total_review_num');
data.addColumn('number','avg_review_score');
data.addColumn('number','avg_log_price');
data.addColumn('number','total_log_price');
data.addColumn('number','total_sold_num');
data.addColumn('number','total_seller_num');
data.addRows(datajson);
return(data);
}
 
// jsDrawChart
function drawChartMotionChartID147c79bc69a8() {
var data = gvisDataMotionChartID147c79bc69a8();
var options = {};
options["width"] = 800;
options["height"] = 600;
options["state"] = "";
options["title"] = "Product category";

    var chart = new google.visualization.MotionChart(
    document.getElementById('MotionChartID147c79bc69a8')
    );
    chart.draw(data,options);
    

}
  
 
// jsDisplayChart
(function() {
var pkgs = window.__gvisPackages = window.__gvisPackages || [];
var callbacks = window.__gvisCallbacks = window.__gvisCallbacks || [];
var chartid = "motionchart";
  
// Manually see if chartid is in pkgs (not all browsers support Array.indexOf)
var i, newPackage = true;
for (i = 0; newPackage && i < pkgs.length; i++) {
if (pkgs[i] === chartid)
newPackage = false;
}
if (newPackage)
  pkgs.push(chartid);
  
// Add the drawChart function to the global list of callbacks
callbacks.push(drawChartMotionChartID147c79bc69a8);
})();
function displayChartMotionChartID147c79bc69a8() {
  var pkgs = window.__gvisPackages = window.__gvisPackages || [];
  var callbacks = window.__gvisCallbacks = window.__gvisCallbacks || [];
  window.clearTimeout(window.__gvisLoad);
  // The timeout is set to 100 because otherwise the container div we are
  // targeting might not be part of the document yet
  window.__gvisLoad = setTimeout(function() {
  var pkgCount = pkgs.length;
  google.load("visualization", "1", { packages:pkgs, callback: function() {
  if (pkgCount != pkgs.length) {
  // Race condition where another setTimeout call snuck in after us; if
  // that call added a package, we must not shift its callback
  return;
}
while (callbacks.length > 0)
callbacks.shift()();
} });
}, 100);
}
 
// jsFooter
</script>
 
<!-- jsChart -->  
<script type="text/javascript" src="https://www.google.com/jsapi?callback=displayChartMotionChartID147c79bc69a8"></script>
 
<!-- divChart -->
  
<div id="MotionChartID147c79bc69a8" 
  style="width: 800; height: 600;">
</div>
 <div><span>Data: vis &#8226; Chart ID: <a href="Chart_MotionChartID147c79bc69a8.html">MotionChartID147c79bc69a8</a> &#8226; <a href="https://github.com/mages/googleVis">googleVis-0.6.3</a></span><br /> 
<!-- htmlFooter -->
<span> 
  R version 3.5.1 (2018-07-02) 
  &#8226; <a href="https://developers.google.com/terms/">Google Terms of Use</a> &#8226; <a href="https://google-developers.appspot.com/chart/interactive/docs/gallery/motionchart">Documentation and Data Policy</a>
</span></div>
</body>
</html>