Data

A sample data is loaded from csv file. We use sqldf package to apply sql commands in R objects.

library(sqldf)
library(readr)
library(dplyr)

orders <- read_csv("Orders (1) (1).csv")
orders <- orders %>% mutate(date_local = as.character(date_local))
vendors <- read_csv("Vendors (1) (1).csv")

orders
## # A tibble: 100 x 8
##    rdbms_id country_name date_local vendor_id customer_id gmv_local
##       <dbl> <chr>        <chr>          <dbl>       <dbl>     <dbl>
##  1       18 Taiwan       2012-10-02      2870        3983      714.
##  2       18 Taiwan       2012-10-02      2870        3977      629.
##  3       18 Taiwan       2012-10-02      2870        3010      270.
##  4       18 Taiwan       2012-10-02      3812        3983      551.
##  5       18 Taiwan       2012-10-02      3812        3524       45 
##  6       15 Singapore    2012-10-02        13        4322      119.
##  7       17 Thailand     2012-10-02       227        1647      576 
##  8       17 Thailand     2012-10-02       227        1885      816.
##  9       17 Thailand     2012-10-02       227         326       41 
## 10       17 Thailand     2012-10-02       227          47      225.
## # ... with 90 more rows, and 2 more variables: is_voucher_used <lgl>,
## #   is_successful_order <lgl>
vendors
## # A tibble: 7 x 7
##      id rdbms_id country_name is_active vendor_name            budget chain_id
##   <dbl>    <dbl> <chr>        <lgl>     <chr>                   <dbl>    <dbl>
## 1  1020       15 Singapore    TRUE      WS (BM)                     1      118
## 2   941       15 Singapore    FALSE     Canada Paste                1       NA
## 3  2870       18 Taiwan       FALSE     I LOVE PIZZAAAAAAAAAAA      1       NA
## 4   606       15 Singapore    FALSE     PastaPastaPasta             3       NA
## 5  3812       18 Taiwan       TRUE      Sweet Chinese Desserts      2       18
## 6    13       15 Singapore    TRUE      IC House                    2       12
## 7   227       17 Thailand     TRUE      9th Cafe House              3      212

WHERE clause

### 1 > Find records of only Taiwan ###
sqldf("
      SELECT * 
      FROM orders 
      WHERE country_name = 'Taiwan'
      ")
##    rdbms_id country_name date_local vendor_id customer_id gmv_local
## 1        18       Taiwan 2012-10-02      2870        3983   713.700
## 2        18       Taiwan 2012-10-02      2870        3977   629.200
## 3        18       Taiwan 2012-10-02      2870        3010   270.300
## 4        18       Taiwan 2012-10-02      3812        3983   551.100
## 5        18       Taiwan 2012-10-02      3812        3524    45.000
## 6        18       Taiwan 2014-10-02      3812       15797  1228.500
## 7        18       Taiwan 2014-10-02      3812       34799   732.435
## 8        18       Taiwan 2014-10-02      3812       15089  2108.410
## 9        18       Taiwan 2014-10-02      3812       17145  1783.300
## 10       18       Taiwan 2014-10-02      3812       39968  4029.620
## 11       18       Taiwan 2014-10-02      3812       26270  1521.000
## 12       18       Taiwan 2014-10-02      3812       38333   717.060
## 13       18       Taiwan 2014-10-02      3812       23726   267.120
## 14       18       Taiwan 2014-10-02      3812       43010   134.200
## 15       18       Taiwan 2014-10-02      2870       43004   234.530
## 16       18       Taiwan 2014-10-02      2870       43008  2532.330
## 17       18       Taiwan 2014-10-02      2870       29259    17.760
## 18       18       Taiwan 2014-10-02      3812       42962    19.590
## 19       18       Taiwan 2014-10-02      3812       42901   332.150
## 20       18       Taiwan 2014-10-02      3812       11739    21.925
##    is_voucher_used is_successful_order
## 1            FALSE               FALSE
## 2            FALSE                TRUE
## 3            FALSE               FALSE
## 4            FALSE               FALSE
## 5            FALSE                TRUE
## 6            FALSE                TRUE
## 7            FALSE                TRUE
## 8            FALSE                TRUE
## 9            FALSE                TRUE
## 10           FALSE                TRUE
## 11           FALSE                TRUE
## 12           FALSE                TRUE
## 13           FALSE                TRUE
## 14           FALSE                TRUE
## 15           FALSE                TRUE
## 16           FALSE               FALSE
## 17           FALSE                TRUE
## 18           FALSE                TRUE
## 19           FALSE               FALSE
## 20           FALSE                TRUE

GROUP BY clause

### 2 > Find Total GMv for each country ###
sqldf("
      SELECT 
      country_name, 
      SUM(gmv_local) AS total_GMV 
      FROM orders 
      GROUP BY country_name 
      ORDER BY 2 DESC
      ")
##   country_name total_GMV
## 1       Taiwan 17889.230
## 2    Singapore  3424.749
## 3     Thailand  1658.261

DENSE_RANK, LEFT JOIN and SUB-QUERY

### 3 > Find the Top vendor for each country according to TOtal GMV ###
sqldf("
 SELECT country_name, vendor_name, total_GMV 
 FROM
      (SELECT 
      orders.country_name, 
      vendors.vendor_name,
      SUM(gmv_local) AS total_GMV,
      DENSE_RANK() OVER (PARTITION BY orders.country_name ORDER BY SUM(orders.gmv_local) DESC ) AS rank
      FROM orders 
      LEFT JOIN vendors
      ON orders.vendor_id = vendors.id
      GROUP BY orders.country_name, vendors.vendor_name)
 WHERE rank = 1;
      ")
##   country_name            vendor_name total_GMV
## 1    Singapore               IC House  1924.956
## 2       Taiwan Sweet Chinese Desserts 13491.410
## 3     Thailand         9th Cafe House  1658.261

EXTRACT Date component, ORDER BY

### 4 > Find Top 2 vendors for each country, by each year ###
sqldf("
   SELECT Year, country_name, vendor_name, total_GMV 
   FROM
      (SELECT 
      strftime('%Y',date_local) as Year,
      orders.country_name,
      vendors.vendor_name,
      SUM(gmv_local) AS total_GMV,
      DENSE_RANK() OVER (PARTITION BY orders.country_name, strftime('%Y', orders.date_local) ORDER BY SUM(orders.gmv_local) DESC ) AS rank
      FROM orders 
      LEFT JOIN vendors 
      ON orders.vendor_id = vendors.id
      GROUP BY orders.country_name, vendors.vendor_name, strftime('%Y', orders.date_local)
   )
   WHERE rank in (1, 2)
   ORDER BY Year, country_name ;
      ")
##   Year country_name            vendor_name  total_GMV
## 1 2012    Singapore               IC House   119.3390
## 2 2012    Singapore           Canada Paste    89.6380
## 3 2012       Taiwan I LOVE PIZZAAAAAAAAAAA  1613.2000
## 4 2012       Taiwan Sweet Chinese Desserts   596.1000
## 5 2012     Thailand         9th Cafe House  1658.2605
## 6 2014    Singapore               IC House  1805.6170
## 7 2014    Singapore                WS (BM)   768.8302
## 8 2014       Taiwan Sweet Chinese Desserts 12895.3100
## 9 2014       Taiwan I LOVE PIZZAAAAAAAAAAA  2784.6200