A sample data is loaded from csv file. We use sqldf package to apply sql commands in R objects.
library(sqldf)
library(readr)
library(dplyr)
orders <- read_csv("Orders (1) (1).csv")
orders <- orders %>% mutate(date_local = as.character(date_local))
vendors <- read_csv("Vendors (1) (1).csv")
orders## # A tibble: 100 x 8
## rdbms_id country_name date_local vendor_id customer_id gmv_local
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 18 Taiwan 2012-10-02 2870 3983 714.
## 2 18 Taiwan 2012-10-02 2870 3977 629.
## 3 18 Taiwan 2012-10-02 2870 3010 270.
## 4 18 Taiwan 2012-10-02 3812 3983 551.
## 5 18 Taiwan 2012-10-02 3812 3524 45
## 6 15 Singapore 2012-10-02 13 4322 119.
## 7 17 Thailand 2012-10-02 227 1647 576
## 8 17 Thailand 2012-10-02 227 1885 816.
## 9 17 Thailand 2012-10-02 227 326 41
## 10 17 Thailand 2012-10-02 227 47 225.
## # ... with 90 more rows, and 2 more variables: is_voucher_used <lgl>,
## # is_successful_order <lgl>
vendors## # A tibble: 7 x 7
## id rdbms_id country_name is_active vendor_name budget chain_id
## <dbl> <dbl> <chr> <lgl> <chr> <dbl> <dbl>
## 1 1020 15 Singapore TRUE WS (BM) 1 118
## 2 941 15 Singapore FALSE Canada Paste 1 NA
## 3 2870 18 Taiwan FALSE I LOVE PIZZAAAAAAAAAAA 1 NA
## 4 606 15 Singapore FALSE PastaPastaPasta 3 NA
## 5 3812 18 Taiwan TRUE Sweet Chinese Desserts 2 18
## 6 13 15 Singapore TRUE IC House 2 12
## 7 227 17 Thailand TRUE 9th Cafe House 3 212
### 1 > Find records of only Taiwan ###
sqldf("
SELECT *
FROM orders
WHERE country_name = 'Taiwan'
")## rdbms_id country_name date_local vendor_id customer_id gmv_local
## 1 18 Taiwan 2012-10-02 2870 3983 713.700
## 2 18 Taiwan 2012-10-02 2870 3977 629.200
## 3 18 Taiwan 2012-10-02 2870 3010 270.300
## 4 18 Taiwan 2012-10-02 3812 3983 551.100
## 5 18 Taiwan 2012-10-02 3812 3524 45.000
## 6 18 Taiwan 2014-10-02 3812 15797 1228.500
## 7 18 Taiwan 2014-10-02 3812 34799 732.435
## 8 18 Taiwan 2014-10-02 3812 15089 2108.410
## 9 18 Taiwan 2014-10-02 3812 17145 1783.300
## 10 18 Taiwan 2014-10-02 3812 39968 4029.620
## 11 18 Taiwan 2014-10-02 3812 26270 1521.000
## 12 18 Taiwan 2014-10-02 3812 38333 717.060
## 13 18 Taiwan 2014-10-02 3812 23726 267.120
## 14 18 Taiwan 2014-10-02 3812 43010 134.200
## 15 18 Taiwan 2014-10-02 2870 43004 234.530
## 16 18 Taiwan 2014-10-02 2870 43008 2532.330
## 17 18 Taiwan 2014-10-02 2870 29259 17.760
## 18 18 Taiwan 2014-10-02 3812 42962 19.590
## 19 18 Taiwan 2014-10-02 3812 42901 332.150
## 20 18 Taiwan 2014-10-02 3812 11739 21.925
## is_voucher_used is_successful_order
## 1 FALSE FALSE
## 2 FALSE TRUE
## 3 FALSE FALSE
## 4 FALSE FALSE
## 5 FALSE TRUE
## 6 FALSE TRUE
## 7 FALSE TRUE
## 8 FALSE TRUE
## 9 FALSE TRUE
## 10 FALSE TRUE
## 11 FALSE TRUE
## 12 FALSE TRUE
## 13 FALSE TRUE
## 14 FALSE TRUE
## 15 FALSE TRUE
## 16 FALSE FALSE
## 17 FALSE TRUE
## 18 FALSE TRUE
## 19 FALSE FALSE
## 20 FALSE TRUE
### 2 > Find Total GMv for each country ###
sqldf("
SELECT
country_name,
SUM(gmv_local) AS total_GMV
FROM orders
GROUP BY country_name
ORDER BY 2 DESC
")## country_name total_GMV
## 1 Taiwan 17889.230
## 2 Singapore 3424.749
## 3 Thailand 1658.261
### 3 > Find the Top vendor for each country according to TOtal GMV ###
sqldf("
SELECT country_name, vendor_name, total_GMV
FROM
(SELECT
orders.country_name,
vendors.vendor_name,
SUM(gmv_local) AS total_GMV,
DENSE_RANK() OVER (PARTITION BY orders.country_name ORDER BY SUM(orders.gmv_local) DESC ) AS rank
FROM orders
LEFT JOIN vendors
ON orders.vendor_id = vendors.id
GROUP BY orders.country_name, vendors.vendor_name)
WHERE rank = 1;
")## country_name vendor_name total_GMV
## 1 Singapore IC House 1924.956
## 2 Taiwan Sweet Chinese Desserts 13491.410
## 3 Thailand 9th Cafe House 1658.261
### 4 > Find Top 2 vendors for each country, by each year ###
sqldf("
SELECT Year, country_name, vendor_name, total_GMV
FROM
(SELECT
strftime('%Y',date_local) as Year,
orders.country_name,
vendors.vendor_name,
SUM(gmv_local) AS total_GMV,
DENSE_RANK() OVER (PARTITION BY orders.country_name, strftime('%Y', orders.date_local) ORDER BY SUM(orders.gmv_local) DESC ) AS rank
FROM orders
LEFT JOIN vendors
ON orders.vendor_id = vendors.id
GROUP BY orders.country_name, vendors.vendor_name, strftime('%Y', orders.date_local)
)
WHERE rank in (1, 2)
ORDER BY Year, country_name ;
")## Year country_name vendor_name total_GMV
## 1 2012 Singapore IC House 119.3390
## 2 2012 Singapore Canada Paste 89.6380
## 3 2012 Taiwan I LOVE PIZZAAAAAAAAAAA 1613.2000
## 4 2012 Taiwan Sweet Chinese Desserts 596.1000
## 5 2012 Thailand 9th Cafe House 1658.2605
## 6 2014 Singapore IC House 1805.6170
## 7 2014 Singapore WS (BM) 768.8302
## 8 2014 Taiwan Sweet Chinese Desserts 12895.3100
## 9 2014 Taiwan I LOVE PIZZAAAAAAAAAAA 2784.6200