Basis Data

Ujian Akhir Semester


Kontak : \(\downarrow\)
Email
Instagram https://www.instagram.com/arifin.alicia/
RPubs https://rpubs.com/aliciaarifin/
Nama Alicia Arifin
NIM 20214920001
Prodi Statistika, 2021

Soal UAS

  • Buat kueri untuk mendapatkan jumlah pengguna unik, jumlah pesanan, dan total harga jual per status dan Bulan!
  • Buat kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik dengan status selesai dikelompokkan berdasarkan Bulan!
  • Bandingkan total oder Complete, Cancel, Returned per bulan dalam 1 grafik
  • Visualisasi Data Customer berdasarkan country dengan pie chart
  • Visualisasikan berdasarkan date, status,total_unique_users,total_orders, total_sale_price!
  • Berapa banyak yang kita jual setiap bulannya? Apakah tinggi atau rendah dibandingkan bulan lalu?
  • Dari negara manakah pelanggan utama e-commerce tersebut berasal? Kelompok Gender dan Usia manakah yang memperoleh keuntungan paling besar?
  • Merek dan kategori produk apa yang paling banyak terjual dan perlihatkan revenuenya!
  • Merek dan kategori produk apa yang paling banyak dibatalkan dan dikembalikan?
  • Buatlah kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik yang statusnya selesai dikelompokkan berdasarkan bulan!
    Soal Tmabahan (Optional)
  • Buatlah Analisis Regresi dan Analisis Clustering menggunakan Database diatas!

Prepare Data

Diagram of
Diagram of

Connect to MySQLite and Library

library(DBI)
library(RSQLite)
library(DT)
library(highcharter)
library(dplyr)
library(viridis)
library(tidyr)
library(plotly)
library(ggplot2)

uasconn <- dbConnect(RSQLite::SQLite(), "thelook_db")
dbListTables(uasconn)
##  [1] "distribution_centers" "events"               "inventory_items"     
##  [4] "no5_test1"            "no5_test2"            "no6"                 
##  [7] "order_items"          "orders"               "products"            
## [10] "user"

Jumlah pengguna unik, jumlah pesanan, dan total harga jual per status dan Bulan

no1 <- dbGetQuery(uasconn, "
                  select * from orders as o
                  inner join order_items as oi on oi.order_id = o.order_id
                  inner join user as u on u.id= o.user_id
                  ")

unique(no1$status)
## [1] "Shipped"    "Complete"   "Returned"   "Cancelled"  "Processing"
test1<- dbGetQuery(uasconn,"
                  select o.user_id, count(oi.order_id) as total_order, sum(oi.sale_price) as total_expenses from orders as o
                  inner join order_items as oi on oi.order_id = o.order_id
                  inner join user as u on u.id= o.user_id
                  where oi.status
                  in ('Complete')
                  group by o.user_id 
                  ")
datatable(test1)
no1.test <- dbGetQuery(uasconn, "
                  select * from orders as o
                  ")

test2.1 <- separate(no1.test,col=created_at, into=c("date","times"),sep=" ")
test2.2 <- separate(test2.1, col=date, into=c("year","month","date"),sep="-")

Buat kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik dengan status selesai dikelompokkan berdasarkan Bulan!

no2 <- dbGetQuery(uasconn,"
                  select o.user_id, count(oi.order_id) as total_order, avg(oi.sale_price) as average_expenses from orders as o
                  inner join order_items as oi on oi.order_id = o.order_id
                  inner join user as u on u.id= o.user_id
                  where oi.status
                  in ('Complete')
                  group by o.user_id 
                  ")


datatable(no2)

Bandingkan total order Complete, Cancel, Returned per bulan dalam 1 grafik

no3 <- dbGetQuery(uasconn, "
                  select * from orders as o
                  where status
                  in ('Complete','Cancelled','Returned')
                  ")
no3 %>% 
  count(status) %>%
  arrange(desc(n))%>%
  hchart('bar', hcaes(x = 'status', y = 'n', color= viridis::plasma(3)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '<b>Total Orders:</b> {point.y} <br>') %>% 
  hc_title(text = 'Total Orders based on Status',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_subtitle(text = 'By Frecuency Order',
              style = list(fontSize = '16px')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')
color1 = viridis::rocket(n=36)


test3.1 <- separate(no3,col=created_at, into=c("date","time","zones"),sep=" ")
test3.2 <- separate(test3.1, col=date, into=c("year","month","date"),sep="-")


test3.2 %>% 
  count(status, month) %>%
  hchart('column', hcaes(x = status, y = n, group= month))%>%
  hc_tooltip(pointFormat = '<b>Total Orders:</b> {point.y} <br>') %>% 
  hc_title(text = 'Total Orders based on Status',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_subtitle(text = 'orders per month',
              style = list(fontSize = '16px')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')
my_3 <-test3.2 %>% 
  count(status, year, month)
plot_ly(my_3, x=~year, y=~n, type="bar", color=~month) %>%
  layout(title ="Total Orders based on Status",
         yaxis = list(title="Orders"))

Visualisasi Data Customer berdasarkan country dengan pie chart

no4 <- dbGetQuery(uasconn, "
                  select country, count(country) as frec_user from user
                  group by country
                  ")
no4<- no4%>%
  mutate(pert= frec_user/sum(frec_user))

no4%>%
  arrange(desc(frec_user))%>%
  hchart("pie", hcaes(x=country, y=frec_user, color= viridis::inferno(15)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '<b>User :</b> {point.y} <br>') %>% 
  hc_title(text = 'Customers based on Country',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')

Visualisasikan berdasarkan date, status,total_unique_users,total_orders, total_sale_price!

no5 <- dbGetQuery(uasconn,"
                  select u.id as user_id, o.order_id, o.status, o.created_at as order_date, o.returned_at as returned_date, o.shipped_at as shipped_date, o.delivered_at as delivered_date,  o.num_of_item, oi.sale_price from orders as o
                  inner join order_items as oi on oi.order_id=o.order_id
                  inner join user as u on u.id=o.user_id
                  ")

test5.1 <- separate(no5,col=order_date, into=c("order_date","order_time"),sep=" ")

test5.2 <- separate(test5.1,col=returned_date, into=c("returned_date","returned_time"),sep=" ")

test5.3 <- separate(test5.2,col=shipped_date, into=c("shipped_date","shipped_time"),sep=" ")

test5.4 <- separate(test5.3,col=delivered_date, into=c("delivered_date","delivered_time"),sep=" ")

#dbWriteTable(uasconn, "no5_test2", test5.4)
dbListTables(uasconn) # check if table already inserted to database
##  [1] "distribution_centers" "events"               "inventory_items"     
##  [4] "no5_test1"            "no5_test2"            "no6"                 
##  [7] "order_items"          "orders"               "products"            
## [10] "user"
no5.2 <- dbGetQuery(uasconn, "
                    select  status, order_date, count(distinct user_id) as total_unique_customer,count(order_id) as total_orders, sum(num_of_item) as num_of_items, sum(sale_price) as revenue from no5_test2
                    group by status, order_date
                    ")

datatable(no5.2,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Order Summary")))
ggplot(data=no5.2,
           aes(x = order_date, y=revenue))+
  geom_bar(stat="identity", width = 0.25, fill ="pink")+
  facet_wrap(~status)

Shiny applications not supported in static R Markdown documents

Berapa banyak yang kita jual setiap bulannya? Apakah tinggi atau rendah dibandingkan bulan lalu?

no6<- dbGetQuery(uasconn,"
                 select * from orders as o
                 where status = 'Complete'")



no6 <- dbGetQuery(uasconn,"
                  select o.user_id, sum(oi.sale_price) as total_expenses, o.created_at from orders as o
                  inner join order_items as oi on oi.order_id = o.order_id
                  inner join user as u on u.id= o.user_id
                  where oi.status
                  in ('Complete')
                  group by o.user_id 
                  ")

test6.1 <- separate(no6,col=created_at, into=c("date","time"),sep=" ")
test6.2 <- separate(test6.1, col=date, into=c("year","month","day"),sep="-")

#dbWriteTable(uasconn,"no6",test6.2)
no6.2 <- dbGetQuery(uasconn,"
                    select year, month, sum(total_expenses) as total_expenses from no6
                    group by year, month")
plot_ly(no6.2, x=~year, y=~total_expenses, type="bar", color=~month) %>%
  layout(title ="Revenue Completed Order Overtime",
         yaxis = list(title="Revenue"))

Dari negara manakah pelanggan utama e-commerce tersebut berasal? Kelompok Gender dan Usia manakah yang memperoleh keuntungan paling besar?

no7 <- dbGetQuery(uasconn, "
                  select u.id, u.first_name, u.country, sum(sale_price) as spending from orders as o
                  inner join order_items as oi on o.order_id=oi.order_id
                  inner join inventory_items as i on i.id = oi.inventory_item_id
                  inner join user as u on u.id = o.user_id
                  group by u.id
                  order by spending desc
                  ")

count(no7)
# top 1000 customers
no7.1 <- dbGetQuery(uasconn, "
                  select u.id, u.first_name, u.country,  sum(sale_price) as total_spending from orders as o
                  inner join order_items as oi on o.order_id=oi.order_id
                  inner join inventory_items as i on i.id = oi.inventory_item_id
                  inner join user as u on u.id = o.user_id
                  group by u.id
                  order by total_spending desc
                  limit 1000
                  ")
datatable(no7.1,
          caption = htmltools::tags$caption(
            style = 'caption-side: bottom; text-align: center;',
            'Table: ', htmltools::em("Top 1000 Spender")))
# umur
no7.2 <- dbGetQuery(uasconn, "
                  select u.age,sum(sale_price)-sum(i.cost)-sum(p.cost) as total_profit from orders as o
                  inner join order_items as oi on o.order_id=oi.order_id
                  inner join inventory_items as i on i.id = oi.inventory_item_id
                  inner join user as u on u.id = o.user_id
                  inner join products as p on p.id=oi.product_id
                  group by u.age
                  order by total_profit desc
                  ")

datatable(no7.2,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Profit based on Cust's Age")))
# gender
no8 <- dbGetQuery(uasconn, "
                  select u.gender, sum(oi.sale_price)- sum(i.cost)-sum(p.cost) as profit from orders as o
                  inner join order_items as oi on o.order_id=oi.order_id
                  inner join inventory_items as i on i.id = oi.inventory_item_id
                  inner join user as u on u.id = o.user_id
                  inner join products as p on p.id=oi.product_id
                  group by u.gender
                  ")

no8%>%
  hchart("column", hcaes(x="gender", y="profit", color= viridis::cividis(n=2)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '{point.y} <br>') %>% 
  hc_title(text = 'Profit',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')

Merek dan kategori produk apa yang paling banyak terjual dan perlihatkan revenuenya!

no9 <- dbGetQuery(uasconn,"
                  select p.category ,sum(o.num_of_item)  as sales, oi.sale_price as price_item ,sum(oi.sale_price) as revenue from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status = 'Complete'
                  group by p.category
                  order by revenue desc
                  ")

no9.1 <- dbGetQuery(uasconn,"
                  select p.category, sum(o.num_of_item)  as sales ,sum(oi.sale_price) as revenue from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status = 'Complete'
                  group by p.category
                  order by revenue desc
                  ")

no9.2 <- dbGetQuery(uasconn,"
                  select p.category, p.brand, sum(o.num_of_item)  as sales ,sum(oi.sale_price) as revenue from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status = 'Complete'
                  group by p.brand
                  order by revenue desc
                  limit 25
                  ")
# top sales overall
no9%>%
  hchart("pie",hcaes(x= category, y=sales, color = viridis::mako(26)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '<b> Sales :</b>{point.y} <br>') %>% 
  hc_title(text = 'Sales by Categories',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')
no9.3 <-dbGetQuery(uasconn,"
                  select p.brand ,sum(o.num_of_item)  as sales, oi.sale_price as price_item ,sum(oi.sale_price) as revenue from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status = 'Complete'
                  group by p.brand
                  order by revenue desc
                  limit 30
                  ")

no9.3%>%
  hchart("pie",hcaes(x= brand, y=sales, color = viridis::viridis(30)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '<b> Sales :</b>{point.y} <br>') %>% 
  hc_title(text = 'Top 30 Sales by Brand',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')
# top categories overall
no9.1%>%
  hchart("pie",hcaes(x= category, y=revenue, color = viridis::mako(26)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '<b> Revenue :</b>{point.y} <br>') %>% 
  hc_title(text = 'Revenue by Categories',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')
# top brand
no9.2%>%
  hchart("pie",hcaes(x= brand, y=revenue, color = viridis::rocket(25)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = '<b> Revenue :</b> {point.y} <br>') %>% 
  hc_title(text = 'Top 25 Brand based on Revenue',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')
# top categories berdasarkan top 25
no9.2%>%
  count(category)%>%
  arrange(desc(n))%>%
  hchart("bar",hcaes(x= category, y=n, color = viridis::plasma(9)))%>%
  hc_add_theme(hc_theme_google()) %>% 
  hc_tooltip(pointFormat = 'shows <b>{point.y}</b> many times in top 25 brand<br>') %>% 
  hc_title(text = 'Top Categories',
           style = list(fontSize = '25px', fontWeight = 'bold')) %>% 
  hc_subtitle(text = 'From Top 25 Brand',
              style = list(fontSize = '16px')) %>% 
  hc_credits(enabled = TRUE, text = '@bunbun_database')

Merek dan kategori produk apa yang paling banyak dibatalkan dan dikembalikan?

# category canceled
no10 <-dbGetQuery(uasconn,"
                  select p.category, count(oi.order_id) as times_cancelled  from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status = 'Cancelled'
                  group by p.category
                  order by times_cancelled desc
                  ")

# brand returned
no10.1 <-dbGetQuery(uasconn,"
                  select p.brand, p.category, count(oi.order_id) as times_returned from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status= 'Returned'
                  group by p.brand
                  having times_returned >20
                  order by times_returned desc
                  
                  ")
# brand cancelled
no10.2 <-dbGetQuery(uasconn,"
                  select p.brand, p.category, count(oi.order_id) as times_cancelled from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status= 'Cancelled'
                  group by p.brand
                  having times_cancelled >20
                  order by times_cancelled desc
                  ")

# category cancelled
no10.3 <-dbGetQuery(uasconn,"
                  select p.category, count(oi.order_id) as times_returned from orders as o
                  inner join order_items as oi on o.order_id =oi.order_id
                  inner join products as p on p.id=oi.order_id
                  where o.status= 'Returned'
                  group by p.category
                  order by times_returned desc
                  ")
datatable(no10,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Cancelled Orders by Categories")))
datatable(no10.3,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Returned Orders by Categories")))
datatable(no10.1,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Returned Orders by Brand")))
datatable(no10.2,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Cancelled Orders by Brand")))

Buatlah kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik yang statusnya selesai dikelompokkan berdasarkan bulan!

no11 <- dbGetQuery(uasconn,"
                   select o.user_id,u.first_name, u.last_name, count(o.order_id) as times_shop, avg(oi.sale_price) as average_spending  from order_items as oi
                   inner join orders as o on oi.order_id=o.order_id
                   inner join user as u on u.id=o.user_id
                   where o.status = 'Complete'
                   group by o.user_id
                   ")


datatable(no11,
          caption = htmltools::tags$caption(
    style = 'caption-side: bottom; text-align: center;',
    'Table: ', htmltools::em("Customer's Spending Data")))
# Soal Tambahan (Optional)
#Buatlah Analisis Regresi dan Analisis Clustering menggunakan Database diatas!

#Setelah diliat pada data
## Klasifikasi
data.r <- dbGetQuery(uasconn,"
                     select u.id, u.age, u.gender, u.city,u.country, p.category,p.department, sum(o.num_of_item) as total_item , o.order_id, sum(oi.sale_price)-sum(i.cost)-sum(p.cost) as profit from user as u
                     inner join orders as o on o.user_id = u.id
                     inner join order_items as oi on oi.order_id=o.order_id
                     inner join inventory_items as i on i.product_id = oi.product_id
                     inner join products as p on p.id=i.product_id
                     where o.status = 'Complete'
                     group by o.order_id
                     ")
data.r
datatable(head(data.r,500))

### Anova
summary(aov(profit ~ total_item +age+gender+city+country+category+department, data=data.r))

# profit ~ total_item +gender+city+category 

Disconnect Data Base

dbDisconnect(uasconn)