Basis Data
Ujian Akhir Semester
| Kontak | : \(\downarrow\) |
| ali.19arifin@gmail.com | |
| https://www.instagram.com/arifin.alicia/ | |
| RPubs | https://rpubs.com/aliciaarifin/ |
| Nama | Alicia Arifin |
| NIM | 20214920001 |
| Prodi | Statistika, 2021 |
Soal UAS
- Buat kueri untuk mendapatkan jumlah pengguna unik, jumlah pesanan, dan total harga jual per status dan Bulan!
- Buat kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik dengan status selesai dikelompokkan berdasarkan Bulan!
- Bandingkan total oder Complete, Cancel, Returned per bulan dalam 1 grafik
- Visualisasi Data Customer berdasarkan country dengan pie chart
- Visualisasikan berdasarkan date, status,total_unique_users,total_orders, total_sale_price!
- Berapa banyak yang kita jual setiap bulannya? Apakah tinggi atau rendah dibandingkan bulan lalu?
- Dari negara manakah pelanggan utama e-commerce tersebut berasal? Kelompok Gender dan Usia manakah yang memperoleh keuntungan paling besar?
- Merek dan kategori produk apa yang paling banyak terjual dan perlihatkan revenuenya!
- Merek dan kategori produk apa yang paling banyak dibatalkan dan dikembalikan?
- Buatlah kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata,
dan jumlah total pengguna unik yang statusnya selesai dikelompokkan
berdasarkan bulan!
Soal Tmabahan (Optional) - Buatlah Analisis Regresi dan Analisis Clustering menggunakan Database diatas!
Prepare Data
Diagram of
Connect to MySQLite and Library
library(DBI)
library(RSQLite)
library(DT)
library(highcharter)
library(dplyr)
library(viridis)
library(tidyr)
library(plotly)
library(ggplot2)
uasconn <- dbConnect(RSQLite::SQLite(), "thelook_db")
dbListTables(uasconn)## [1] "distribution_centers" "events" "inventory_items"
## [4] "no5_test1" "no5_test2" "no6"
## [7] "order_items" "orders" "products"
## [10] "user"
Jumlah pengguna unik, jumlah pesanan, dan total harga jual per status dan Bulan
no1 <- dbGetQuery(uasconn, "
select * from orders as o
inner join order_items as oi on oi.order_id = o.order_id
inner join user as u on u.id= o.user_id
")
unique(no1$status)## [1] "Shipped" "Complete" "Returned" "Cancelled" "Processing"
test1<- dbGetQuery(uasconn,"
select o.user_id, count(oi.order_id) as total_order, sum(oi.sale_price) as total_expenses from orders as o
inner join order_items as oi on oi.order_id = o.order_id
inner join user as u on u.id= o.user_id
where oi.status
in ('Complete')
group by o.user_id
")
datatable(test1)Buat kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik dengan status selesai dikelompokkan berdasarkan Bulan!
no2 <- dbGetQuery(uasconn,"
select o.user_id, count(oi.order_id) as total_order, avg(oi.sale_price) as average_expenses from orders as o
inner join order_items as oi on oi.order_id = o.order_id
inner join user as u on u.id= o.user_id
where oi.status
in ('Complete')
group by o.user_id
")
datatable(no2)Bandingkan total order Complete, Cancel, Returned per bulan dalam 1 grafik
no3 <- dbGetQuery(uasconn, "
select * from orders as o
where status
in ('Complete','Cancelled','Returned')
")
no3 %>%
count(status) %>%
arrange(desc(n))%>%
hchart('bar', hcaes(x = 'status', y = 'n', color= viridis::plasma(3)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '<b>Total Orders:</b> {point.y} <br>') %>%
hc_title(text = 'Total Orders based on Status',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_subtitle(text = 'By Frecuency Order',
style = list(fontSize = '16px')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')color1 = viridis::rocket(n=36)
test3.1 <- separate(no3,col=created_at, into=c("date","time","zones"),sep=" ")
test3.2 <- separate(test3.1, col=date, into=c("year","month","date"),sep="-")
test3.2 %>%
count(status, month) %>%
hchart('column', hcaes(x = status, y = n, group= month))%>%
hc_tooltip(pointFormat = '<b>Total Orders:</b> {point.y} <br>') %>%
hc_title(text = 'Total Orders based on Status',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_subtitle(text = 'orders per month',
style = list(fontSize = '16px')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')Visualisasi Data Customer berdasarkan country dengan pie chart
no4 <- dbGetQuery(uasconn, "
select country, count(country) as frec_user from user
group by country
")
no4<- no4%>%
mutate(pert= frec_user/sum(frec_user))
no4%>%
arrange(desc(frec_user))%>%
hchart("pie", hcaes(x=country, y=frec_user, color= viridis::inferno(15)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '<b>User :</b> {point.y} <br>') %>%
hc_title(text = 'Customers based on Country',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')Visualisasikan berdasarkan date, status,total_unique_users,total_orders, total_sale_price!
no5 <- dbGetQuery(uasconn,"
select u.id as user_id, o.order_id, o.status, o.created_at as order_date, o.returned_at as returned_date, o.shipped_at as shipped_date, o.delivered_at as delivered_date, o.num_of_item, oi.sale_price from orders as o
inner join order_items as oi on oi.order_id=o.order_id
inner join user as u on u.id=o.user_id
")
test5.1 <- separate(no5,col=order_date, into=c("order_date","order_time"),sep=" ")
test5.2 <- separate(test5.1,col=returned_date, into=c("returned_date","returned_time"),sep=" ")
test5.3 <- separate(test5.2,col=shipped_date, into=c("shipped_date","shipped_time"),sep=" ")
test5.4 <- separate(test5.3,col=delivered_date, into=c("delivered_date","delivered_time"),sep=" ")
#dbWriteTable(uasconn, "no5_test2", test5.4)
dbListTables(uasconn) # check if table already inserted to database## [1] "distribution_centers" "events" "inventory_items"
## [4] "no5_test1" "no5_test2" "no6"
## [7] "order_items" "orders" "products"
## [10] "user"
no5.2 <- dbGetQuery(uasconn, "
select status, order_date, count(distinct user_id) as total_unique_customer,count(order_id) as total_orders, sum(num_of_item) as num_of_items, sum(sale_price) as revenue from no5_test2
group by status, order_date
")
datatable(no5.2,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
'Table: ', htmltools::em("Order Summary")))ggplot(data=no5.2,
aes(x = order_date, y=revenue))+
geom_bar(stat="identity", width = 0.25, fill ="pink")+
facet_wrap(~status)Shiny applications not supported in static R Markdown documents
Berapa banyak yang kita jual setiap bulannya? Apakah tinggi atau rendah dibandingkan bulan lalu?
no6<- dbGetQuery(uasconn,"
select * from orders as o
where status = 'Complete'")
no6 <- dbGetQuery(uasconn,"
select o.user_id, sum(oi.sale_price) as total_expenses, o.created_at from orders as o
inner join order_items as oi on oi.order_id = o.order_id
inner join user as u on u.id= o.user_id
where oi.status
in ('Complete')
group by o.user_id
")
test6.1 <- separate(no6,col=created_at, into=c("date","time"),sep=" ")
test6.2 <- separate(test6.1, col=date, into=c("year","month","day"),sep="-")
#dbWriteTable(uasconn,"no6",test6.2)
no6.2 <- dbGetQuery(uasconn,"
select year, month, sum(total_expenses) as total_expenses from no6
group by year, month")Dari negara manakah pelanggan utama e-commerce tersebut berasal? Kelompok Gender dan Usia manakah yang memperoleh keuntungan paling besar?
no7 <- dbGetQuery(uasconn, "
select u.id, u.first_name, u.country, sum(sale_price) as spending from orders as o
inner join order_items as oi on o.order_id=oi.order_id
inner join inventory_items as i on i.id = oi.inventory_item_id
inner join user as u on u.id = o.user_id
group by u.id
order by spending desc
")
count(no7)# top 1000 customers
no7.1 <- dbGetQuery(uasconn, "
select u.id, u.first_name, u.country, sum(sale_price) as total_spending from orders as o
inner join order_items as oi on o.order_id=oi.order_id
inner join inventory_items as i on i.id = oi.inventory_item_id
inner join user as u on u.id = o.user_id
group by u.id
order by total_spending desc
limit 1000
")datatable(no7.1,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
'Table: ', htmltools::em("Top 1000 Spender")))# umur
no7.2 <- dbGetQuery(uasconn, "
select u.age,sum(sale_price)-sum(i.cost)-sum(p.cost) as total_profit from orders as o
inner join order_items as oi on o.order_id=oi.order_id
inner join inventory_items as i on i.id = oi.inventory_item_id
inner join user as u on u.id = o.user_id
inner join products as p on p.id=oi.product_id
group by u.age
order by total_profit desc
")
datatable(no7.2,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
'Table: ', htmltools::em("Profit based on Cust's Age")))# gender
no8 <- dbGetQuery(uasconn, "
select u.gender, sum(oi.sale_price)- sum(i.cost)-sum(p.cost) as profit from orders as o
inner join order_items as oi on o.order_id=oi.order_id
inner join inventory_items as i on i.id = oi.inventory_item_id
inner join user as u on u.id = o.user_id
inner join products as p on p.id=oi.product_id
group by u.gender
")
no8%>%
hchart("column", hcaes(x="gender", y="profit", color= viridis::cividis(n=2)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '{point.y} <br>') %>%
hc_title(text = 'Profit',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')Merek dan kategori produk apa yang paling banyak terjual dan perlihatkan revenuenya!
no9 <- dbGetQuery(uasconn,"
select p.category ,sum(o.num_of_item) as sales, oi.sale_price as price_item ,sum(oi.sale_price) as revenue from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status = 'Complete'
group by p.category
order by revenue desc
")
no9.1 <- dbGetQuery(uasconn,"
select p.category, sum(o.num_of_item) as sales ,sum(oi.sale_price) as revenue from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status = 'Complete'
group by p.category
order by revenue desc
")
no9.2 <- dbGetQuery(uasconn,"
select p.category, p.brand, sum(o.num_of_item) as sales ,sum(oi.sale_price) as revenue from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status = 'Complete'
group by p.brand
order by revenue desc
limit 25
")# top sales overall
no9%>%
hchart("pie",hcaes(x= category, y=sales, color = viridis::mako(26)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '<b> Sales :</b>{point.y} <br>') %>%
hc_title(text = 'Sales by Categories',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')no9.3 <-dbGetQuery(uasconn,"
select p.brand ,sum(o.num_of_item) as sales, oi.sale_price as price_item ,sum(oi.sale_price) as revenue from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status = 'Complete'
group by p.brand
order by revenue desc
limit 30
")
no9.3%>%
hchart("pie",hcaes(x= brand, y=sales, color = viridis::viridis(30)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '<b> Sales :</b>{point.y} <br>') %>%
hc_title(text = 'Top 30 Sales by Brand',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')# top categories overall
no9.1%>%
hchart("pie",hcaes(x= category, y=revenue, color = viridis::mako(26)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '<b> Revenue :</b>{point.y} <br>') %>%
hc_title(text = 'Revenue by Categories',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')# top brand
no9.2%>%
hchart("pie",hcaes(x= brand, y=revenue, color = viridis::rocket(25)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = '<b> Revenue :</b> {point.y} <br>') %>%
hc_title(text = 'Top 25 Brand based on Revenue',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')# top categories berdasarkan top 25
no9.2%>%
count(category)%>%
arrange(desc(n))%>%
hchart("bar",hcaes(x= category, y=n, color = viridis::plasma(9)))%>%
hc_add_theme(hc_theme_google()) %>%
hc_tooltip(pointFormat = 'shows <b>{point.y}</b> many times in top 25 brand<br>') %>%
hc_title(text = 'Top Categories',
style = list(fontSize = '25px', fontWeight = 'bold')) %>%
hc_subtitle(text = 'From Top 25 Brand',
style = list(fontSize = '16px')) %>%
hc_credits(enabled = TRUE, text = '@bunbun_database')Merek dan kategori produk apa yang paling banyak dibatalkan dan dikembalikan?
# category canceled
no10 <-dbGetQuery(uasconn,"
select p.category, count(oi.order_id) as times_cancelled from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status = 'Cancelled'
group by p.category
order by times_cancelled desc
")
# brand returned
no10.1 <-dbGetQuery(uasconn,"
select p.brand, p.category, count(oi.order_id) as times_returned from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status= 'Returned'
group by p.brand
having times_returned >20
order by times_returned desc
")
# brand cancelled
no10.2 <-dbGetQuery(uasconn,"
select p.brand, p.category, count(oi.order_id) as times_cancelled from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status= 'Cancelled'
group by p.brand
having times_cancelled >20
order by times_cancelled desc
")
# category cancelled
no10.3 <-dbGetQuery(uasconn,"
select p.category, count(oi.order_id) as times_returned from orders as o
inner join order_items as oi on o.order_id =oi.order_id
inner join products as p on p.id=oi.order_id
where o.status= 'Returned'
group by p.category
order by times_returned desc
")datatable(no10,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
'Table: ', htmltools::em("Cancelled Orders by Categories")))datatable(no10.3,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
'Table: ', htmltools::em("Returned Orders by Categories")))Buatlah kueri untuk mendapatkan frekuensi, nilai pesanan rata-rata, dan jumlah total pengguna unik yang statusnya selesai dikelompokkan berdasarkan bulan!
no11 <- dbGetQuery(uasconn,"
select o.user_id,u.first_name, u.last_name, count(o.order_id) as times_shop, avg(oi.sale_price) as average_spending from order_items as oi
inner join orders as o on oi.order_id=o.order_id
inner join user as u on u.id=o.user_id
where o.status = 'Complete'
group by o.user_id
")
datatable(no11,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
'Table: ', htmltools::em("Customer's Spending Data")))# Soal Tambahan (Optional)
#Buatlah Analisis Regresi dan Analisis Clustering menggunakan Database diatas!
#Setelah diliat pada data
## Klasifikasi
data.r <- dbGetQuery(uasconn,"
select u.id, u.age, u.gender, u.city,u.country, p.category,p.department, sum(o.num_of_item) as total_item , o.order_id, sum(oi.sale_price)-sum(i.cost)-sum(p.cost) as profit from user as u
inner join orders as o on o.user_id = u.id
inner join order_items as oi on oi.order_id=o.order_id
inner join inventory_items as i on i.product_id = oi.product_id
inner join products as p on p.id=i.product_id
where o.status = 'Complete'
group by o.order_id
")
data.r
datatable(head(data.r,500))
### Anova
summary(aov(profit ~ total_item +age+gender+city+country+category+department, data=data.r))
# profit ~ total_item +gender+city+category
Referensi
- https://bookdown.org/dsciencelabs/sql_in_r/_book/
- https://www.highcharts.com/docs/chart-and-series-types/bar-chart
- https://www.datanovia.com/en/lessons/highchart-interactive-pie-chart-and-alternatives-in-r/
- https://www.statology.org/separate-function-in-r/
- https://rpubs.com/mr148/304105
- https://youtube.com/watch?v=C3CbZxsCrmE
- https://plotly.com/r/bar-charts/
- https://stackoverflow.com/questions/70614465/why-code-folding-hide-doesnt-give-the-option-to-hide-show-code-in-rmarkdown
- https://www.scribbr.com/statistics/anova-in-r/
- https://blog.enterprisedna.co/sql-distinct/#:~:text=The%20DISTINCT%20clause%20is%20used,of%20columns%20in%20a%20table.