Introduction to Data Science

Tugas 2 Kelompok


Kontak : \(\downarrow\)
Email mailto:ferdinand.widjaya@student.matanauniversity.ac.id
Instagram https://www.instagram.com/fe_nw/
RPubs https://rpubs.com/ferdnw/

1. Import Data

# Load the packages used for the import and clean-up steps.
library(dplyr)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2

# Show the working directory; "data_ecommerce.csv" is read relative to it.
# The working directory is deliberately not changed with setwd(): a
# hard-coded absolute path only exists on one machine.
getwd()
## [1] "C:/Users/Public/Matkul R DLL"


# Read the raw data; a comma is already the default separator of read.csv().
datajualan <- read.csv("data_ecommerce.csv")
datajualan

# Drop every row that contains at least one missing value.
datajualan <- drop_na(datajualan)

2. Visualisasikan Data yang Sudah Diimpor

library(magrittr)
library(ggplot2)
library(dplyr)

# Keep only the product description and unit price columns for a quick look.
coba1 <- select(datajualan, Description, UnitPrice)
coba1

Melihat nama Country yang terbesar dalam memesan barang

library(treemap)
## Warning: package 'treemap' was built under R version 4.1.2
# Treemap with one tile per country; tile area is driven by the Quantity column.
treemap(
  dtf = datajualan,
  index = "Country",
  vSize = "Quantity",
  title = "",
  palette = "Set1",
  border.col = "grey40"
)

Melihat 10 nama barang yang paling sering terjual

library(dplyr)

# Count the rows per product description, add each product's share of all
# rows (pct), and keep the products with the ten highest counts.
sepuluh_tertinggi <- datajualan %>%
  group_by(Description) %>%
  summarize(count = n()) %>%
  mutate(pct = (count / sum(count)) * 100) %>%
  arrange(desc(count)) %>%
  top_n(10, wt = count)

# top_n() keeps ties, so more than ten rows can come back. Size the palette
# from the data instead of hard-coding 10, otherwise the fill vector and the
# bars can differ in length.
warna_batang <- rainbow(nrow(sepuluh_tertinggi))

# Horizontal bar chart of the counts. Both aesthetics now live inside aes();
# previously the parenthesis closed early and the extra arguments were
# silently ignored by ggplot(). The bars use a fixed colour vector, so no
# fill legend exists and no guides() call is needed.
ggplot(sepuluh_tertinggi, aes(x = Description, y = count)) +
  coord_flip() +
  geom_col(width = 0.90, fill = warna_batang) +
  xlab("") + # Set axis labels
  ylab("") +
  ggtitle("10 Most Sold Product")

Merapikan urutan 10 barang terlaris dari yang terendah ke yang tertinggi

library(dplyr)

# Count the rows per product description, add each product's share of all
# rows (pct), and keep the products with the ten highest counts.
sepuluh_tertinggi <- datajualan %>%
  group_by(Description) %>%
  summarize(count = n()) %>%
  mutate(pct = (count / sum(count)) * 100) %>%
  arrange(desc(count)) %>%
  top_n(10, wt = count)

# Same chart as before, but the bars are ordered by count via reorder().
# The palette length follows the data because top_n() keeps ties and may
# return more than ten rows. The bars use a fixed colour vector, so there is
# no fill legend to hide (the deprecated guides(fill = FALSE) call is gone).
ggplot(
  sepuluh_tertinggi,
  aes(x = reorder(Description, -count), y = count)
) +
  coord_flip() +
  geom_col(width = 0.90, fill = rainbow(nrow(sepuluh_tertinggi))) +
  xlab("") + # Set axis labels
  ylab("") +
  ggtitle("10 Most Sold Product")

Nama Negara dan Rata-rata Spending

library(dplyr)

# Mean UnitPrice per country, with the two result columns renamed in one step.
ya <- setNames(
  aggregate(
    x = datajualan$UnitPrice,
    by = list(datajualan$Country),
    FUN = mean
  ),
  c("Country", "AverageSpend")
)
ya

Versi yang Diurutkan dari Rata-rata Spending Terbesar

# Sort the countries from the highest to the lowest average spend.
# desc() is passed positionally: arrange() has no `by` argument, the old
# `by = ...` form only worked because the name was ignored.
spendbesar <- arrange(ya, desc(AverageSpend))
spendbesar

Membuat Grafik berdasarkan rata-rata belanja per Negara

library(ggplot2)
library(dplyr)
library(scales)
## Warning: package 'scales' was built under R version 4.1.2
library(ggbeeswarm)
## Warning: package 'ggbeeswarm' was built under R version 4.1.2
# Horizontal bar chart of the average spend per country, bars ordered by
# value. The fill legend would only repeat the axis labels, so it is hidden
# with guides(fill = "none"), the non-deprecated spelling of fill = FALSE.
ggplot(
  spendbesar,
  aes(x = reorder(Country, AverageSpend), y = AverageSpend, fill = Country)
) +
  coord_flip() +
  geom_bar(stat = "identity", width = 0.90) +
  xlab("") + # Set axis labels
  ylab("") +
  guides(fill = "none") +
  ggtitle(" Shop Purchase Amount by Country(£)") +
  theme_minimal()

### Grafik dari data yang sama dari Multiverse Lain

library(ggplot2)

# Dot plot of the average spend per country: one point per country plus a
# horizontal guide segment, countries ordered by value.
# The x-axis label now describes the plotted variable (AverageSpend, the mean
# of UnitPrice) instead of the unrelated "Life Expectancy (years)".
# NOTE(review): the segments are anchored at x = 120 - confirm this value
# suits the actual range of AverageSpend.
ggplot(
  spendbesar,
  aes(x = AverageSpend, y = reorder(Country, AverageSpend))
) +
  geom_point(color = "navy", size = 2) +
  geom_segment(
    aes(
      x = 120,
      xend = AverageSpend,
      y = reorder(Country, AverageSpend),
      yend = reorder(Country, AverageSpend)
    ),
    color = "azure3"
  ) +
  labs(
    x = "Average Unit Price (£)",
    y = "",
    title = "Average Purchase Total by Countries"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )