Introduction to Data Science
Tugas 2 Kelompok
| Kontak | ↓ |
| Email | naftaligunawan@gmail.com |
| Instagram | https://www.instagram.com/nbrigittag/ |
| RPubs | https://rpubs.com/naftalibrigitta/ |
1. Import Data
# Load the packages used by this report and read the raw sales data.
library(dplyr)
library(tidyverse)

# Show the working directory the report was knitted from.
getwd()
## [1] "C:/Users/Naftali Brigitta/Documents/Tugazzz/PDS"

# NOTE(review): this absolute path only exists on the author's machine, and
# the file name carries a doubled ".csv.csv" extension -- confirm both before
# running the script anywhere else.
datajualan <- read.csv(
  "C:/Users/Naftali Brigitta/Documents/Tugazzz/PDS/data_ecommerce.csv.csv",
  sep = ","
)

# Print the data exactly as it was read.
datajualan

# Drop every row that has a missing value in any column. The result must be
# assigned back to `datajualan`, because all later chunks read that name.
datajualan <- drop_na(datajualan)

# 2. Visualisasikan Data yang Sudah di Import
# (EN: 2. Visualize the imported data)
# Preview only the product description and unit price columns.
library(magrittr)
library(ggplot2)
library(dplyr)

coba1 <- datajualan %>%
  select(Description, UnitPrice)

# Print the two-column preview.
coba1

# Melihat nama Country yang terbesar dalam memesan barang
# (EN: See which Country orders the most goods)
library(treemap)
## Warning: package 'treemap' was built under R version 4.1.2

# Treemap with one tile per Country, tile area driven by the Quantity column.
treemap(
  datajualan,
  index = c("Country"),
  vSize = "Quantity",
  title = "",
  palette = "Set1",
  border.col = "grey40"
)

# Melihat 10 nama barang yang terjual tertinggi
# (EN: See the 10 best-selling item names)
library(dplyr)

# Count the rows per product Description, add each product's share of all
# rows (pct), and keep the 10 products with the highest row count.
sepuluh_tertinggi <- datajualan %>%
  group_by(Description) %>%
  summarize(count = n()) %>%
  mutate(pct = (count / sum(count)) * 100) %>%
  arrange(desc(count)) %>%
  top_n(10, wt = count)

# Horizontal bar chart of the row count per product.
# All aesthetics now live inside aes(): the original closed aes() early, so
# `y = -count` and `fill = Description` were passed to ggplot() itself and
# silently ignored (the bars were already drawn with y = count).
ggplot(
  sepuluh_tertinggi,
  aes(x = Description, y = count, fill = Description)
) +
  coord_flip() +
  geom_bar(
    stat = "identity",
    width = .90,
    # One colour per row: top_n() keeps ties, so the row count is not
    # guaranteed to be exactly 10 and a fixed rainbow(10) could fail.
    fill = rainbow(nrow(sepuluh_tertinggi))
  ) +
  xlab("") + # Set axis labels
  ylab("") +
  # "none" replaces the deprecated guides(fill = FALSE).
  guides(fill = "none") +
  ggtitle("10 Most Sold Product")
Merapikan 10 nama barang dari penjualan terendah ke tertinggi
library(dplyr)

# Count the rows per product Description, add each product's share of all
# rows (pct), and keep the 10 products with the highest row count.
sepuluh_tertinggi <- datajualan %>%
  group_by(Description) %>%
  summarize(count = n()) %>%
  mutate(pct = (count / sum(count)) * 100) %>%
  arrange(desc(count)) %>%
  top_n(10, wt = count)

# Same bar chart as before, but with the bars ordered by count:
# reorder(Description, -count) sorts the factor levels by descending count.
ggplot(
  sepuluh_tertinggi,
  aes(
    x = reorder(Description, -count),
    y = count,
    fill = Description
  )
) +
  coord_flip() +
  geom_bar(
    stat = "identity",
    width = .90,
    # One colour per row: top_n() keeps ties, so the row count is not
    # guaranteed to be exactly 10 and a fixed rainbow(10) could fail.
    fill = rainbow(nrow(sepuluh_tertinggi))
  ) +
  xlab("") + # Set axis labels
  ylab("") +
  # "none" replaces the deprecated guides(fill = FALSE).
  guides(fill = "none") +
  ggtitle("10 Most Sold Product")
Nama Negara dan Jumlah Spending
library(dplyr)

# Mean UnitPrice per Country. aggregate() returns generic column names, so
# they are renamed right after.
# NOTE(review): the column is called AverageSpend but it is the mean of
# UnitPrice, not of Quantity * UnitPrice -- confirm this is the intended
# measure.
ya <- aggregate(
  x = datajualan$UnitPrice,
  by = list(datajualan$Country),
  FUN = mean
)
names(ya) <- c("Country", "AverageSpend")

# Print the per-country table.
ya

# Versi Disort dari Jumlah Spending Terbesar
# (EN: Version sorted from the largest spending amount)
# Sort the per-country table from the highest to the lowest AverageSpend.
# arrange() has no `by` argument; the sort expression is passed directly.
spendbesar <- arrange(ya, desc(AverageSpend))

# Print the sorted table.
spendbesar

# Membuat Grafik berdasarkan rata-rata belanja per Negara
# (EN: Plot the average spending per country)
library(ggplot2)
library(dplyr)
library(scales)
library(ggbeeswarm)

# Horizontal bar chart of AverageSpend per Country, with the bars ordered by
# AverageSpend and the fill legend hidden.
ggplot(
  spendbesar,
  aes(
    x = reorder(Country, AverageSpend),
    y = AverageSpend,
    fill = Country
  )
) +
  coord_flip() +
  geom_bar(stat = "identity", width = .90) +
  xlab("") + # Set axis labels
  ylab("") +
  # "none" replaces the deprecated guides(fill = FALSE).
  guides(fill = "none") +
  ggtitle(" Shop Purchase Amount by Country(£)") +
  theme_minimal()
### Grafik dari data yang sama dari Multiverse Lain
library(ggplot2)

# Lollipop chart of AverageSpend per Country: a thin segment from the
# baseline to each value, with a point at the value.
ggplot(
  spendbesar,
  aes(x = AverageSpend, y = reorder(Country, AverageSpend))
) +
  # Segments are drawn first so the points sit on top of them.
  # The baseline is 0; the original started at x = 120, a constant unrelated
  # to this data, so the segments ran in from the right.
  geom_segment(
    aes(
      x = 0,
      xend = AverageSpend,
      y = reorder(Country, AverageSpend),
      yend = reorder(Country, AverageSpend)
    ),
    color = "azure3"
  ) +
  geom_point(color = "navy", size = 2) +
  labs(
    # The x axis shows AverageSpend; the original label
    # "Life Expectancy (years)" did not describe this data.
    x = "Average Spend (£)",
    y = "",
    title = "Average Purchase Total by Countries"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )