Pengantar Data Sains
~ Tugas 2 ~
| Kontak | : \(\downarrow\) |
| diyasaryanugroho@gmail.com | |
| https://www.instagram.com/diasary_nm/ | |
| RPubs | https://rpubs.com/diyasarya/ |
Link Video Penjelasan
https://drive.google.com/file/d/1Z-doEYBoQBBK0o5YtTQrAAX68Qf9QBQK/view?usp=sharing
Impor Data
library(tidyverse)
# Read the raw e-commerce transactions.
# NOTE(review): absolute, machine-specific path - adjust it when running on
# another machine.
Data_Ecom <- read_csv("C:/Users/diyas/OneDrive/Documents/Semester 2 Matana University/DataSains/Tugas/data_ecommerce.csv")

# Split InvoiceDate at the space into separate Date and Time columns.
Data_Ecompisah <- separate(
  Data_Ecom,
  col = InvoiceDate,
  into = c("Date", "Time"),
  sep = " "
)

# Every later step of the analysis reads this table as `pisah`, which was never
# defined; expose it under that name (the original name is kept as well).
pisah <- Data_Ecompisah

# Number of rows per Date, sorted from the busiest date downwards.
pisah_Date <- pisah %>%
  count(Date)
pisah_Date1 <- arrange(pisah_Date, desc(n))

# NOTE(review): despite the name, this keeps the first 20 rows - confirm
# whether 10 or 20 dates were intended.
datetop10 <- pisah_Date1[1:20, ]

Visualisasi
Banyaknya Transaksi berdasarkan Date
# Scatter plot of the row count (n) for each date kept in datetop10.
ggplot(data = datetop10, mapping = aes(x = Date, y = n)) +
  geom_point(size = 1, alpha = 0.8, color = "deepskyblue") +
  labs(
    x = "Date",
    y = "Transaksi",
    title = "Banyaknya Transaksi berdasarkan Date"
  ) +
  theme_minimal() +
  # Tilt the date labels so they do not overlap.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Total Penghasilan per Negara
# Revenue of every row: quantity times unit price.
Price <- mutate(pisah, Price_total = Quantity * UnitPrice)

# Number of rows per country.
country <- count(Price, Country)

# Countries to report on, in the order their totals are printed below.
Country <- c(
  "Australia", "Austria", "Bahrain", "Belgium", "Brazil",
  "Canada", "Channel Islands", "Cyprus", "Czech Republic", "Denmark",
  "EIRE", "European Community", "Finland", "France", "Germany",
  "Greece", "Hong Kong", "Iceland", "Israel", "Italy",
  "Japan", "Lebanon", "Lithuania", "Malta", "Netherlands",
  "Norway", "Poland", "Portugal", "RSA", "Saudi Arabia",
  "Singapore", "Spain", "Sweden", "Switzerland", "United Arab Emirates",
  "United Kingdom", "Unspecified", "USA"
)

# Print the summed Price_total of each listed country.
for (x in Country) {
  # Inside filter(), `Country` is the column of Price and `x` the loop value.
  data <- filter(Price, Country == x)
  print(sum(data$Price_total))
}
## [1] 137077.3
## [1] 10154.32
## [1] 548.4
## [1] 40910.96
## [1] 1143.6
## [1] 3666.38
## [1] 20086.29
## [1] 12946.29
## [1] 707.72
## [1] 18768.14
## [1] 263276.8
## [1] 1291.75
## [1] 22326.74
## [1] 197403.9
## [1] 221698.2
## [1] 4710.52
## [1] 10117.04
## [1] 4310
## [1] 7907.82
## [1] 16890.51
## [1] 35340.62
## [1] 1693.88
## [1] 1661.06
## [1] 2505.47
## [1] 284661.5
## [1] 35163.46
## [1] 7213.14
## [1] 29367.02
## [1] 1002.31
## [1] 131.17
## [1] 9120.39
## [1] 54774.58
## [1] 36595.91
## [1] 56385.35
## [1] 1902.28
## [1] 8187806
## [1] 4749.79
## [1] 1730.92
# Total revenue per country, in the order of the `Country` vector.
# The totals are computed directly from `Price` instead of being copied by
# hand from printed console output: print() shows only 7 significant digits
# (the United Kingdom total was cut to 8187806) and pasted numbers go stale as
# soon as the data changes.
pendapatan_negara <- data.frame(
  "Negara" = Country,
  "Penghasilan" = vapply(
    Country,
    function(negara) {
      # which() drops rows whose Country is NA, like subset() does.
      sum(Price$Price_total[which(Price$Country == negara)])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
)

# The ten countries with the highest total revenue.
top10 <- arrange(pendapatan_negara, desc(Penghasilan))
top10 <- top10[1:10, ]

Perbandingan Penjualan di UK dengan Negara Lain
# Share of total revenue: United Kingdom versus all other countries combined.
pendapatan <- pendapatan_negara %>%
  mutate(
    percent = Penghasilan / sum(Penghasilan),
    pert = round(percent, 2) * 100
  )
p <- subset(pendapatan, subset = Negara != "United Kingdom")

# Round the combined non-UK share once and give the UK the remainder.
# Adding up per-country values that were already rounded to whole percent
# drops every country below 0.5 %, so the two slices did not add up to 100,
# and the UK share was a hard-coded constant rather than derived from the data.
persen_lain <- round(sum(p$percent) * 100)
pendapatan <- data.frame(
  "a" = c("United Kingdom", "Others"),
  "b" = c(100 - persen_lain, persen_lain)
)

# Midpoint of each slice along the stacked axis; used to place its label.
pendapatan <- pendapatan %>%
  mutate(c = cumsum(b) - 0.5 * b)

# Pie chart: a single stacked bar drawn in polar coordinates.
ggplot(pendapatan, aes(x = "", y = b, fill = a)) +
  geom_bar(width = 1, stat = "identity", color = "white") +
  coord_polar("y", start = 0) +
  geom_text(aes(y = c, label = b), color = "white") +
  scale_fill_manual(values = c("purple", "orange")) +
  theme_void() +
  labs(title = "Persentase Penjualan di UK dan negara lain")

Penghasilan World Wide
# Top 10 sales worldwide, excluding the United Kingdom.
# The United Kingdom is removed by name instead of by assuming it occupies the
# first row after sorting, so the selection stays correct if the ranking changes.
top10 <- arrange(pendapatan_negara, desc(Penghasilan))
top10 <- head(subset(top10, Negara != "United Kingdom"), 10)
top10 # the 10 countries with the highest total sales

# Horizontal bar chart, bars ordered by revenue.
ggplot(
  top10,
  aes(
    x = reorder(Negara, Penghasilan),
    y = Penghasilan
  )
) +
  # One colour per row; sized from the data so fewer than 10 rows still plot.
  geom_bar(
    stat = "identity",
    fill = rainbow(nrow(top10))
  ) +
  geom_text(aes(label = Penghasilan),
    vjust = -0.25
  ) +
  theme_minimal() +
  labs(
    title = "Total Penghasilan per negara selain Inggris",
    x = "negara",
    y = "total ($)"
  ) +
  coord_flip()

Banyaknya Transaksi per Bulan
# Split Date at "/" into three parts.
# NOTE(review): assumes Date is written month/day/year - confirm against the
# raw file.
pisah1 <- separate(
  pisah,
  col = Date,
  into = c("Month", "Day", "Year"),
  sep = "/"
)

# One row per InvoiceNo / Year / Month combination; n is its number of rows.
helo <- pisah1 %>%
  group_by(InvoiceNo, Year, Month) %>%
  dplyr::summarise(n = n())
nrow(helo) # 25900 transactions in total
## [1] 25900

# Invoices from 2010, and everything else (treated as 2011 below).
t_2010 <- subset(helo, subset = Year == 2010)
t_2011 <- setdiff(helo, t_2010)

# NOTE(review): attaching plyr after dplyr masks dplyr verbs such as count(),
# summarise(), mutate() and arrange() for any code run later in the session.
library(plyr)

# Invoices per month; plyr::count() returns the tally in a column named `freq`.
a <- plyr::count(t_2011, "Month") # ref 1 (many columns)
b <- plyr::count(t_2010, "Month")
a$Year <- 2011
b$Year <- 2010
helo <- rbind(a, b)

# Axis label combining month and year.
helo$Months <- paste(helo$Month, ",", helo$Year)

# Bar chart of invoices per month, busiest month first.
ggplot(
  helo,
  aes(
    x = reorder(Months, -freq),
    y = freq
  )
) +
  # One colour per bar, sized from the data instead of a hard-coded 13, so the
  # plot does not fail when the number of month/year combinations differs.
  geom_bar(
    stat = "identity",
    fill = rainbow(nrow(helo)),
    color = "azure4"
  ) +
  geom_text(aes(label = freq),
    vjust = -0.25
  ) +
  theme_minimal() +
  labs(
    x = "Bulan, tahun",
    y = "Jumlah Transaksi (Bon)",
    title = "Banyaknya transaksi per bulan",
    subtitle = "Note : 1 kali transaksi = 1 Bon"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))