library(Lahman)
## Warning: package 'Lahman' was built under R version 4.3.2
Memuat data Automobile Sales, lalu memperkecilnya menjadi 30 observasi pertama dan memisahkannya menjadi dua data frame: numdata yang berisi 5 peubah numerik dan chdata yang berisi 4 peubah karakter
# Load the auto sales CSV.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
data <- read.csv("C:/Users/acer/Downloads/AutoSalesData.csv", sep = ",")
# Keep the first five columns and at most the first 30 rows. head() is used
# rather than data[1:30, ] because indexing past the last row pads the result
# with all-NA rows when the file holds fewer than 30 records.
numdata <- head(data[, 1:5], 30)
numdata
## ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES
## 1 10107 30 95.70 2 2871.00
## 2 10121 34 81.35 5 2765.90
## 3 10134 41 94.74 2 3884.34
## 4 10145 45 83.26 6 3746.70
## 5 10168 36 96.66 1 3479.76
## 6 10180 29 86.13 9 2497.77
## 7 10188 48 114.84 1 5512.32
## 8 10211 41 114.84 14 4708.44
## 9 10223 37 107.18 1 3965.66
## 10 10237 23 101.44 7 2333.12
## 11 10251 28 113.88 2 3188.64
## 12 10263 34 108.14 2 3676.76
## 13 10275 45 92.83 1 4177.35
## 14 10285 36 113.88 6 4099.68
## 15 10299 23 112.93 9 2597.39
## 16 10309 41 107.18 5 4394.38
## 17 10318 46 94.74 1 4358.04
## 18 10329 42 104.67 1 4396.14
## 19 10341 41 188.73 9 7737.93
## 20 10361 20 72.55 13 1451.00
## 21 10375 21 34.91 12 733.11
## 22 10388 42 76.36 4 3207.12
## 23 10403 24 101.44 7 2434.56
## 24 10417 66 113.88 2 7516.08
## 25 10103 26 207.87 11 5404.62
## 26 10112 29 248.59 1 7209.11
## 27 10126 38 192.87 11 7329.06
## 28 10140 37 199.30 11 7374.10
## 29 10150 45 244.30 8 10993.50
## 30 10163 21 231.44 1 4860.24
# Pick the four text columns by name and keep at most the first 30 rows.
# head() caps the row count without creating all-NA rows when `data` has
# fewer than 30 records, which data[1:30, ...] would do.
chdata <- head(
  data[, c("ORDERDATE", "STATUS", "PRODUCTLINE", "PRODUCTCODE")],
  30
)
chdata
## ORDERDATE STATUS PRODUCTLINE PRODUCTCODE
## 1 24/02/2018 Shipped Motorcycles S10_1678
## 2 07/05/2018 Shipped Motorcycles S10_1678
## 3 01/07/2018 Shipped Motorcycles S10_1678
## 4 25/08/2018 Shipped Motorcycles S10_1678
## 5 28/10/2018 Shipped Motorcycles S10_1678
## 6 11/11/2018 Shipped Motorcycles S10_1678
## 7 18/11/2018 Shipped Motorcycles S10_1678
## 8 15/01/2019 Shipped Motorcycles S10_1678
## 9 20/02/2019 Shipped Motorcycles S10_1678
## 10 05/04/2019 Shipped Motorcycles S10_1678
## 11 18/05/2019 Shipped Motorcycles S10_1678
## 12 28/06/2019 Shipped Motorcycles S10_1678
## 13 23/07/2019 Shipped Motorcycles S10_1678
## 14 27/08/2019 Shipped Motorcycles S10_1678
## 15 30/09/2019 Shipped Motorcycles S10_1678
## 16 15/10/2019 Shipped Motorcycles S10_1678
## 17 02/11/2019 Shipped Motorcycles S10_1678
## 18 15/11/2019 Shipped Motorcycles S10_1678
## 19 24/11/2019 Shipped Motorcycles S10_1678
## 20 17/12/2019 Shipped Motorcycles S10_1678
## 21 03/02/2020 Shipped Motorcycles S10_1678
## 22 03/03/2020 Shipped Motorcycles S10_1678
## 23 08/04/2020 Shipped Motorcycles S10_1678
## 24 13/05/2020 Disputed Motorcycles S10_1678
## 25 29/01/2018 Shipped Classic Cars S10_1949
## 26 24/03/2018 Shipped Classic Cars S10_1949
## 27 28/05/2018 Shipped Classic Cars S10_1949
## 28 24/07/2018 Shipped Classic Cars S10_1949
## 29 19/09/2018 Shipped Classic Cars S10_1949
## 30 20/10/2018 Shipped Classic Cars S10_1949
Mengurutkan data numdata berdasarkan SALES dari yang terbesar ke terkecil
# Show numdata ordered by SALES, largest first. TRUE is spelled out because
# T is an ordinary variable that can be reassigned.
numdata[order(numdata$SALES, decreasing = TRUE), ]
## ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES
## 29 10150 45 244.30 8 10993.50
## 19 10341 41 188.73 9 7737.93
## 24 10417 66 113.88 2 7516.08
## 28 10140 37 199.30 11 7374.10
## 27 10126 38 192.87 11 7329.06
## 26 10112 29 248.59 1 7209.11
## 7 10188 48 114.84 1 5512.32
## 25 10103 26 207.87 11 5404.62
## 30 10163 21 231.44 1 4860.24
## 8 10211 41 114.84 14 4708.44
## 18 10329 42 104.67 1 4396.14
## 16 10309 41 107.18 5 4394.38
## 17 10318 46 94.74 1 4358.04
## 13 10275 45 92.83 1 4177.35
## 14 10285 36 113.88 6 4099.68
## 9 10223 37 107.18 1 3965.66
## 3 10134 41 94.74 2 3884.34
## 4 10145 45 83.26 6 3746.70
## 12 10263 34 108.14 2 3676.76
## 5 10168 36 96.66 1 3479.76
## 22 10388 42 76.36 4 3207.12
## 11 10251 28 113.88 2 3188.64
## 1 10107 30 95.70 2 2871.00
## 2 10121 34 81.35 5 2765.90
## 15 10299 23 112.93 9 2597.39
## 6 10180 29 86.13 9 2497.77
## 23 10403 24 101.44 7 2434.56
## 10 10237 23 101.44 7 2333.12
## 20 10361 20 72.55 13 1451.00
## 21 10375 21 34.91 12 733.11
Menghitung rata-rata peubah QUANTITYORDERED pada data frame numdata
# Arithmetic mean of the QUANTITYORDERED column of numdata.
mean(x = numdata[["QUANTITYORDERED"]])
## [1] 35.63333
Agregasi data frame chdata berdasarkan peubah STATUS, yaitu menghitung banyaknya observasi untuk setiap status
# Count the rows of chdata for each distinct STATUS value.
aggregate(
  x = chdata$STATUS,
  by = list(status = chdata$STATUS),
  FUN = length
)
## status x
## 1 Disputed 1
## 2 Shipped 29
Summary dari data yang sudah dimanipulasi
# Per-column descriptive statistics (min, quartiles, mean, max) for numdata.
summary(object = numdata)
## ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER
## Min. :10103 Min. :20.00 Min. : 34.91 Min. : 1.00
## 1st Qu.:10146 1st Qu.:28.25 1st Qu.: 94.74 1st Qu.: 1.25
## Median :10230 Median :36.50 Median :107.18 Median : 5.00
## Mean :10237 Mean :35.63 Mean :124.55 Mean : 5.50
## 3rd Qu.:10316 3rd Qu.:41.75 3rd Qu.:114.84 3rd Qu.: 9.00
## Max. :10417 Max. :66.00 Max. :248.59 Max. :14.00
## SALES
## Min. : 733.1
## 1st Qu.: 2950.4
## Median : 4032.7
## Mean : 4430.1
## 3rd Qu.: 5268.5
## Max. :10993.5
# Compact structure listing: dimensions, column types and leading values.
str(object = numdata)
## 'data.frame': 30 obs. of 5 variables:
## $ ORDERNUMBER : int 10107 10121 10134 10145 10168 10180 10188 10211 10223 10237 ...
## $ QUANTITYORDERED: int 30 34 41 45 36 29 48 41 37 23 ...
## $ PRICEEACH : num 95.7 81.3 94.7 83.3 96.7 ...
## $ ORDERLINENUMBER: int 2 5 2 6 1 9 1 14 1 7 ...
## $ SALES : num 2871 2766 3884 3747 3480 ...
# For character columns summary() reports only length, class and mode.
summary(object = chdata)
## ORDERDATE STATUS PRODUCTLINE PRODUCTCODE
## Length:30 Length:30 Length:30 Length:30
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
# Compact structure listing of the character columns in chdata.
str(object = chdata)
## 'data.frame': 30 obs. of 4 variables:
## $ ORDERDATE : chr "24/02/2018" "07/05/2018" "01/07/2018" "25/08/2018" ...
## $ STATUS : chr "Shipped" "Shipped" "Shipped" "Shipped" ...
## $ PRODUCTLINE: chr "Motorcycles" "Motorcycles" "Motorcycles" "Motorcycles" ...
## $ PRODUCTCODE: chr "S10_1678" "S10_1678" "S10_1678" "S10_1678" ...
Berikut adalah boxplot dari peubah QUANTITYORDERED dan PRICEEACH pada numdata serta peubah SALES yang dibagi seratus (SALESper100) agar skalanya sebanding dan boxplot lebih mudah dibaca. Ketiga peubah tersebut digabungkan menjadi data frame baru bernama bpdata
# Express SALES in hundreds so its range is comparable to the other columns.
SALESper100 <- numdata$SALES / 100
# Join columns 2 and 3 of numdata with the rescaled sales column.
bpdata <- cbind(numdata[, c(2, 3)], SALESper100 = SALESper100)
# Draw one box per column of bpdata.
boxplot(x = bpdata, col = "blue", main = "Boxplot")
Histogram dari peubah QUANTITYORDERED
# Distribution of QUANTITYORDERED. The data expression is kept as written
# because hist() derives its default x-axis label from it.
hist(x = numdata$QUANTITYORDERED, col = "Maroon", main = "Histogram")
Berikut adalah matriks korelasi antarpeubah pada data frame bpdata, yaitu QUANTITYORDERED, PRICEEACH, dan SALES yang dibagi seratus (SALESper100)
# Pairwise correlation matrix of the bpdata columns (cor() defaults to
# the Pearson method).
cor(x = bpdata)
## QUANTITYORDERED PRICEEACH SALESper100
## QUANTITYORDERED 1.00000000 -0.01493036 0.5380826
## PRICEEACH -0.01493036 1.00000000 0.8029814
## SALESper100 0.53808255 0.80298145 1.0000000