library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Data yang digunakan merupakan data Auto Sales yang berasal dari https://www.kaggle.com/datasets/ddosad/auto-sales-data. Data yang berbentuk csv diimpor ke RStudio dengan perintah read_csv yang terdapat pada library(tidyverse) dan diberi nama data.
# Read the auto-sales CSV into a tibble; show_col_types = FALSE silences the
# column-specification message that read_csv() would otherwise print.
data <- read_csv(
  file = "Auto Sales data.csv",
  show_col_types = FALSE
)
# Display the imported tibble.
print(data)
## # A tibble: 2,747 × 20
## ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES ORDERDATE
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 10107 30 95.7 2 2871 24/02/2018
## 2 10121 34 81.4 5 2766. 07/05/2018
## 3 10134 41 94.7 2 3884. 01/07/2018
## 4 10145 45 83.3 6 3747. 25/08/2018
## 5 10168 36 96.7 1 3480. 28/10/2018
## 6 10180 29 86.1 9 2498. 11/11/2018
## 7 10188 48 115. 1 5512. 18/11/2018
## 8 10211 41 115. 14 4708. 15/01/2019
## 9 10223 37 107. 1 3966. 20/02/2019
## 10 10237 23 101. 7 2333. 05/04/2019
## # ℹ 2,737 more rows
## # ℹ 14 more variables: DAYS_SINCE_LASTORDER <dbl>, STATUS <chr>,
## # PRODUCTLINE <chr>, MSRP <dbl>, PRODUCTCODE <chr>, CUSTOMERNAME <chr>,
## # PHONE <chr>, ADDRESSLINE1 <chr>, CITY <chr>, POSTALCODE <chr>,
## # COUNTRY <chr>, CONTACTLASTNAME <chr>, CONTACTFIRSTNAME <chr>,
## # DEALSIZE <chr>
Setelah mengimpor data, perintah select digunakan untuk mengambil peubah ORDERNUMBER, QUANTITYORDERED, PRICEEACH, SALES, DAYS_SINCE_LASTORDER, dan PRODUCTLINE dengan masing-masing peubah memiliki 10 observasi (observasi yang diambil merupakan 10 observasi pertama).
# Build the working table: keep the six analysis columns and only the first
# 10 observations of `data`.
# slice_head() returns at most 10 rows and never pads with NA rows when the
# input is shorter, which positional indexing with 1:10 can do.
TabelData <- data %>%
  select(
    ORDERNUMBER, QUANTITYORDERED, PRICEEACH,
    SALES, DAYS_SINCE_LASTORDER, PRODUCTLINE
  ) %>%
  slice_head(n = 10)
TabelData
## # A tibble: 10 × 6
## ORDERNUMBER QUANTITYORDERED PRICEEACH SALES DAYS_SINCE_LASTORDER PRODUCTLINE
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 10107 30 95.7 2871 828 Motorcycles
## 2 10121 34 81.4 2766. 757 Motorcycles
## 3 10134 41 94.7 3884. 703 Motorcycles
## 4 10145 45 83.3 3747. 649 Motorcycles
## 5 10168 36 96.7 3480. 586 Motorcycles
## 6 10180 29 86.1 2498. 573 Motorcycles
## 7 10188 48 115. 5512. 567 Motorcycles
## 8 10211 41 115. 4708. 510 Motorcycles
## 9 10223 37 107. 3966. 475 Motorcycles
## 10 10237 23 101. 2333. 432 Motorcycles
Menghitung dimensi data frame (baris dan kolom)
# Number of rows and columns of the working table.
TabelData %>% dim()
## [1] 10 6
Menampilkan statistik ringkasan
# Per-column summary of the working table.
TabelData %>% summary()
## ORDERNUMBER QUANTITYORDERED PRICEEACH SALES
## Min. :10107 Min. :23.0 Min. : 81.35 Min. :2333
## 1st Qu.:10137 1st Qu.:31.0 1st Qu.: 88.28 1st Qu.:2792
## Median :10174 Median :36.5 Median : 96.18 Median :3613
## Mean :10171 Mean :36.4 Mean : 97.61 Mean :3577
## 3rd Qu.:10205 3rd Qu.:41.0 3rd Qu.:105.75 3rd Qu.:3945
## Max. :10237 Max. :48.0 Max. :114.84 Max. :5512
## DAYS_SINCE_LASTORDER PRODUCTLINE
## Min. :432.0 Length:10
## 1st Qu.:524.2 Class :character
## Median :579.5 Mode :character
## Mean :608.0
## 3rd Qu.:689.5
## Max. :828.0
# Rows sorted by SALES in ascending order, showing only the ORDERNUMBER,
# PRICEEACH, and SALES columns.
TabelData %>%
  arrange(SALES) %>%
  select(ORDERNUMBER, PRICEEACH, SALES)
## # A tibble: 10 × 3
## ORDERNUMBER PRICEEACH SALES
## <dbl> <dbl> <dbl>
## 1 10237 101. 2333.
## 2 10180 86.1 2498.
## 3 10121 81.4 2766.
## 4 10107 95.7 2871
## 5 10168 96.7 3480.
## 6 10145 83.3 3747.
## 7 10134 94.7 3884.
## 8 10223 107. 3966.
## 9 10211 115. 4708.
## 10 10188 115. 5512.
# Rows sorted by DAYS_SINCE_LASTORDER from largest to smallest, showing only
# the ORDERNUMBER and DAYS_SINCE_LASTORDER columns.
TabelData %>%
  arrange(desc(DAYS_SINCE_LASTORDER)) %>%
  select(ORDERNUMBER, DAYS_SINCE_LASTORDER)
## # A tibble: 10 × 2
## ORDERNUMBER DAYS_SINCE_LASTORDER
## <dbl> <dbl>
## 1 10107 828
## 2 10121 757
## 3 10134 703
## 4 10145 649
## 5 10168 586
## 6 10180 573
## 7 10188 567
## 8 10211 510
## 9 10223 475
## 10 10237 432
# Rows with QUANTITYORDERED above 35, sorted by SALES from largest to
# smallest, showing only ORDERNUMBER, SALES, and QUANTITYORDERED.
TabelData %>%
  filter(QUANTITYORDERED > 35) %>%
  arrange(desc(SALES)) %>%
  select(ORDERNUMBER, SALES, QUANTITYORDERED)
## # A tibble: 6 × 3
## ORDERNUMBER SALES QUANTITYORDERED
## <dbl> <dbl> <dbl>
## 1 10188 5512. 48
## 2 10211 4708. 41
## 3 10223 3966. 37
## 4 10134 3884. 41
## 5 10145 3747. 45
## 6 10168 3480. 36
Grafik histogram sales
# Histogram of the SALES column of TabelData.
# The values are kept in a variable named `Sales` because hist() uses the
# expression passed as `x` for its default x-axis label.
Sales <- TabelData[["SALES"]]
hist(x = Sales, col = "orange", main = "Grafik Histogram Sales")
Grafik boxplot Quantity Ordered dan Price Each
# Side-by-side boxplots of the QUANTITYORDERED and PRICEEACH columns.
# `names` labels each box; without it the x-axis only shows "1" and "2",
# so the reader cannot tell which box is which variable.
# The title also corrects the misspelling "Oredered".
boxplot(
  TabelData$QUANTITYORDERED,
  TabelData$PRICEEACH,
  names = c("Quantity Ordered", "Price Each"),
  main = "Grafik Boxplot Quantity Ordered dan Price Each",
  col = "cyan"
)
Menghitung korelasi antara QUANTITYORDERED, PRICEEACH, dan SALES
# Numeric columns used for the correlation matrix.
# Columns are selected by name rather than by position (2:4), so the result
# stays correct if the column order of TabelData ever changes. All rows of
# TabelData are kept.
data2 <- TabelData %>%
  select(QUANTITYORDERED, PRICEEACH, SALES)
data2
## # A tibble: 10 × 3
## QUANTITYORDERED PRICEEACH SALES
## <dbl> <dbl> <dbl>
## 1 30 95.7 2871
## 2 34 81.4 2766.
## 3 41 94.7 3884.
## 4 45 83.3 3747.
## 5 36 96.7 3480.
## 6 29 86.1 2498.
## 7 48 115. 5512.
## 8 41 115. 4708.
## 9 37 107. 3966.
## 10 23 101. 2333.
Korelasi:
# Pairwise correlation matrix of the columns of data2
# (Pearson is the default method of cor(), spelled out here).
cor(x = data2, method = "pearson")
## QUANTITYORDERED PRICEEACH SALES
## QUANTITYORDERED 1.0000000 0.2790934 0.8858049
## PRICEEACH 0.2790934 1.0000000 0.6872597
## SALES 0.8858049 0.6872597 1.0000000