Package

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Mengimpor Data

Data yang digunakan adalah Auto Sales data yang berasal dari https://www.kaggle.com/datasets/ddosad/auto-sales-data. Data yang berbentuk csv tersebut diimpor ke RStudio dengan perintah read_csv yang terdapat pada library(tidyverse) dan diberi nama data.

# Load the auto-sales CSV into a tibble; show_col_types = FALSE silences the
# column-specification message that read_csv would otherwise print.
data <- read_csv(file = "Auto Sales data.csv", show_col_types = FALSE)
# Display the tibble preview (first rows plus the column overview).
print(data)
## # A tibble: 2,747 × 20
##    ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES ORDERDATE 
##          <dbl>           <dbl>     <dbl>           <dbl> <dbl> <chr>     
##  1       10107              30      95.7               2 2871  24/02/2018
##  2       10121              34      81.4               5 2766. 07/05/2018
##  3       10134              41      94.7               2 3884. 01/07/2018
##  4       10145              45      83.3               6 3747. 25/08/2018
##  5       10168              36      96.7               1 3480. 28/10/2018
##  6       10180              29      86.1               9 2498. 11/11/2018
##  7       10188              48     115.                1 5512. 18/11/2018
##  8       10211              41     115.               14 4708. 15/01/2019
##  9       10223              37     107.                1 3966. 20/02/2019
## 10       10237              23     101.                7 2333. 05/04/2019
## # ℹ 2,737 more rows
## # ℹ 14 more variables: DAYS_SINCE_LASTORDER <dbl>, STATUS <chr>,
## #   PRODUCTLINE <chr>, MSRP <dbl>, PRODUCTCODE <chr>, CUSTOMERNAME <chr>,
## #   PHONE <chr>, ADDRESSLINE1 <chr>, CITY <chr>, POSTALCODE <chr>,
## #   COUNTRY <chr>, CONTACTLASTNAME <chr>, CONTACTFIRSTNAME <chr>,
## #   DEALSIZE <chr>

Setelah mengimpor data, perintah select digunakan untuk mengambil peubah ORDERNUMBER, QUANTITYORDERED, PRICEEACH, SALES, DAYS_SINCE_LASTORDER, dan PRODUCTLINE. Dari hasil tersebut hanya 10 observasi pertama yang diambil, sehingga masing-masing peubah memiliki 10 observasi.

# Build the working table: keep the six columns used by the rest of the
# analysis (all referenced by bare name) and only the first 10 observations.
# head() returns at most 10 rows and, unlike `[1:10, ]`, never indexes past
# the last row when the input has fewer than 10 rows.
TabelData <- data %>%
  select(
    ORDERNUMBER, QUANTITYORDERED, PRICEEACH,
    SALES, DAYS_SINCE_LASTORDER, PRODUCTLINE
  ) %>%
  head(n = 10)
# Display the resulting table.
TabelData
## # A tibble: 10 × 6
##    ORDERNUMBER QUANTITYORDERED PRICEEACH SALES DAYS_SINCE_LASTORDER PRODUCTLINE
##          <dbl>           <dbl>     <dbl> <dbl>                <dbl> <chr>      
##  1       10107              30      95.7 2871                   828 Motorcycles
##  2       10121              34      81.4 2766.                  757 Motorcycles
##  3       10134              41      94.7 3884.                  703 Motorcycles
##  4       10145              45      83.3 3747.                  649 Motorcycles
##  5       10168              36      96.7 3480.                  586 Motorcycles
##  6       10180              29      86.1 2498.                  573 Motorcycles
##  7       10188              48     115.  5512.                  567 Motorcycles
##  8       10211              41     115.  4708.                  510 Motorcycles
##  9       10223              37     107.  3966.                  475 Motorcycles
## 10       10237              23     101.  2333.                  432 Motorcycles

Inspeksi Data Frame

Menghitung dimensi data frame (baris dan kolom)

# Report the table size as (number of rows, number of columns).
c(nrow(TabelData), ncol(TabelData))
## [1] 10  6

Menampilkan statistik ringkasan

# Per-column summary: min/quartiles/mean/max for the numeric columns and
# length/class/mode for the character column.
TabelData %>% summary()
##   ORDERNUMBER    QUANTITYORDERED   PRICEEACH          SALES     
##  Min.   :10107   Min.   :23.0    Min.   : 81.35   Min.   :2333  
##  1st Qu.:10137   1st Qu.:31.0    1st Qu.: 88.28   1st Qu.:2792  
##  Median :10174   Median :36.5    Median : 96.18   Median :3613  
##  Mean   :10171   Mean   :36.4    Mean   : 97.61   Mean   :3577  
##  3rd Qu.:10205   3rd Qu.:41.0    3rd Qu.:105.75   3rd Qu.:3945  
##  Max.   :10237   Max.   :48.0    Max.   :114.84   Max.   :5512  
##  DAYS_SINCE_LASTORDER PRODUCTLINE       
##  Min.   :432.0        Length:10         
##  1st Qu.:524.2        Class :character  
##  Median :579.5        Mode  :character  
##  Mean   :608.0                          
##  3rd Qu.:689.5                          
##  Max.   :828.0

Analisis Data

Menampilkan ORDERNUMBER, PRICEEACH, dan SALES yang diurutkan berdasarkan SALES dari yang terkecil hingga terbesar
# Sort the rows by SALES in ascending order, then keep only the order number,
# unit price and sales columns for display.
TabelData %>%
  arrange(SALES) %>%
  select(ORDERNUMBER, PRICEEACH, SALES)
## # A tibble: 10 × 3
##    ORDERNUMBER PRICEEACH SALES
##          <dbl>     <dbl> <dbl>
##  1       10237     101.  2333.
##  2       10180      86.1 2498.
##  3       10121      81.4 2766.
##  4       10107      95.7 2871 
##  5       10168      96.7 3480.
##  6       10145      83.3 3747.
##  7       10134      94.7 3884.
##  8       10223     107.  3966.
##  9       10211     115.  4708.
## 10       10188     115.  5512.
Menampilkan ORDERNUMBER yang diurutkan berdasarkan DAYS_SINCE_LASTORDER dari yang terlama hingga tersingkat
# Sort the rows from the largest to the smallest DAYS_SINCE_LASTORDER, then
# keep only the order number and that column for display.
TabelData %>%
  arrange(desc(DAYS_SINCE_LASTORDER)) %>%
  select(ORDERNUMBER, DAYS_SINCE_LASTORDER)
## # A tibble: 10 × 2
##    ORDERNUMBER DAYS_SINCE_LASTORDER
##          <dbl>                <dbl>
##  1       10107                  828
##  2       10121                  757
##  3       10134                  703
##  4       10145                  649
##  5       10168                  586
##  6       10180                  573
##  7       10188                  567
##  8       10211                  510
##  9       10223                  475
## 10       10237                  432
Menampilkan ORDERNUMBER dan SALES dengan QUANTITYORDERED lebih dari 35, diurutkan dari SALES terbesar hingga terkecil
# Keep only the orders with more than 35 units, sort them from the highest to
# the lowest SALES, and show the order number, sales and quantity columns.
TabelData %>%
  filter(QUANTITYORDERED > 35) %>%
  arrange(desc(SALES)) %>%
  select(ORDERNUMBER, SALES, QUANTITYORDERED)
## # A tibble: 6 × 3
##   ORDERNUMBER SALES QUANTITYORDERED
##         <dbl> <dbl>           <dbl>
## 1       10188 5512.              48
## 2       10211 4708.              41
## 3       10223 3966.              37
## 4       10134 3884.              41
## 5       10145 3747.              45
## 6       10168 3480.              36

Plot

Histogram

Grafik histogram sales

# Extract the SALES column as a plain numeric vector. The variable name is
# kept as `Sales` because hist() uses it as the default x-axis label.
Sales <- TabelData[["SALES"]]
# Draw the histogram of sales with orange bars.
hist(
  x = Sales,
  col = "orange",
  main = "Grafik Histogram Sales"
)

Boxplot

Grafik boxplot Quantity Ordered dan Price Each

# Side-by-side boxplots of the ordered quantity and the unit price.
# `names` labels each box; without it the axis only shows "1" and "2", so the
# reader cannot tell which box belongs to which variable.
# The title also fixes the misspelling "Oredered" -> "Ordered".
boxplot(
  TabelData$QUANTITYORDERED, TabelData$PRICEEACH,
  names = c("Quantity Ordered", "Price Each"),
  main = "Grafik Boxplot Quantity Ordered dan Price Each",
  col = "cyan"
)

Korelasi

Menghitung korelasi antara QUANTITYORDERED, PRICEEACH, dan SALES

# Numeric columns used for the correlation matrix, selected by name so the
# result does not depend on the column order of TabelData. All rows of
# TabelData are kept (it already holds only the first observations), which
# avoids re-indexing rows 1:10 on a table that may be shorter.
data2 <- TabelData %>%
  select(QUANTITYORDERED, PRICEEACH, SALES)
# Display the subset that is passed to cor().
data2
## # A tibble: 10 × 3
##    QUANTITYORDERED PRICEEACH SALES
##              <dbl>     <dbl> <dbl>
##  1              30      95.7 2871 
##  2              34      81.4 2766.
##  3              41      94.7 3884.
##  4              45      83.3 3747.
##  5              36      96.7 3480.
##  6              29      86.1 2498.
##  7              48     115.  5512.
##  8              41     115.  4708.
##  9              37     107.  3966.
## 10              23     101.  2333.

Korelasi:

# Pairwise correlation matrix of the three numeric columns; "pearson" is the
# default method of cor(), written out here for clarity.
data2 %>% cor(method = "pearson")
##                 QUANTITYORDERED PRICEEACH     SALES
## QUANTITYORDERED       1.0000000 0.2790934 0.8858049
## PRICEEACH             0.2790934 1.0000000 0.6872597
## SALES                 0.8858049 0.6872597 1.0000000