library(readxl)
# Read the workbook into `df_customer` and show the resulting tibble.
df_customer <- read_excel(path = "df_customerr.xlsx")
df_customer
## # A tibble: 300 × 5
## ID_Pelanggan Jenis_Kelamin Tempat_Tinggal Penghasilan Total_Belanja
## <chr> <chr> <chr> <dbl> <dbl>
## 1 ID00031 Laki-laki Desa 2227350 2563031
## 2 ID00079 Perempuan Kota 9047608 8369550
## 3 ID00051 Perempuan Kota 9735540 8053033
## 4 ID00014 Laki-laki Kota 13510126 9799876
## 5 ID00067 Perempuan Desa 7773498 6982081
## 6 ID00042 Laki-laki Desa 6666740 4782002
## 7 ID00050 Perempuan Desa 5658721 4286283
## 8 ID00043 Laki-laki Desa 7637656 4779797
## 9 ID00014 Perempuan Desa 6776730 6315967
## 10 ID00025 Laki-laki Kota 10412102 5106141
## # ℹ 290 more rows
# Number of rows in the data set.
NROW(df_customer)
## [1] 300
# Distinct values of ID_Pelanggan, in order of first appearance.
unique(df_customer[["ID_Pelanggan"]])
## [1] "ID00031" "ID00079" "ID00051" "ID00014" "ID00067" "ID00042" "ID00050"
## [8] "ID00043" "ID00025" "ID00090" "ID00091" "ID00069" "ID00057" "ID00092"
## [15] "ID00009" "ID00093" "ID00099" "ID00072" "ID00026" "ID00007" "ID00083"
## [22] "ID00036" "ID00078" "ID00081" "ID00076" "ID00015" "ID00032" "ID00041"
## [29] "ID00074" "ID00023" "ID00027" "ID00060" "ID00053" "ID00096" "ID00038"
## [36] "ID00089" "ID00034" "ID00063" "ID00013" "ID00082" "ID00097" "ID00021"
## [43] "ID00047" "ID00095" "ID00016" "ID00094" "ID00006" "ID00086" "ID00039"
## [50] "ID00004" "ID00052" "ID00022" "ID00087" "ID00035" "ID00040" "ID00030"
## [57] "ID00012" "ID00064" "ID00071" "ID00085" "ID00037" "ID00008" "ID00098"
## [64] "ID00084" "ID00046" "ID00017" "ID00062" "ID00054" "ID00024" "ID00005"
## [71] "ID00070" "ID00055" "ID00075" "ID00048" "ID00077" "ID00056" "ID00068"
## [78] "ID00001" "ID00088" "ID00020" "ID00049" "ID00059" "ID00011" "ID00066"
## [85] "ID00044" "ID00045" "ID00033" "ID00010" "ID00058" "ID00061" "ID00029"
## [92] "ID00073" "ID00018" "ID00002"
# How many distinct values of ID_Pelanggan occur in the data.
length(unique(df_customer[["ID_Pelanggan"]]))
## [1] 94
# The five most frequent IDs. head() is used instead of `[1:5]` so the result
# is not padded with NA entries when fewer than five distinct IDs exist.
head(sort(table(df_customer[["ID_Pelanggan"]]), decreasing = TRUE), 5)
##
## ID00007 ID00025 ID00089 ID00093 ID00026
## 9 7 7 7 6
# Mean Penghasilan per Jenis_Kelamin.
aggregate(Penghasilan ~ Jenis_Kelamin, data = df_customer, FUN = mean)
## Jenis_Kelamin Penghasilan
## 1 Laki-laki 8880902
## 2 Perempuan 8505199
# Mean Total_Belanja per Jenis_Kelamin.
aggregate(Total_Belanja ~ Jenis_Kelamin, data = df_customer, FUN = mean)
## Jenis_Kelamin Total_Belanja
## 1 Laki-laki 6034728
## 2 Perempuan 7114786
# Mean Penghasilan per Tempat_Tinggal.
aggregate(Penghasilan ~ Tempat_Tinggal, data = df_customer, FUN = mean)
## Tempat_Tinggal Penghasilan
## 1 Desa 6249122
## 2 Kota 9878685
# Mean Total_Belanja per Tempat_Tinggal.
aggregate(Total_Belanja ~ Tempat_Tinggal, data = df_customer, FUN = mean)
## Tempat_Tinggal Total_Belanja
## 1 Desa 5022231
## 2 Kota 7520118
# Five rows with the largest Total_Belanja, keeping only the ID and the amount.
head(
  df_customer[
    order(df_customer$Total_Belanja, decreasing = TRUE),
    c("ID_Pelanggan", "Total_Belanja")
  ],
  5
)
## # A tibble: 5 × 2
## ID_Pelanggan Total_Belanja
## <chr> <dbl>
## 1 ID00034 11626302
## 2 ID00011 11527638
## 3 ID00057 11031197
## 4 ID00093 10984825
## 5 ID00007 10846012
# Row counts per value of Jenis_Kelamin.
table(df_customer[["Jenis_Kelamin"]])
##
## Laki-laki Perempuan
## 121 179
# Bin Penghasilan into three bands. Intervals are right-closed (the cut()
# default), so exactly 5,000,000 falls in "Rendah" and exactly 10,000,000
# falls in "Menengah".
df_customer[["Kategori_Penghasilan"]] <- cut(
  x = df_customer[["Penghasilan"]],
  breaks = c(-Inf, 5e6, 1e7, Inf),
  labels = c("Rendah", "Menengah", "Tinggi")
)
# Row counts per income band.
table(df_customer[["Kategori_Penghasilan"]])
##
## Rendah Menengah Tinggi
## 27 175 98
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 1. Customers who buy most often, counting only rows whose Total_Belanja
#    exceeds 5,000,000; the six IDs with the most such rows are kept.
q11 <- df_customer %>%
  filter(Total_Belanja > 5e6) %>%
  count(ID_Pelanggan, sort = TRUE) %>%
  head(6)
q11
## # A tibble: 6 × 2
## ID_Pelanggan n
## <chr> <int>
## 1 ID00007 7
## 2 ID00025 7
## 3 ID00026 6
## 4 ID00089 6
## 5 ID00053 5
## 6 ID00079 5
# 2. Number of women living in the city ("Kota") who shopped more than
#    5 times.
#
# Each row is treated as one purchase, so rows are first counted per
# ID_Pelanggan and only IDs with more than five rows are kept. The earlier
# version skipped this step and reported the raw number of rows for women
# in the city, which does not answer the question.
q2 <- df_customer %>%
  filter(Jenis_Kelamin == "Perempuan", Tempat_Tinggal == "Kota") %>%
  count(ID_Pelanggan, name = "n_transaksi") %>%
  filter(n_transaksi > 5) %>%
  summarise(jumlah = n())
q2
# NOTE: the output previously recorded here (jumlah = 120) was the raw row
# count for women in the city, not the number of customers with more than
# five purchases. Re-run this chunk to regenerate the output.
# 3. Customers who buy most often, counting only rows whose Penghasilan
#    exceeds 5,000,000; the six IDs with the most such rows are kept.
q33 <- df_customer %>%
  filter(Penghasilan > 5e6) %>%
  count(ID_Pelanggan, sort = TRUE) %>%
  head(6)
q33
## # A tibble: 6 × 2
## ID_Pelanggan n
## <chr> <int>
## 1 ID00007 9
## 2 ID00025 7
## 3 ID00093 7
## 4 ID00026 6
## 5 ID00089 6
## 6 ID00009 5
# 4. Gender breakdown of rows from villages ("Desa") whose Total_Belanja
#    exceeds 5,000,000.
q4 <- df_customer %>%
  filter(Tempat_Tinggal == "Desa" & Total_Belanja > 5e6) %>%
  count(Jenis_Kelamin)
q4
## # A tibble: 2 × 2
## Jenis_Kelamin n
## <chr> <int>
## 1 Laki-laki 10
## 2 Perempuan 37
# 5. Income (Penghasilan) of the rows from villages ("Desa") whose
#    Total_Belanja exceeds 5,000,000, listed together with the customer ID.
q5 <- df_customer %>%
  filter(Tempat_Tinggal == "Desa" & Total_Belanja > 5e6) %>%
  select(ID_Pelanggan, Penghasilan)
q5
## # A tibble: 47 × 2
## ID_Pelanggan Penghasilan
## <chr> <dbl>
## 1 ID00067 7773498
## 2 ID00014 6776730
## 3 ID00027 8108645
## 4 ID00089 9032981
## 5 ID00034 5616450
## 6 ID00013 4481204
## 7 ID00091 6128487
## 8 ID00038 5947963
## 9 ID00041 9231091
## 10 ID00047 5940612
## # ℹ 37 more rows