R Markdown

df_customer <- read.csv("df_customer.csv")
head(df_customer)
##   X ID_Pelanggan Jenis_Kelamin Tempat_Tinggal Penghasilan Total_Belanja
## 1 1      ID00031     Laki-laki           Desa     2227350       2563031
## 2 2      ID00079     Perempuan           Kota     9047608       8369550
## 3 3      ID00051     Perempuan           Kota     9735540       8053033
## 4 4      ID00014     Laki-laki           Kota    13510126       9799876
## 5 5      ID00067     Perempuan           Desa     7773498       6982081
## 6 6      ID00042     Laki-laki           Desa     6666740       4782002
nrow(df_customer)
## [1] 300
length(unique(df_customer$ID_Pelanggan))
## [1] 94
sort(table(df_customer$ID_Pelanggan), decreasing = TRUE)[1:3]
## 
## ID00007 ID00025 ID00089 
##       9       7       7
aggregate(Penghasilan ~ Jenis_Kelamin, data = df_customer, mean)
##   Jenis_Kelamin Penghasilan
## 1     Laki-laki     8880902
## 2     Perempuan     8505199
aggregate(Total_Belanja ~ Jenis_Kelamin, data = df_customer, mean)
##   Jenis_Kelamin Total_Belanja
## 1     Laki-laki       6034728
## 2     Perempuan       7114786
aggregate(Penghasilan ~ Tempat_Tinggal, data = df_customer, mean)
##   Tempat_Tinggal Penghasilan
## 1           Desa     6249122
## 2           Kota     9878685
aggregate(Total_Belanja ~ Tempat_Tinggal, data = df_customer, mean)
##   Tempat_Tinggal Total_Belanja
## 1           Desa       5022231
## 2           Kota       7520118
df_customer[order(-df_customer$Total_Belanja), c("ID_Pelanggan", "Total_Belanja")] |> head(5)
##     ID_Pelanggan Total_Belanja
## 76       ID00034      11626302
## 175      ID00011      11527638
## 228      ID00057      11031197
## 287      ID00093      10984825
## 33       ID00007      10846012
table(df_customer$Jenis_Kelamin)
## 
## Laki-laki Perempuan 
##       121       179
## 
df_customer$Kategori_Penghasilan <- cut(df_customer$Penghasilan,
                                        breaks = c(-Inf, 5000000, 10000000, Inf),
                                        labels = c("Rendah", "Menengah", "Tinggi"))
table(df_customer$Kategori_Penghasilan)
## 
##   Rendah Menengah   Tinggi 
##       27      175       98
df_customer$ID_Pelanggan
##   [1] "ID00031" "ID00079" "ID00051" "ID00014" "ID00067" "ID00042" "ID00050"
##   [8] "ID00043" "ID00014" "ID00025" "ID00090" "ID00091" "ID00069" "ID00091"
##  [15] "ID00057" "ID00092" "ID00009" "ID00093" "ID00099" "ID00072" "ID00026"
##  [22] "ID00007" "ID00042" "ID00009" "ID00083" "ID00036" "ID00078" "ID00081"
##  [29] "ID00043" "ID00076" "ID00015" "ID00032" "ID00007" "ID00009" "ID00041"
##  [36] "ID00074" "ID00023" "ID00027" "ID00060" "ID00053" "ID00007" "ID00053"
##  [43] "ID00027" "ID00096" "ID00038" "ID00089" "ID00034" "ID00093" "ID00069"
##  [50] "ID00072" "ID00076" "ID00063" "ID00013" "ID00082" "ID00097" "ID00091"
##  [57] "ID00025" "ID00038" "ID00021" "ID00079" "ID00041" "ID00047" "ID00090"
##  [64] "ID00060" "ID00095" "ID00016" "ID00094" "ID00006" "ID00072" "ID00086"
##  [71] "ID00086" "ID00039" "ID00031" "ID00081" "ID00050" "ID00034" "ID00004"
##  [78] "ID00013" "ID00069" "ID00025" "ID00052" "ID00022" "ID00089" "ID00032"
##  [85] "ID00025" "ID00087" "ID00035" "ID00040" "ID00030" "ID00012" "ID00031"
##  [92] "ID00030" "ID00064" "ID00099" "ID00014" "ID00093" "ID00096" "ID00071"
##  [99] "ID00067" "ID00023" "ID00079" "ID00085" "ID00037" "ID00008" "ID00051"
## [106] "ID00074" "ID00050" "ID00098" "ID00074" "ID00086" "ID00076" "ID00084"
## [113] "ID00046" "ID00017" "ID00062" "ID00046" "ID00054" "ID00035" "ID00094"
## [120] "ID00079" "ID00024" "ID00087" "ID00007" "ID00093" "ID00079" "ID00023"
## [127] "ID00026" "ID00032" "ID00007" "ID00027" "ID00042" "ID00005" "ID00070"
## [134] "ID00016" "ID00024" "ID00032" "ID00021" "ID00055" "ID00075" "ID00036"
## [141] "ID00083" "ID00089" "ID00039" "ID00054" "ID00090" "ID00009" "ID00071"
## [148] "ID00098" "ID00048" "ID00077" "ID00083" "ID00056" "ID00039" "ID00068"
## [155] "ID00001" "ID00040" "ID00030" "ID00094" "ID00089" "ID00016" "ID00088"
## [162] "ID00054" "ID00075" "ID00048" "ID00020" "ID00067" "ID00093" "ID00036"
## [169] "ID00052" "ID00022" "ID00049" "ID00042" "ID00059" "ID00084" "ID00011"
## [176] "ID00055" "ID00008" "ID00046" "ID00085" "ID00066" "ID00077" "ID00046"
## [183] "ID00070" "ID00072" "ID00044" "ID00032" "ID00036" "ID00045" "ID00014"
## [190] "ID00016" "ID00087" "ID00033" "ID00040" "ID00040" "ID00010" "ID00089"
## [197] "ID00072" "ID00082" "ID00009" "ID00007" "ID00007" "ID00058" "ID00061"
## [204] "ID00074" "ID00024" "ID00063" "ID00054" "ID00023" "ID00026" "ID00033"
## [211] "ID00057" "ID00029" "ID00010" "ID00053" "ID00054" "ID00077" "ID00011"
## [218] "ID00025" "ID00052" "ID00026" "ID00007" "ID00025" "ID00087" "ID00090"
## [225] "ID00032" "ID00084" "ID00024" "ID00057" "ID00073" "ID00023" "ID00014"
## [232] "ID00006" "ID00091" "ID00001" "ID00090" "ID00058" "ID00081" "ID00029"
## [239] "ID00026" "ID00027" "ID00085" "ID00007" "ID00060" "ID00026" "ID00041"
## [246] "ID00084" "ID00006" "ID00094" "ID00031" "ID00093" "ID00017" "ID00064"
## [253] "ID00037" "ID00057" "ID00020" "ID00035" "ID00089" "ID00033" "ID00066"
## [260] "ID00004" "ID00074" "ID00097" "ID00005" "ID00025" "ID00008" "ID00055"
## [267] "ID00089" "ID00085" "ID00045" "ID00018" "ID00042" "ID00031" "ID00006"
## [274] "ID00071" "ID00061" "ID00048" "ID00017" "ID00045" "ID00092" "ID00063"
## [281] "ID00053" "ID00053" "ID00063" "ID00071" "ID00084" "ID00082" "ID00093"
## [288] "ID00017" "ID00097" "ID00002" "ID00049" "ID00002" "ID00013" "ID00024"
## [295] "ID00049" "ID00067" "ID00082" "ID00002" "ID00037" "ID00063"
length(unique(df_customer$ID_Pelanggan))
## [1] 94
df_customer$Tempat_Tinggal
##   [1] "Desa" "Kota" "Kota" "Kota" "Desa" "Desa" "Desa" "Desa" "Desa" "Kota"
##  [11] "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota"
##  [21] "Kota" "Kota" "Kota" "Kota" "Desa" "Kota" "Kota" "Desa" "Kota" "Kota"
##  [31] "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Desa" "Desa" "Kota" "Kota"
##  [41] "Kota" "Kota" "Desa" "Kota" "Kota" "Desa" "Desa" "Desa" "Kota" "Desa"
##  [51] "Desa" "Kota" "Desa" "Kota" "Kota" "Desa" "Kota" "Desa" "Kota" "Kota"
##  [61] "Desa" "Desa" "Kota" "Kota" "Desa" "Kota" "Kota" "Kota" "Kota" "Kota"
##  [71] "Desa" "Kota" "Desa" "Desa" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota"
##  [81] "Kota" "Desa" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota"
##  [91] "Kota" "Kota" "Desa" "Desa" "Desa" "Desa" "Desa" "Desa" "Kota" "Kota"
## [101] "Kota" "Kota" "Kota" "Desa" "Kota" "Desa" "Kota" "Desa" "Desa" "Kota"
## [111] "Kota" "Kota" "Kota" "Desa" "Desa" "Kota" "Kota" "Kota" "Desa" "Kota"
## [121] "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Desa"
## [131] "Kota" "Kota" "Desa" "Desa" "Kota" "Desa" "Kota" "Desa" "Desa" "Kota"
## [141] "Kota" "Kota" "Kota" "Kota" "Desa" "Kota" "Kota" "Desa" "Desa" "Kota"
## [151] "Kota" "Kota" "Kota" "Desa" "Kota" "Desa" "Kota" "Desa" "Desa" "Kota"
## [161] "Desa" "Desa" "Kota" "Kota" "Desa" "Desa" "Kota" "Kota" "Kota" "Kota"
## [171] "Desa" "Kota" "Kota" "Kota" "Kota" "Desa" "Desa" "Desa" "Kota" "Kota"
## [181] "Desa" "Kota" "Kota" "Desa" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota"
## [191] "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Desa" "Kota" "Desa" "Desa"
## [201] "Kota" "Desa" "Desa" "Kota" "Kota" "Desa" "Desa" "Kota" "Desa" "Desa"
## [211] "Kota" "Kota" "Kota" "Kota" "Kota" "Desa" "Desa" "Kota" "Desa" "Desa"
## [221] "Kota" "Kota" "Kota" "Desa" "Desa" "Kota" "Desa" "Kota" "Desa" "Kota"
## [231] "Desa" "Desa" "Desa" "Kota" "Kota" "Kota" "Desa" "Kota" "Kota" "Desa"
## [241] "Kota" "Kota" "Kota" "Kota" "Desa" "Kota" "Kota" "Desa" "Desa" "Desa"
## [251] "Kota" "Kota" "Kota" "Desa" "Desa" "Kota" "Desa" "Kota" "Desa" "Kota"
## [261] "Kota" "Desa" "Desa" "Kota" "Desa" "Kota" "Kota" "Kota" "Kota" "Kota"
## [271] "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Desa" "Kota" "Kota" "Desa"
## [281] "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Kota" "Desa" "Kota"
## [291] "Kota" "Kota" "Desa" "Kota" "Desa" "Kota" "Desa" "Kota" "Kota" "Kota"
sort(table(df_customer$ID_Pelanggan),decreasing = TRUE)[1:5]
## 
## ID00007 ID00025 ID00089 ID00093 ID00026 
##       9       7       7       7       6
sort(table(df_customer$ID_Pelanggan),decreasing = TRUE)[1:10]
## 
## ID00007 ID00025 ID00089 ID00093 ID00026 ID00032 ID00009 ID00014 ID00023 ID00024 
##       9       7       7       7       6       6       5       5       5       5

// 1.Siapa pelanggan yang paling sering membeli dengan total belanja lebih dari 5000000

df_customer$Kategori_Penghasilan <- cut(df_customer$Penghasilan,
                                        breaks = c(5000000, Inf),
                                        labels = c("Tinggi"))
table(df_customer$Kategori_Penghasilan)
## 
## Tinggi 
##    273
sort(table(df_customer$ID_Pelanggan))
## 
## ID00012 ID00015 ID00018 ID00044 ID00047 ID00056 ID00059 ID00062 ID00068 ID00073 
##       1       1       1       1       1       1       1       1       1       1 
## ID00078 ID00088 ID00095 ID00001 ID00004 ID00005 ID00010 ID00011 ID00020 ID00021 
##       1       1       1       2       2       2       2       2       2       2 
## ID00022 ID00029 ID00034 ID00038 ID00043 ID00051 ID00058 ID00061 ID00064 ID00066 
##       2       2       2       2       2       2       2       2       2       2 
## ID00070 ID00075 ID00092 ID00096 ID00098 ID00099 ID00002 ID00008 ID00013 ID00030 
##       2       2       2       2       2       2       3       3       3       3 
## ID00033 ID00035 ID00037 ID00039 ID00041 ID00045 ID00048 ID00049 ID00050 ID00052 
##       3       3       3       3       3       3       3       3       3       3 
## ID00055 ID00060 ID00069 ID00076 ID00077 ID00081 ID00083 ID00086 ID00097 ID00006 
##       3       3       3       3       3       3       3       3       3       4 
## ID00016 ID00017 ID00027 ID00036 ID00040 ID00046 ID00057 ID00067 ID00071 ID00082 
##       4       4       4       4       4       4       4       4       4       4 
## ID00085 ID00087 ID00091 ID00094 ID00009 ID00014 ID00023 ID00024 ID00031 ID00042 
##       4       4       4       4       5       5       5       5       5       5 
## ID00053 ID00054 ID00063 ID00072 ID00074 ID00079 ID00084 ID00090 ID00026 ID00032 
##       5       5       5       5       5       5       5       5       6       6 
## ID00025 ID00089 ID00093 ID00007 
##       7       7       7       9
sort(table(df_customer$Total_Belanja>5000000))
## 
## FALSE  TRUE 
##    62   238
q1 <- df_customer[df_customer$Total_Belanja > 5000000,]
q1 <- sort(table(q1$ID_Pelanggan), decreasing = TRUE)
head(q1)
## 
## ID00007 ID00025 ID00026 ID00089 ID00053 ID00079 
##       7       7       6       6       5       5

//2. Ada berapa banyak perempuan di kota yang berbelanja lebih dari 5x

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
q22 <- df_customer %>%
  filter(Jenis_Kelamin == "Perempuan", Tempat_Tinggal == "Kota") %>%
  count(ID_Pelanggan) %>%
  filter(n > 5) %>%
  summarise(jumlah = n())
q22
##   jumlah
## 1      0

//3. Siapa pelanggan yang paling sering membeli dengan penghasilan lebih dari 5000000

q3 <- df_customer[df_customer$Penghasilan > 5000000, ]
q3 <- sort(table(q3$ID_Pelanggan), decreasing = TRUE)
head(q3, 1)
## 
## ID00007 
##       9
library(dplyr)
q33 <- df_customer %>%
  filter(Penghasilan > 5000000) %>%
  count(ID_Pelanggan, sort = TRUE) %>%
  slice_head(n = 1)
q33
##   ID_Pelanggan n
## 1      ID00007 9

//4. Berjenis kelamin apa pelanggan yang tinggal di desa namun memiliki total belanja yang lebih dari 5000000

q4 <- subset(df_customer, Tempat_Tinggal == "Desa" & Total_Belanja > 5000000)
table(q4$Jenis_Kelamin)
## 
## Laki-laki Perempuan 
##        10        37
library(dplyr)
q44 <- df_customer %>%
  filter(Tempat_Tinggal == "Desa", Total_Belanja > 5000000) %>%
  count(Jenis_Kelamin)
q44
##   Jenis_Kelamin  n
## 1     Laki-laki 10
## 2     Perempuan 37

//5. Berpenghasilan berapa pelanggan yang tinggal di desa namun memiliki total belanja lebih dari 5000000

q5 <- subset(df_customer, Tempat_Tinggal == "Desa" & Total_Belanja > 5000000)
head(q5[, c("ID_Pelanggan", "Penghasilan")], 5)
##    ID_Pelanggan Penghasilan
## 5       ID00067     7773498
## 9       ID00014     6776730
## 43      ID00027     8108645
## 46      ID00089     9032981
## 47      ID00034     5616450
library(dplyr)
q55 <- df_customer %>%
  filter(Tempat_Tinggal == "Desa", Total_Belanja > 5000000) %>%
  select(ID_Pelanggan, Penghasilan) %>%
  head(5)
q55
##   ID_Pelanggan Penghasilan
## 1      ID00067     7773498
## 2      ID00014     6776730
## 3      ID00027     8108645
## 4      ID00089     9032981
## 5      ID00034     5616450