library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the customer data set from the CSV file in the working directory and
# preview its first six rows.
df_customer <- read.csv(file = "df_customer.csv", header = TRUE)
head(df_customer, n = 6L)
## X ID_Pelanggan Jenis_Kelamin Tempat_Tinggal Penghasilan Total_Belanja
## 1 1 ID00031 Laki-laki Desa 2227350 2563031
## 2 2 ID00079 Perempuan Kota 9047608 8369550
## 3 3 ID00051 Perempuan Kota 9735540 8053033
## 4 4 ID00014 Laki-laki Kota 13510126 9799876
## 5 5 ID00067 Perempuan Desa 7773498 6982081
## 6 6 ID00042 Laki-laki Desa 6666740 4782002
# Number of rows in the data set (first element of the dimensions).
dim(df_customer)[1L]
## [1] 300
# Number of distinct ID_Pelanggan values: count the first occurrences only.
sum(!duplicated(df_customer[["ID_Pelanggan"]]))
## [1] 94
# Row counts per ID_Pelanggan, largest first; show the first three entries.
# NOTE(review): a tie at the cut-off is truncated by position, not reported.
sort(table(df_customer[["ID_Pelanggan"]]), decreasing = TRUE)[seq_len(3L)]
##
## ID00007 ID00025 ID00089
## 9 7 7
# Mean of Penghasilan within each Jenis_Kelamin group.
aggregate(Penghasilan ~ Jenis_Kelamin, FUN = mean, data = df_customer)
## Jenis_Kelamin Penghasilan
## 1 Laki-laki 8880902
## 2 Perempuan 8505199
# Mean of Penghasilan within each Tempat_Tinggal group.
aggregate(Penghasilan ~ Tempat_Tinggal, FUN = mean, data = df_customer)
## Tempat_Tinggal Penghasilan
## 1 Desa 6249122
## 2 Kota 9878685
# Mean of Total_Belanja within each Tempat_Tinggal group.
aggregate(Total_Belanja ~ Tempat_Tinggal, FUN = mean, data = df_customer)
## Tempat_Tinggal Total_Belanja
## 1 Desa 5022231
## 2 Kota 7520118
# Five rows with the largest Total_Belanja: order on the negated column so the
# biggest values come first, keep only the ID and spending columns.
head(
  df_customer[
    order(-df_customer[["Total_Belanja"]]),
    c("ID_Pelanggan", "Total_Belanja")
  ],
  n = 5L
)
## ID_Pelanggan Total_Belanja
## 76 ID00034 11626302
## 175 ID00011 11527638
## 228 ID00057 11031197
## 287 ID00093 10984825
## 33 ID00007 10846012
# Row count for each Jenis_Kelamin value.
table(df_customer[["Jenis_Kelamin"]])
##
## Laki-laki Perempuan
## 121 179
# Bin Penghasilan into three labelled bands. Intervals are closed on the right
# (the default for cut), so exactly 5,000,000 is "Rendah" and exactly
# 10,000,000 is "Menengah".
df_customer[["Kategori_Penghasilan"]] <- cut(
  df_customer[["Penghasilan"]],
  breaks = c(-Inf, 5e6, 1e7, Inf),
  labels = c("Rendah", "Menengah", "Tinggi"),
  right = TRUE
)
# Row count per income band.
table(df_customer[["Kategori_Penghasilan"]])
##
## Rendah Menengah Tinggi
## 27 175 98
# Keep rows whose Total_Belanja exceeds 5,000,000; which() discards rows where
# the comparison is NA, matching subset().
df_filter1 <- df_customer[
  which(df_customer[["Total_Belanja"]] > 5e6), , drop = FALSE
]
# Row count per customer ID within the filtered rows.
table(df_filter1[["ID_Pelanggan"]])
##
## ID00001 ID00002 ID00004 ID00005 ID00006 ID00007 ID00008 ID00009 ID00010 ID00011
## 1 3 2 1 4 7 2 4 2 1
## ID00012 ID00013 ID00014 ID00015 ID00016 ID00017 ID00018 ID00021 ID00022 ID00023
## 1 3 3 1 4 2 1 2 2 4
## ID00024 ID00025 ID00026 ID00027 ID00029 ID00030 ID00031 ID00032 ID00033 ID00034
## 4 7 6 1 1 3 4 4 2 2
## ID00035 ID00036 ID00037 ID00038 ID00039 ID00040 ID00041 ID00042 ID00043 ID00044
## 3 4 3 2 3 3 2 3 1 1
## ID00045 ID00046 ID00047 ID00048 ID00049 ID00050 ID00051 ID00052 ID00053 ID00054
## 3 3 1 2 2 2 2 3 5 2
## ID00055 ID00056 ID00057 ID00058 ID00059 ID00060 ID00061 ID00063 ID00064 ID00066
## 3 1 4 2 1 3 1 4 1 2
## ID00067 ID00069 ID00070 ID00071 ID00072 ID00073 ID00074 ID00075 ID00076 ID00077
## 4 3 1 3 3 1 4 2 2 1
## ID00078 ID00079 ID00082 ID00083 ID00084 ID00085 ID00086 ID00087 ID00088 ID00089
## 1 5 3 2 5 3 2 4 1 6
## ID00090 ID00091 ID00092 ID00093 ID00094 ID00095 ID00096 ID00097 ID00098 ID00099
## 5 4 2 5 3 1 2 2 1 1
# Report the customer(s) with the most rows in df_filter1 (Total_Belanja above
# 5,000,000). Taking only the first element of the sorted table silently drops
# customers tied for first place, so keep every ID whose count equals the
# maximum instead.
freq1 <- table(df_filter1$ID_Pelanggan)
freq1[freq1 == max(freq1)]
##
## ID00007 ID00025
## 7 7
# Rows where Jenis_Kelamin is "Perempuan" and Tempat_Tinggal is "Kota";
# which() discards rows where the condition is NA, matching subset().
df_filter2 <- df_customer[
  which(
    df_customer[["Jenis_Kelamin"]] == "Perempuan" &
      df_customer[["Tempat_Tinggal"]] == "Kota"
  ), , drop = FALSE
]
# Row count per customer ID, then the number of IDs appearing more than 5 times.
freq2 <- table(df_filter2[["ID_Pelanggan"]])
length(which(freq2 > 5))
## [1] 0
# Keep rows whose Penghasilan exceeds 5,000,000; which() discards rows where
# the comparison is NA, matching subset().
df_filter3 <- df_customer[
  which(df_customer[["Penghasilan"]] > 5e6), , drop = FALSE
]
# Row count per customer ID within the filtered rows.
table(df_filter3[["ID_Pelanggan"]])
##
## ID00001 ID00002 ID00004 ID00005 ID00006 ID00007 ID00008 ID00009 ID00010 ID00011
## 2 3 2 2 4 9 3 5 2 1
## ID00012 ID00013 ID00014 ID00015 ID00016 ID00017 ID00018 ID00020 ID00021 ID00022
## 1 2 5 1 4 3 1 2 2 2
## ID00023 ID00024 ID00025 ID00026 ID00027 ID00029 ID00030 ID00031 ID00032 ID00033
## 5 5 7 6 2 2 3 4 4 1
## ID00034 ID00035 ID00036 ID00037 ID00038 ID00039 ID00040 ID00041 ID00042 ID00043
## 2 3 4 2 2 3 3 2 4 2
## ID00044 ID00045 ID00046 ID00047 ID00048 ID00049 ID00050 ID00051 ID00052 ID00053
## 1 3 4 1 2 2 3 2 3 5
## ID00054 ID00055 ID00056 ID00057 ID00058 ID00059 ID00060 ID00061 ID00062 ID00063
## 4 3 1 4 2 1 3 2 1 5
## ID00064 ID00066 ID00067 ID00068 ID00069 ID00070 ID00071 ID00072 ID00073 ID00074
## 2 1 4 1 3 2 4 2 1 5
## ID00075 ID00076 ID00077 ID00078 ID00079 ID00081 ID00082 ID00083 ID00084 ID00085
## 2 3 3 1 5 2 4 3 5 4
## ID00086 ID00087 ID00088 ID00089 ID00090 ID00091 ID00092 ID00093 ID00094 ID00095
## 3 4 1 6 5 4 2 7 3 1
## ID00096 ID00097 ID00099
## 2 3 1
# Report the customer(s) with the most rows in df_filter3. Selecting every ID
# whose count equals the maximum, rather than the first element of the sorted
# table, keeps customers tied for first place instead of silently dropping
# them. With a single maximum the printed result is the same as before.
freq3 <- table(df_filter3$ID_Pelanggan)
freq3[freq3 == max(freq3)]
## ID00007
## 9
# Rows where Tempat_Tinggal is "Desa" and Total_Belanja exceeds 5,000,000.
# The condition is evaluated inside the data frame; which() discards NA
# results, matching subset().
df_filter4 <- df_customer[
  which(with(df_customer, Tempat_Tinggal == "Desa" & Total_Belanja > 5e6)),
  ,
  drop = FALSE
]
# Distinct Jenis_Kelamin values present among those rows.
unique(df_filter4[["Jenis_Kelamin"]])
## [1] "Perempuan" "Laki-laki"
# Same row filter as df_filter4: Tempat_Tinggal "Desa" with Total_Belanja above
# 5,000,000 (named arguments spelled out).
df_filter5 <- subset(
  x = df_customer,
  subset = Total_Belanja > 5e6 & Tempat_Tinggal == "Desa"
)
# List the ID, income and spending columns for the matching rows.
df_filter5[c("ID_Pelanggan", "Penghasilan", "Total_Belanja")]
## ID_Pelanggan Penghasilan Total_Belanja
## 5 ID00067 7773498 6982081
## 9 ID00014 6776730 6315967
## 43 ID00027 8108645 6901502
## 46 ID00089 9032981 5776859
## 47 ID00034 5616450 7064321
## 53 ID00013 4481204 5438461
## 56 ID00091 6128487 5990469
## 58 ID00038 5947963 5094570
## 61 ID00041 9231091 8371463
## 62 ID00047 5940612 6172545
## 65 ID00095 8032910 6259431
## 73 ID00031 7822419 5581673
## 82 ID00022 9331982 5602663
## 97 ID00096 7082568 5369782
## 106 ID00074 9657061 7154243
## 119 ID00094 7651846 5983600
## 134 ID00016 5575699 5436492
## 138 ID00055 8635642 6262374
## 139 ID00075 5809025 5002921
## 145 ID00090 6005712 5987338
## 148 ID00098 4849165 5535035
## 149 ID00048 4626369 5333229
## 161 ID00088 5455465 5826813
## 166 ID00067 6467267 6344949
## 171 ID00049 3157783 5695306
## 176 ID00055 6093467 6646206
## 177 ID00008 6193172 6100858
## 197 ID00072 9024791 6500860
## 202 ID00058 9310352 6514985
## 209 ID00026 8141032 6663821
## 219 ID00052 6499451 6548809
## 220 ID00026 10832415 7895296
## 224 ID00090 11664452 9796996
## 229 ID00073 6455085 5490472
## 231 ID00014 6571179 6229499
## 232 ID00006 9498495 6134541
## 233 ID00091 5671820 6935452
## 248 ID00094 3726214 5043885
## 249 ID00031 6533837 6686485
## 250 ID00093 6856664 7140298
## 254 ID00057 7212261 5530567
## 257 ID00089 6928182 5259961
## 259 ID00066 3373098 5157567
## 265 ID00008 6940985 5052071
## 280 ID00063 6915574 6201985
## 289 ID00097 7028460 5214133
## 293 ID00013 9810087 5581775