# Load the customer table from the working directory and preview its first
# six rows.
df_customer <- read.csv(file = "df_customer.csv", header = TRUE)
head(df_customer, n = 6L)
## X ID_Pelanggan Jenis_Kelamin Tempat_Tinggal Penghasilan Total_Belanja
## 1 1 ID00031 Laki-laki Desa 2227350 2563031
## 2 2 ID00079 Perempuan Kota 9047608 8369550
## 3 3 ID00051 Perempuan Kota 9735540 8053033
## 4 4 ID00014 Laki-laki Kota 13510126 9799876
## 5 5 ID00067 Perempuan Desa 7773498 6982081
## 6 6 ID00042 Laki-laki Desa 6666740 4782002
# Number of distinct customer IDs: count the first occurrence of each ID.
sum(!duplicated(df_customer$ID_Pelanggan))
## [1] 94
# Total number of rows in the table.
dim(df_customer)[1L]
## [1] 300
# Three most frequent customer IDs. head() is used instead of `[1:3]` so that a
# table with fewer than three distinct IDs is returned as-is rather than being
# padded with NA entries.
head(sort(table(df_customer$ID_Pelanggan), decreasing = TRUE), n = 3L)
##
## ID00007 ID00025 ID00089
## 9 7 7
# Mean income (Penghasilan) per gender (Jenis_Kelamin).
aggregate(
  Penghasilan ~ Jenis_Kelamin,
  data = df_customer,
  FUN = mean
)
## Jenis_Kelamin Penghasilan
## 1 Laki-laki 8880902
## 2 Perempuan 8505199
# Mean income (Penghasilan) per place of residence (Tempat_Tinggal).
aggregate(
  Penghasilan ~ Tempat_Tinggal,
  data = df_customer,
  FUN = mean
)
## Tempat_Tinggal Penghasilan
## 1 Desa 6249122
## 2 Kota 9878685
# Mean total spend (Total_Belanja) per place of residence (Tempat_Tinggal).
aggregate(
  Total_Belanja ~ Tempat_Tinggal,
  data = df_customer,
  FUN = mean
)
## Tempat_Tinggal Total_Belanja
## 1 Desa 5022231
## 2 Kota 7520118
# Five rows with the largest Total_Belanja, showing only the ID and the amount.
# Ordering on the negated column sorts from largest to smallest.
head(
  df_customer[
    order(-df_customer$Total_Belanja),
    c("ID_Pelanggan", "Total_Belanja")
  ],
  n = 5
)
## ID_Pelanggan Total_Belanja
## 76 ID00034 11626302
## 175 ID00011 11527638
## 228 ID00057 11031197
## 287 ID00093 10984825
## 33 ID00007 10846012
# Row count per gender.
table(df_customer[["Jenis_Kelamin"]])
##
## Laki-laki Perempuan
## 121 179
# Bucket income into three ordered bands. cut() intervals are right-closed by
# default, so the bands are (-Inf, 5e6], (5e6, 1e7] and (1e7, Inf].
df_customer[["Kategori_Penghasilan"]] <- cut(
  df_customer[["Penghasilan"]],
  breaks = c(-Inf, 5e6, 1e7, Inf),
  labels = c("Rendah", "Menengah", "Tinggi")
)
# Row count per income band.
table(df_customer[["Kategori_Penghasilan"]])
##
## Rendah Menengah Tinggi
## 27 175 98
# Print the full customer-ID column, one entry per row of the table.
df_customer[["ID_Pelanggan"]]
## [1] "ID00031" "ID00079" "ID00051" "ID00014" "ID00067" "ID00042" "ID00050"
## [8] "ID00043" "ID00014" "ID00025" "ID00090" "ID00091" "ID00069" "ID00091"
## [15] "ID00057" "ID00092" "ID00009" "ID00093" "ID00099" "ID00072" "ID00026"
## [22] "ID00007" "ID00042" "ID00009" "ID00083" "ID00036" "ID00078" "ID00081"
## [29] "ID00043" "ID00076" "ID00015" "ID00032" "ID00007" "ID00009" "ID00041"
## [36] "ID00074" "ID00023" "ID00027" "ID00060" "ID00053" "ID00007" "ID00053"
## [43] "ID00027" "ID00096" "ID00038" "ID00089" "ID00034" "ID00093" "ID00069"
## [50] "ID00072" "ID00076" "ID00063" "ID00013" "ID00082" "ID00097" "ID00091"
## [57] "ID00025" "ID00038" "ID00021" "ID00079" "ID00041" "ID00047" "ID00090"
## [64] "ID00060" "ID00095" "ID00016" "ID00094" "ID00006" "ID00072" "ID00086"
## [71] "ID00086" "ID00039" "ID00031" "ID00081" "ID00050" "ID00034" "ID00004"
## [78] "ID00013" "ID00069" "ID00025" "ID00052" "ID00022" "ID00089" "ID00032"
## [85] "ID00025" "ID00087" "ID00035" "ID00040" "ID00030" "ID00012" "ID00031"
## [92] "ID00030" "ID00064" "ID00099" "ID00014" "ID00093" "ID00096" "ID00071"
## [99] "ID00067" "ID00023" "ID00079" "ID00085" "ID00037" "ID00008" "ID00051"
## [106] "ID00074" "ID00050" "ID00098" "ID00074" "ID00086" "ID00076" "ID00084"
## [113] "ID00046" "ID00017" "ID00062" "ID00046" "ID00054" "ID00035" "ID00094"
## [120] "ID00079" "ID00024" "ID00087" "ID00007" "ID00093" "ID00079" "ID00023"
## [127] "ID00026" "ID00032" "ID00007" "ID00027" "ID00042" "ID00005" "ID00070"
## [134] "ID00016" "ID00024" "ID00032" "ID00021" "ID00055" "ID00075" "ID00036"
## [141] "ID00083" "ID00089" "ID00039" "ID00054" "ID00090" "ID00009" "ID00071"
## [148] "ID00098" "ID00048" "ID00077" "ID00083" "ID00056" "ID00039" "ID00068"
## [155] "ID00001" "ID00040" "ID00030" "ID00094" "ID00089" "ID00016" "ID00088"
## [162] "ID00054" "ID00075" "ID00048" "ID00020" "ID00067" "ID00093" "ID00036"
## [169] "ID00052" "ID00022" "ID00049" "ID00042" "ID00059" "ID00084" "ID00011"
## [176] "ID00055" "ID00008" "ID00046" "ID00085" "ID00066" "ID00077" "ID00046"
## [183] "ID00070" "ID00072" "ID00044" "ID00032" "ID00036" "ID00045" "ID00014"
## [190] "ID00016" "ID00087" "ID00033" "ID00040" "ID00040" "ID00010" "ID00089"
## [197] "ID00072" "ID00082" "ID00009" "ID00007" "ID00007" "ID00058" "ID00061"
## [204] "ID00074" "ID00024" "ID00063" "ID00054" "ID00023" "ID00026" "ID00033"
## [211] "ID00057" "ID00029" "ID00010" "ID00053" "ID00054" "ID00077" "ID00011"
## [218] "ID00025" "ID00052" "ID00026" "ID00007" "ID00025" "ID00087" "ID00090"
## [225] "ID00032" "ID00084" "ID00024" "ID00057" "ID00073" "ID00023" "ID00014"
## [232] "ID00006" "ID00091" "ID00001" "ID00090" "ID00058" "ID00081" "ID00029"
## [239] "ID00026" "ID00027" "ID00085" "ID00007" "ID00060" "ID00026" "ID00041"
## [246] "ID00084" "ID00006" "ID00094" "ID00031" "ID00093" "ID00017" "ID00064"
## [253] "ID00037" "ID00057" "ID00020" "ID00035" "ID00089" "ID00033" "ID00066"
## [260] "ID00004" "ID00074" "ID00097" "ID00005" "ID00025" "ID00008" "ID00055"
## [267] "ID00089" "ID00085" "ID00045" "ID00018" "ID00042" "ID00031" "ID00006"
## [274] "ID00071" "ID00061" "ID00048" "ID00017" "ID00045" "ID00092" "ID00063"
## [281] "ID00053" "ID00053" "ID00063" "ID00071" "ID00084" "ID00082" "ID00093"
## [288] "ID00017" "ID00097" "ID00002" "ID00049" "ID00002" "ID00013" "ID00024"
## [295] "ID00049" "ID00067" "ID00082" "ID00002" "ID00037" "ID00063"
# Per-ID row counts as a data frame: Var1 holds the ID, Freq the count.
freq <- df_customer$ID_Pelanggan |>
  table() |>
  as.data.frame()
print(freq)
## Var1 Freq
## 1 ID00001 2
## 2 ID00002 3
## 3 ID00004 2
## 4 ID00005 2
## 5 ID00006 4
## 6 ID00007 9
## 7 ID00008 3
## 8 ID00009 5
## 9 ID00010 2
## 10 ID00011 2
## 11 ID00012 1
## 12 ID00013 3
## 13 ID00014 5
## 14 ID00015 1
## 15 ID00016 4
## 16 ID00017 4
## 17 ID00018 1
## 18 ID00020 2
## 19 ID00021 2
## 20 ID00022 2
## 21 ID00023 5
## 22 ID00024 5
## 23 ID00025 7
## 24 ID00026 6
## 25 ID00027 4
## 26 ID00029 2
## 27 ID00030 3
## 28 ID00031 5
## 29 ID00032 6
## 30 ID00033 3
## 31 ID00034 2
## 32 ID00035 3
## 33 ID00036 4
## 34 ID00037 3
## 35 ID00038 2
## 36 ID00039 3
## 37 ID00040 4
## 38 ID00041 3
## 39 ID00042 5
## 40 ID00043 2
## 41 ID00044 1
## 42 ID00045 3
## 43 ID00046 4
## 44 ID00047 1
## 45 ID00048 3
## 46 ID00049 3
## 47 ID00050 3
## 48 ID00051 2
## 49 ID00052 3
## 50 ID00053 5
## 51 ID00054 5
## 52 ID00055 3
## 53 ID00056 1
## 54 ID00057 4
## 55 ID00058 2
## 56 ID00059 1
## 57 ID00060 3
## 58 ID00061 2
## 59 ID00062 1
## 60 ID00063 5
## 61 ID00064 2
## 62 ID00066 2
## 63 ID00067 4
## 64 ID00068 1
## 65 ID00069 3
## 66 ID00070 2
## 67 ID00071 4
## 68 ID00072 5
## 69 ID00073 1
## 70 ID00074 5
## 71 ID00075 2
## 72 ID00076 3
## 73 ID00077 3
## 74 ID00078 1
## 75 ID00079 5
## 76 ID00081 3
## 77 ID00082 4
## 78 ID00083 3
## 79 ID00084 5
## 80 ID00085 4
## 81 ID00086 3
## 82 ID00087 4
## 83 ID00088 1
## 84 ID00089 7
## 85 ID00090 5
## 86 ID00091 4
## 87 ID00092 2
## 88 ID00093 7
## 89 ID00094 4
## 90 ID00095 1
## 91 ID00096 2
## 92 ID00097 3
## 93 ID00098 2
## 94 ID00099 2
# Reload the raw CSV, replacing the in-memory table; any column derived from it
# so far (e.g. Kategori_Penghasilan) is discarded by this reassignment.
df_customer <- read.csv(file = "df_customer.csv")
# 1. Customers who buy most often, counting only rows with
#    Total_Belanja > 5,000,000 (most frequent first; top six shown).
q1 <- sort(
  table(df_customer$ID_Pelanggan[df_customer$Total_Belanja > 5000000]),
  decreasing = TRUE
)
head(q1, n = 6L)
##
## ID00007 ID00025 ID00026 ID00089 ID00053 ID00079
## 7 7 6 6 5 5
# 2. Number of female ("Perempuan") customers living in the city ("Kota") whose
#    ID appears in more than five rows.
# which() drops rows where the condition is NA, matching subset() semantics.
perempuan_kota <- df_customer[
  which(
    df_customer$Jenis_Kelamin == "Perempuan" &
      df_customer$Tempat_Tinggal == "Kota"
  ),
]
frekuensi_perempuan_kota <- table(perempuan_kota[["ID_Pelanggan"]])
sum(frekuensi_perempuan_kota > 5)
## [1] 0
# 3. Customer who buys most often among rows with Penghasilan > 5,000,000.
# Keep only the qualifying rows, then count rows per ID, most frequent first.
q3 <- df_customer[df_customer$Penghasilan > 5000000, ]
q3 <- sort(table(q3$ID_Pelanggan), decreasing = TRUE)
# Only the first entry is shown; IDs tied with it (if any) are not listed.
head(q3, 1)
##
## ID00007
## 9
# 4. Gender breakdown of customers living in the village ("Desa") with
#    Total_Belanja > 5,000,000.
q4 <- subset(df_customer, Tempat_Tinggal == "Desa" & Total_Belanja > 5000000)
# Row count per gender within that subset.
table(q4$Jenis_Kelamin)
##
## Laki-laki Perempuan
## 10 37
# 5. Income of customers living in the village ("Desa") whose
#    Total_Belanja exceeds 5,000,000 (first five matching rows shown).
# which() drops rows where the condition is NA, matching subset() semantics.
q5 <- df_customer[
  which(
    df_customer$Tempat_Tinggal == "Desa" &
      df_customer$Total_Belanja > 5000000
  ),
]
head(q5[c("ID_Pelanggan", "Penghasilan")], n = 5)
## ID_Pelanggan Penghasilan
## 5 ID00067 7773498
## 9 ID00014 6776730
## 43 ID00027 8108645
## 46 ID00089 9032981
## 47 ID00034 5616450