library(readxl)
# Read the customer workbook into `df_customer` and display the result.
df_customer <- read_xlsx(path = "df_customerr.xlsx")
print(df_customer)
## # A tibble: 300 × 5
##    ID_Pelanggan Jenis_Kelamin Tempat_Tinggal Penghasilan Total_Belanja
##    <chr>        <chr>         <chr>                <dbl>         <dbl>
##  1 ID00031      Laki-laki     Desa               2227350       2563031
##  2 ID00079      Perempuan     Kota               9047608       8369550
##  3 ID00051      Perempuan     Kota               9735540       8053033
##  4 ID00014      Laki-laki     Kota              13510126       9799876
##  5 ID00067      Perempuan     Desa               7773498       6982081
##  6 ID00042      Laki-laki     Desa               6666740       4782002
##  7 ID00050      Perempuan     Desa               5658721       4286283
##  8 ID00043      Laki-laki     Desa               7637656       4779797
##  9 ID00014      Perempuan     Desa               6776730       6315967
## 10 ID00025      Laki-laki     Kota              10412102       5106141
## # ℹ 290 more rows
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Row count of the full data set.
dim(df_customer)[1]
## [1] 300
# Number of distinct customer IDs; fewer than the row count, so IDs repeat.
df_customer$ID_Pelanggan |>
  unique() |>
  length()
## [1] 94
# The three IDs that occur on the most rows, with their counts.
(df_customer$ID_Pelanggan |>
  table() |>
  sort(decreasing = TRUE))[seq_len(3)]
## 
## ID00007 ID00025 ID00089 
##       9       7       7
# Mean Penghasilan for each Jenis_Kelamin group.
aggregate(
  Penghasilan ~ Jenis_Kelamin,
  data = df_customer,
  FUN = mean
)
##   Jenis_Kelamin Penghasilan
## 1     Laki-laki     8880902
## 2     Perempuan     8505199
# Mean Penghasilan for each Tempat_Tinggal group.
aggregate(
  Penghasilan ~ Tempat_Tinggal,
  data = df_customer,
  FUN = mean
)
##   Tempat_Tinggal Penghasilan
## 1           Desa     6249122
## 2           Kota     9878685
# Mean Total_Belanja for each Tempat_Tinggal group.
aggregate(
  Total_Belanja ~ Tempat_Tinggal,
  data = df_customer,
  FUN = mean
)
##   Tempat_Tinggal Total_Belanja
## 1           Desa       5022231
## 2           Kota       7520118
# Five rows with the largest Total_Belanja, showing only the ID and the amount.
head(
  df_customer[
    order(df_customer$Total_Belanja, decreasing = TRUE),
    c("ID_Pelanggan", "Total_Belanja")
  ],
  n = 5
)
## # A tibble: 5 × 2
##   ID_Pelanggan Total_Belanja
##   <chr>                <dbl>
## 1 ID00034           11626302
## 2 ID00011           11527638
## 3 ID00057           11031197
## 4 ID00093           10984825
## 5 ID00007           10846012
# Number of rows for each value of Jenis_Kelamin.
df_customer$Jenis_Kelamin |> table()
## 
## Laki-laki Perempuan 
##       121       179
# Bin Penghasilan into three labelled bands. Intervals are closed on the right
# (stated explicitly here), so the bands are (-Inf, 5e6], (5e6, 1e7] and
# (1e7, Inf].
df_customer$Kategori_Penghasilan <- cut(
  x = df_customer$Penghasilan,
  breaks = c(-Inf, 5e6, 1e7, Inf),
  labels = c("Rendah", "Menengah", "Tinggi"),
  right = TRUE
)
# Row count in each income band.
df_customer$Kategori_Penghasilan |> table()
## 
##   Rendah Menengah   Tinggi 
##       27      175       98
# Keep rows whose Total_Belanja exceeds 5,000,000. which() discards rows where
# the comparison is NA, so no all-NA rows are introduced.
df_filter1 <- df_customer[which(df_customer$Total_Belanja > 5e6), ]
# Number of qualifying rows per customer ID.
df_filter1$ID_Pelanggan |> table()
## 
## ID00001 ID00002 ID00004 ID00005 ID00006 ID00007 ID00008 ID00009 ID00010 ID00011 
##       1       3       2       1       4       7       2       4       2       1 
## ID00012 ID00013 ID00014 ID00015 ID00016 ID00017 ID00018 ID00021 ID00022 ID00023 
##       1       3       3       1       4       2       1       2       2       4 
## ID00024 ID00025 ID00026 ID00027 ID00029 ID00030 ID00031 ID00032 ID00033 ID00034 
##       4       7       6       1       1       3       4       4       2       2 
## ID00035 ID00036 ID00037 ID00038 ID00039 ID00040 ID00041 ID00042 ID00043 ID00044 
##       3       4       3       2       3       3       2       3       1       1 
## ID00045 ID00046 ID00047 ID00048 ID00049 ID00050 ID00051 ID00052 ID00053 ID00054 
##       3       3       1       2       2       2       2       3       5       2 
## ID00055 ID00056 ID00057 ID00058 ID00059 ID00060 ID00061 ID00063 ID00064 ID00066 
##       3       1       4       2       1       3       1       4       1       2 
## ID00067 ID00069 ID00070 ID00071 ID00072 ID00073 ID00074 ID00075 ID00076 ID00077 
##       4       3       1       3       3       1       4       2       2       1 
## ID00078 ID00079 ID00082 ID00083 ID00084 ID00085 ID00086 ID00087 ID00088 ID00089 
##       1       5       3       2       5       3       2       4       1       6 
## ID00090 ID00091 ID00092 ID00093 ID00094 ID00095 ID00096 ID00097 ID00098 ID00099 
##       5       4       2       5       3       1       2       2       1       1
# Customer ID(s) with the most rows in df_filter1. Every ID tied for the
# maximum is returned rather than only the first one after sorting: ID00007
# and ID00025 both have 7 qualifying rows.
freq1 <- table(df_filter1$ID_Pelanggan)
freq1[freq1 == max(freq1)]
## 
## ID00007 ID00025 
##       7       7
# Rows where Jenis_Kelamin is "Perempuan" and Tempat_Tinggal is "Kota".
# which() discards rows where the combined condition is NA.
df_filter2 <- df_customer[
  which(
    df_customer$Jenis_Kelamin == "Perempuan" &
      df_customer$Tempat_Tinggal == "Kota"
  ),
]
freq2 <- table(df_filter2$ID_Pelanggan)
# How many customer IDs have more than five such rows.
length(which(freq2 > 5))
## [1] 0
# Keep rows whose Penghasilan exceeds 5,000,000. which() discards rows where
# the comparison is NA, so no all-NA rows are introduced.
df_filter3 <- df_customer[which(df_customer$Penghasilan > 5e6), ]
# Number of qualifying rows per customer ID.
df_filter3$ID_Pelanggan |> table()
## 
## ID00001 ID00002 ID00004 ID00005 ID00006 ID00007 ID00008 ID00009 ID00010 ID00011 
##       2       3       2       2       4       9       3       5       2       1 
## ID00012 ID00013 ID00014 ID00015 ID00016 ID00017 ID00018 ID00020 ID00021 ID00022 
##       1       2       5       1       4       3       1       2       2       2 
## ID00023 ID00024 ID00025 ID00026 ID00027 ID00029 ID00030 ID00031 ID00032 ID00033 
##       5       5       7       6       2       2       3       4       4       1 
## ID00034 ID00035 ID00036 ID00037 ID00038 ID00039 ID00040 ID00041 ID00042 ID00043 
##       2       3       4       2       2       3       3       2       4       2 
## ID00044 ID00045 ID00046 ID00047 ID00048 ID00049 ID00050 ID00051 ID00052 ID00053 
##       1       3       4       1       2       2       3       2       3       5 
## ID00054 ID00055 ID00056 ID00057 ID00058 ID00059 ID00060 ID00061 ID00062 ID00063 
##       4       3       1       4       2       1       3       2       1       5 
## ID00064 ID00066 ID00067 ID00068 ID00069 ID00070 ID00071 ID00072 ID00073 ID00074 
##       2       1       4       1       3       2       4       2       1       5 
## ID00075 ID00076 ID00077 ID00078 ID00079 ID00081 ID00082 ID00083 ID00084 ID00085 
##       2       3       3       1       5       2       4       3       5       4 
## ID00086 ID00087 ID00088 ID00089 ID00090 ID00091 ID00092 ID00093 ID00094 ID00095 
##       3       4       1       6       5       4       2       7       3       1 
## ID00096 ID00097 ID00099 
##       2       3       1
# Customer ID(s) with the most rows in df_filter3. Selecting every ID equal to
# the maximum (instead of the first element of a sorted table) keeps ties
# visible; with this data there is a single maximum.
freq3 <- table(df_filter3$ID_Pelanggan)
freq3[freq3 == max(freq3)]
## ID00007 
##       9
# Rows where Tempat_Tinggal is "Desa" and Total_Belanja exceeds 5,000,000.
df_filter4 <- subset(
  df_customer,
  Tempat_Tinggal == "Desa" & Total_Belanja > 5000000
)
# Distinct Jenis_Kelamin values present among those rows.
unique(df_filter4$Jenis_Kelamin)
## [1] "Perempuan" "Laki-laki"
# df_filter5 uses exactly the same filter as df_filter4, so reuse that result
# instead of repeating the condition; the two can then never drift apart.
df_filter5 <- df_filter4
# ID, income and spending for the filtered rows.
df_filter5[, c("ID_Pelanggan", "Penghasilan", "Total_Belanja")]
## # A tibble: 47 × 3
##    ID_Pelanggan Penghasilan Total_Belanja
##    <chr>              <dbl>         <dbl>
##  1 ID00067          7773498       6982081
##  2 ID00014          6776730       6315967
##  3 ID00027          8108645       6901502
##  4 ID00089          9032981       5776859
##  5 ID00034          5616450       7064321
##  6 ID00013          4481204       5438461
##  7 ID00091          6128487       5990469
##  8 ID00038          5947963       5094570
##  9 ID00041          9231091       8371463
## 10 ID00047          5940612       6172545
## # ℹ 37 more rows