1 Case 1

Assume you have collected some data set from ABC Property as we can see in the following table:

Id             <- (1:10000)
Marketing_Name <- rep(c("Angel","Sherly","Vanessa","Irene","Julian",
                        "Jeffry","Nikita","Kefas","Siana","Lala",
                        "Fallen","Ardifo","Kevin","Juen","Jerrel",
                        "Imelda","Widi","Theodora","Elvani","Jonathan",
                        "Sofia","Abraham","Siti","Niko","Sefli",
                        "Bene", "Diana", "Pupe", "Andi", "Tatha",
                        "Endri", "Monika", "Hans", "Debora","Hanifa",
                        "James", "Jihan", "Friska","Ardiwan", "Bakti",
                        "Anthon","Amry", "Wiwik", "Bastian", "Budi",
                        "Leo","Simon","Matius","Arry", "Eliando"), 200)
Work_Exp       <- rep(c(1.3,2.4,2.5,3.6,3.7,4.7,5.7,6.7,7.7,7.3,
                        5.3,5.3,10,9.3,3.3,3.3,3.4,3.4,3.5,5.6,
                        3.5,4.6,4.6,5.7,6.2,4.4,6.4,6.4,3.5,7.5,
                        4.6,3.7,4.7,4.3,5.2,6.3,7.4,2.4,3.4,8.2,
                        6.4,7.2,1.5,7.5,10,4.5,6.5,7.2,7.1,7.6),200)
City           <- sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),10000, replace = T)
Cluster        <- sample(c("Victoria","Palmyra","Winona","Tiara", "Narada",
                           "Peronia","Lavesh","Alindra","Sweethome", "Asera",
                           "Teradamai","Albasia", "Adara","Neon","Arana",
                           "Asoka", "Primadona", "Mutiara","Permata","Alamanda" ), 10000, replace=T)
Price          <- sample(c(7000:15000),10000, replace = T)
Date_Sales     <- sample(seq(as.Date("2018/01/01"), by = "day", length.out = 1000),10000, replace = T)
Advertisement  <- sample(c(1:20), 10000, replace = T)
Data           <- data.frame(Id, 
                             Marketing_Name,
                             Work_Exp,
                             City,
                             Cluster,
                             Price,
                             Date_Sales,
                             Advertisement)
library(DT)

## Warning: package 'DT' was built under R version 4.0.3

datatable(Data)

1.1 Task 1

Categorize the Price into three groups:

$\text{High} > 1200$
$1000 \le \text{Medium} \le 12000$
$\text{Low} < 10000$

Assign it into a new variable called Class by using If and else if statement.

Data$Class <- ifelse(Data$Price>12000,"High",
                     ifelse(Data$Price<10000,
                            "Low",
                            "Medium"))
View(Data)

1.2 Task 2

Categorize the Price into six groups:

5 % Booking fee if the $\text{Price} < 8000$
6 % Booking fee if the $8000 \le \text{Price} < 9000$
7 % Booking fee if the $9000 \le \text{Price} < 10000$
8 % Booking fee if the $10000 \le \text{Price} < 11000$
9 % Booking fee if the $11000 \le \text{Price} < 13000$
10 % Booking fee if the $13000 \le \text{Price} \le 15000$

Assign it into a new variable called Booking_fee by using if and else if statements.

Data$Booking_Fee <- ifelse(Data$Price< 8000, Data$Price*0.05
                           ,ifelse(Data$Price<9000 & Data$Price>8000, Data$Price*0.06,
                            ifelse(Data$Price<10000 & Data$Price>9000, Data$Price*0.07,
                            ifelse(Data$Price<11000 & Data$Price>10000, Data$Price*0.08,
                            ifelse(Data$Price>11000 & Data$Price<13000, Data$Price*0.09,
                            Data$Price*0.01)))))
View(Data)

1.3 Task 3

According to the final date set that you have created at task 2, I assume you have been working as marketing at ABC Property company, how could you collected all information about your sales by using for statement.

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.3

## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

for(Irene in Data %>% filter(Marketing_Name=="Irene"))
  print(head(Irene,5))

## [1]   4  54 104 154 204
## [1] "Irene" "Irene" "Irene" "Irene" "Irene"
## [1] 3.6 3.6 3.6 3.6 3.6
## [1] "Jakarta"   "Tengerang" "Bogor"     "Tengerang" "Jakarta"  
## [1] "Arana"     "Sweethome" "Arana"     "Permata"   "Victoria" 
## [1]  9844 13889 12247 12166  9447
## [1] "2020-04-26" "2018-03-15" "2018-06-03" "2019-08-21" "2020-04-27"
## [1]  4  9  7 18  1
## [1] "Low"  "High" "High" "High" "Low" 
## [1]  689.08  138.89 1102.23 1094.94  661.29

1.4 Task 4

If you will get pay 2% bonus of the Booking fee per-unit as marketing and also get an extra 1% bonus if you have been working at this company for more than 3 years. Please calculate the total bonus by using if, for, and break statements.

for(Irene in Data){
  Data$Bonus <- ifelse(Data$Work_Exp>3,
  Data$Booking_Fee*0.03,
  Data$Booking_Fee*0.02)
  aggregate(Bonus ~ Marketing_Name, data = Data, sum, T)
  break}
View(Data)

1.5 Task 5

In this section your are expected to be able to use all statements that you just learn earlier. So, please answer the following questions:

Who is the best marketing?

library(dplyr)

Best_Marketing<-aggregate(Price~Marketing_Name,data=Data,sum)

Best_Marketing_Name<- Best_Marketing[order(Best_Marketing$Price,decreasing=T),]%>%
  head(1)%>%
  print()

##    Marketing_Name   Price
## 18         Fallen 2252260

Which City and Cluster is the most profitable?

Best_City<-aggregate(Price~City,data=Data,sum)

Most_City_Profitable<- Best_City[order(Best_City$Price,decreasing=T),]%>%
  head(1)%>%
  print()

##    City    Price
## 3 Depok 22383582

Best_Cluster<-aggregate(Price~Cluster,data=Data,sum)

Most_Cluster_Profitable<- Best_Cluster[order(Best_Cluster$Price,decreasing=T),]%>%
  head(1)%>%
  print()

##   Cluster   Price
## 6   Asera 5992982

Calculate your total advertisement cost, if you have to pay them $4 once.

library(dplyr)
list   <- c()
for(i in "Irene"){
  list[[i]] <- aggregate(Advertisement ~ Marketing_Name, data = Data, sum, T)
  break
}
result <- list[[i]]

Total_Advertisement_Cost  <- function(x){
  Count <- result$Advertisement*4
  Total <- paste(Count, sep = "", "$")
}
result$Advertisement_Cost <- Total_Advertisement_Cost(result$Advertisement)

result%>%filter(Marketing_Name=="Irene")%>%print()

##   Marketing_Name Advertisement Advertisement_Cost
## 1          Irene          1979              7916$

Calculate the average advertisement cost for each marketing.

Average_Advertisement_Cost <- aggregate(Advertisement ~ Marketing_Name, data=Data, mean)
print(Average_Advertisement_Cost)

##    Marketing_Name Advertisement
## 1         Abraham        10.805
## 2            Amry         9.535
## 3            Andi        10.435
## 4           Angel        10.730
## 5          Anthon        10.515
## 6          Ardifo        10.175
## 7         Ardiwan        10.950
## 8            Arry        10.775
## 9           Bakti        10.415
## 10        Bastian        10.485
## 11           Bene        10.570
## 12           Budi        11.255
## 13         Debora         9.605
## 14          Diana        11.585
## 15        Eliando        11.275
## 16         Elvani        10.825
## 17          Endri        10.385
## 18         Fallen        10.440
## 19         Friska         9.815
## 20         Hanifa         9.875
## 21           Hans         9.905
## 22         Imelda        10.440
## 23          Irene         9.890
## 24          James        10.565
## 25         Jeffry        10.945
## 26         Jerrel        10.455
## 27          Jihan        10.200
## 28       Jonathan        10.645
## 29           Juen        11.260
## 30         Julian        10.195
## 31          Kefas        10.705
## 32          Kevin         9.860
## 33           Lala        10.370
## 34            Leo        10.570
## 35         Matius        10.730
## 36         Monika        10.725
## 37         Nikita        10.780
## 38           Niko        10.530
## 39           Pupe        10.440
## 40          Sefli        10.735
## 41         Sherly        10.720
## 42          Siana        10.815
## 43          Simon        10.675
## 44           Siti        10.660
## 45          Sofia        10.345
## 46          Tatha        10.480
## 47       Theodora        10.945
## 48        Vanessa        10.680
## 49           Widi        10.165
## 50          Wiwik        10.545

Calculate the Total Revenue (in Monthly)

library(dplyr)
library(tidyverse)
Data$Revenue     <- Data$Price - Data$Booking_Fee
Monthly_Revenue  <- Data%>%separate(Date_Sales,c("Year","Month","Day"), sep = "-")%>%
                    select(Year,Month, Revenue)
Monthly_Revenue  <- aggregate(Revenue ~ Month + Year, data = Monthly_Revenue,sum)
Monthly_Revenue

##    Month Year Revenue
## 1     01 2018 3142933
## 2     02 2018 2785752
## 3     03 2018 3365445
## 4     04 2018 2966820
## 5     05 2018 3165520
## 6     06 2018 3035318
## 7     07 2018 3167802
## 8     08 2018 3326887
## 9     09 2018 3257464
## 10    10 2018 3069426
## 11    11 2018 2947576
## 12    12 2018 3171052
## 13    01 2019 3258454
## 14    02 2019 3039338
## 15    03 2019 3067721
## 16    04 2019 3111915
## 17    05 2019 3525306
## 18    06 2019 3147552
## 19    07 2019 3235537
## 20    08 2019 3160480
## 21    09 2019 3444583
## 22    10 2019 3159982
## 23    11 2019 3195404
## 24    12 2019 3543213
## 25    01 2020 2981103
## 26    02 2020 2866803
## 27    03 2020 3258863
## 28    04 2020 2958305
## 29    05 2020 3443967
## 30    06 2020 3051025
## 31    07 2020 3041338
## 32    08 2020 3224513
## 33    09 2020 2612396

2 Case 2

Suppose you have a market research project to maintain some potential customers at your company. Let’s assume you are working at ABC insurance company. To do so, you want to collect the following data set:

Marital_Status : assign random marital status (“Yes”, “No”)
Address : assign random address (JABODETABEK)
Work_Location : assign random working location (JABODETABEK)
Age : assign a sequence of random numbers (from 19 to 60)
Academic : assign random academic levels (“J.School”,“H.School”, “Undergraduate”, “Master”, “Phd”)
Job : 10 random jobs for each academic levels
Grade : 5 random grades for each Jobs
Income : assign the possible income for each Jobs
Spending : assign the possible spending for each Jobs
Number_of_children: assign a random number in between 0 and 10 (according to marital status)
Private_vehicle : assign the possible private vehicle for each person (“Car”, “motorcycle”, “Public”)
Home : “Rent”, “Own”, “Credit”

2.1 Task 1

Please provide me a data set about the information of 50000 customers refers to each variable above!

Marital_Status          <- sample(c("Yes","No"),50000,replace=T)
Address                 <- sample(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"),50000,replace=T)
Work_Location           <- sample(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"),50000,replace=T)
Age                     <- sample(c(19:60),50000,replace=T)
Academic                <- sample(c("J.School","H.School","Undergraduate","Master","Phd"),50000,replace=T)
Job                     <- ifelse(Academic =="J.School",
                           sample(c("Konstruksi","Cleaning Service","Operator Warnet","Baby Sitter",
                                    "Supir Pribadi","Tukang Kebun","Staf Gudang","Helper","Security",
                                    "Petani"
                                 ),length(Academic=="J.School"),replace=T),
                           ifelse(Academic=="H.School",
                           sample(c("Customer Service","Penulis","Input Data","Asisten Pilot",
                                    "Anak Buah Kapal","Asisten Dokter Gigi","Staf Algikultur",
                                    "operator produksi","Pengajar Bimbel","Polisi"
                                    ),length(Academic=="H.School"),replace=T),
                           ifelse(Academic=="Undergraduate",
                           sample(c("Admin", "Akuntan", "Freelance designer","Perawat","Pengacara",
                                    "Jaksa","Hakim","Aktuaria","Make-up Artist","Selebgram"
                                ),length(Academic=="Undergraduate"),replace=T),
                           ifelse(Academic=="Master",
                           sample(c("Engineer", "Arsitek", "Guru","Psikolog","Pengacara","Data Analyst", 
                                    "Data Scientist", "Aktuaria","Dosen", "Hakim"
                                    ),length(Academic=="Master"),replace=T),
                        
                           sample(c("Hakim","Dosen", "Dokter","Arsitek","Pilot", "Ahli bedah Syaraf",
                                    "Guru","Aktuaria", "Data Analyst","Ahli bedah jantung"
                                ),length(Academic=="Phd"),replace=T)))))
Grade                   <- sample(c("Staff","Manager","Senior Manager",
                                    "General Manager","Director"),50000, replace=T)
Income                  <- sample(c(3500000:10000000),50000,replace=T)
Spending                <- sample(c(2000000:7000000),50000,replace=T)
Number_of_children      <- ifelse(Marital_Status=="Yes",
                           sample((c(0:10)),
                                  length(Marital_Status=="Yes"),replace=T),"0")
Private_vehicle         <- sample(c("Car","Motorcycle","Public"),50000,replace=T)
Home                    <- sample(c("Rent","Own","Credit"),50000, replace=T)
                          
Data02                  <- data.frame(Marital_Status,
                                      Address, 
                                      Work_Location,
                                      Age,
                                      Academic,
                                      Job,
                                      Grade,
                                      Income,
                                      Spending,
                                      Number_of_children,
                                      Private_vehicle,
                                      Home)
library(DT)
datatable(Data02)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

2.2 Task 2

What kind of important summary statistics you can get from your data set?

summary(Data02)

##  Marital_Status       Address          Work_Location           Age       
##  Length:50000       Length:50000       Length:50000       Min.   :19.00  
##  Class :character   Class :character   Class :character   1st Qu.:29.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :40.00  
##                                                           Mean   :39.51  
##                                                           3rd Qu.:50.00  
##                                                           Max.   :60.00  
##    Academic             Job               Grade               Income       
##  Length:50000       Length:50000       Length:50000       Min.   :3500141  
##  Class :character   Class :character   Class :character   1st Qu.:5146040  
##  Mode  :character   Mode  :character   Mode  :character   Median :6769894  
##                                                           Mean   :6765918  
##                                                           3rd Qu.:8398170  
##                                                           Max.   :9999985  
##     Spending       Number_of_children Private_vehicle        Home          
##  Min.   :2000001   Length:50000       Length:50000       Length:50000      
##  1st Qu.:3253430   Class :character   Class :character   Class :character  
##  Median :4497450   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :4497859                                                           
##  3rd Qu.:5749360                                                           
##  Max.   :6999943

2.3 Task 3

According to your calculations and analysis, which customers are potential for you to maintain?

Data02$Potential<- ifelse(0.35*Data02$Income>Data02$Income-Data02$Spending,"Yes","No")
View(Data02)

2.4 Task 4

The following types of insurance are available in Indonesia:

Life insurance
Health Insurance
Vehicle Insurance
Home and property insurance
Education insurance
Business Insurance
General insurance
Credit Insurance
Marine Insurance
Travel Insurance

Which insurance product will you provide to your customers?

# Calculation & Analysis (to make your answer below are reasonable)
Insurance  <- ifelse(Data02$Age>50,"Life insurance","Education insurance")

# Add new variable to your data set (assign product for each customers)
Data02$Insurance <- ifelse(Data02$Age>50,"Life insurance",
                    ifelse(Data02$Age>40,"Health insurance",
                    ifelse(Data02$Age>30,"Business insurance",
                    ifelse(Data02$Age>20,"Travel insurance",
                    ifelse(Data02$Age>10,"Education insurance",
                    ifelse(Data02$Marital_Status=="Yes","Home and property insurance",
                    ifelse(Data02$Private_vehicle=="Car","Vehicle insurance",
                           "General insurance")))))))
View(Data02)

Midterm Exam ~ Data Structures and Algorithms

Jocelyn Irene Gani

October 29, 2020