1 Case 1

Assume you have collected some data set from ABC Property as we can see in the following table:

Id             <- (1:10000)
Marketing_Name <- rep(c("Angel","Sherly","Vanessa","Irene","Julian",
                        "Jeffry","Nikita","Kefas","Siana","Lala",
                        "Fallen","Ardifo","Kevin","Juen","Jerrel",
                        "Imelda","Widi","Theodora","Elvani","Jonathan",
                        "Sofia","Abraham","Siti","Niko","Sefli",
                        "Bene", "Diana", "Pupe", "Andi", "Tatha",
                        "Endri", "Monika", "Hans", "Debora","Hanifa",
                        "James", "Jihan", "Friska","Ardiwan", "Bakti",
                        "Anthon","Amry", "Wiwik", "Bastian", "Budi",
                        "Leo","Simon","Matius","Arry", "Eliando"), 200)
Work_Exp       <- rep(c(1.3,2.4,2.5,3.6,3.7,4.7,5.7,6.7,7.7,7.3,
                        5.3,5.3,10,9.3,3.3,3.3,3.4,3.4,3.5,5.6,
                        3.5,4.6,4.6,5.7,6.2,4.4,6.4,6.4,3.5,7.5,
                        4.6,3.7,4.7,4.3,5.2,6.3,7.4,2.4,3.4,8.2,
                        6.4,7.2,1.5,7.5,10,4.5,6.5,7.2,7.1,7.6),200)
City           <- sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),10000, replace = T)
Cluster        <- sample(c("Victoria","Palmyra","Winona","Tiara", "Narada",
                           "Peronia","Lavesh","Alindra","Sweethome", "Asera",
                           "Teradamai","Albasia", "Adara","Neon","Arana",
                           "Asoka", "Primadona", "Mutiara","Permata","Alamanda" ), 10000, replace=T)
Price          <- sample(c(7000:15000),10000, replace = T)
Date_Sales     <- sample(seq(as.Date("2018/01/01"), by = "day", length.out = 1000),10000, replace = T)
Advertisement  <- sample(c(1:20), 10000, replace = T)
Data           <- data.frame(Id, 
                             Marketing_Name,
                             Work_Exp,
                             City,
                             Cluster,
                             Price,
                             Date_Sales,
                             Advertisement)
library(DT)
datatable(Data)

1.1 Task 1

Categorize the Price into three groups:

$\text{High} > 12000$
$10000 \le \text{Medium} \le 12000$
$\text{Low} < 10000$

Assign it into a new variable called Class by using If and else if statement.

Data$Class <- ifelse(Data$Price>12000, 
                     "High",
                     ifelse(Data$Price<10000,
                            "Low",
                            "Medium"))
library(DT)
datatable(Data)

1.2 Task 2

Categorize the Price into six groups:

5 % Booking fee if the $\text{Price} < 8000$
6 % Booking fee if the $8000 \le \text{Price} < 9000$
7 % Booking fee if the $9000 \le \text{Price} < 10000$
8 % Booking fee if the $10000 \le \text{Price} < 11000$
9 % Booking fee if the $11000 \le \text{Price} < 13000$
10 % Booking fee if the $13000 \le \text{Price} \le 15000$

Assign it into a new variable called Booking_fee by using if and else if statements.

Data$Booking_fee <- ifelse(Data$Price >= 13000,
                           Data$Price*0.1,
                           ifelse(Data$Price>=11000,
                                  Data$Price*0.09,
                                  ifelse(Data$Price>=10000,
                                         Data$Price*0.08,
                                         ifelse(Data$Price>=9000,
                                                Data$Price*0.07,
                                                ifelse(Data$Price>=8000,
                                                       Data$Price*0.06,
                                                       Data$Price*0.05)))))
library(DT)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

subset(Data, select = c(6, 10)) %>% datatable()

1.3 Task 3

According to the final date set that you have created at task 2, I assume you have been working as marketing at ABC Property company, how could you collected all information about your sales by using for statement.

library(dplyr)
for (i in "Julian"){
  My_Sales <- filter(Data, Marketing_Name==i)
}
library(DT)
datatable(My_Sales)

1.4 Task 4

If you will get pay 2% bonus of the Booking fee per-unit as marketing and also get an extra 1% bonus if you have been working at this company for more than 3 years. Please calculate the total bonus by using if, for, and break statements.

    Data$Bonus <-((0.02+ 
                ifelse(Data$Work_Exp>3,
                       0.01,
                       0))
              * Data$Booking_fee)
library(DT)
library(dplyr)
subset(Data, select = c(10, 11)) %>% datatable()

1.5 Task 5

In this section your are expected to be able to use all statements that you just learn earlier. So, please answer the following questions:

Who is the best marketing?

library(dplyr)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.4.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Marketing_Sales <- aggregate(Price ~ Marketing_Name, 
                            data = Data, 
                            sum)
Best_Marketing <- Marketing_Sales[
  order(Marketing_Sales$Price, 
        decreasing = T),] %>% 
  head (1) %>% 
  print()

##    Marketing_Name   Price
## 10        Bastian 2294323

Which City and Cluster is the most profitable?

library(dplyr)
City_Cluster <- aggregate(Price ~ City + Cluster, 
                          data = Data, 
                          sum)
Best_City_Cluster <- City_Cluster [
  order(City_Cluster$Price, decreasing = T),] %>%
  head(1)%>%
  print()

##         City   Cluster   Price
## 80 Tengerang Sweethome 1448575

Calculate your total advertisement cost, if you have to pay them $4 once.

Data$Advertisement <- as.numeric(Data$Advertisement)
Data$Advertisement_Cost <- Data$Advertisement * 4
Total_Cost <- sum(Data$Advertisement_Cost)
paste("The total cost was", Total_Cost)

## [1] "The total cost was 420236"

Calculate the average advertisement cost for each marketing.

Marketing_Ads_Avg <- aggregate(Advertisement_Cost ~ Marketing_Name, 
                           data = Data,
                           mean)
library(DT)
datatable(Marketing_Ads_Avg)

Calculate the Total Revenue (in Monthly) $\text{Total Revenue}=\text{Price}+\text{Booking Fee}$

library(dplyr)
Data$Revenue <- Data$Booking_fee + Data$Price
Monthly_Revenue <- Data %>% 
  separate(Date_Sales, c("Year", "Month", "Day"), sep = "-") %>%
  select(Year, Month, Revenue)
Total_Monthly_Revenue <- aggregate(Revenue ~ Year+Month,Monthly_Revenue, sum)
Total_Monthly_Revenue <- Total_Monthly_Revenue[order(Total_Monthly_Revenue$Year, decreasing = F),]
library(DT)
datatable(Total_Monthly_Revenue)

2 Case 2

Suppose you have a market research project to maintain some potential customers at your company. Let’s assume you are working at ABC insurance company. To do so, you want to collect the following data set:

Marital_Status : assign random marital status (“Yes”, “No”)
Address : assign random address (JABODETABEK)
Work_Location : assign random working location (JABODETABEK)
Age : assign a sequence of random numbers (from 19 to 60)
Academic : assign random academic levels (“J.School”,“H.School”, “Undergraduate”, “Master”, “Phd”)
Job : 10 random jobs for each academic levels
Grade : 5 random grades for each Jobs
Income : assign the possible income for each Jobs
Spending : assign the possible spending for each Jobs
Number_of_children: assign a random number in between 0 and 10 (according to marital status)
Private_vehicle : assign the possible private vehicle for each person (“Car”, “motorcycle”, “Public”)
Home : “Rent”, “Own”, “Credit”

2.1 Task 1

Please provide me a data set about the information of 50000 customers refers to each variable above!

Marital_Status <- sample(c("yes","no"),
                         50000,
                         T)
Address <- sample(c("Jakarta", 
                    "Bogor", 
                    "Depok", 
                    "Tangerang", 
                    "Bekasi"),
                  50000,
                  T)
Work_Location <- sample(c("Jakarta", 
                          "Bogor", 
                          "Depok", 
                          "Tangerang", 
                          "Bekasi"),
                        50000,
                        T)
Age <- sample(19:60,
              50000,
              T)
Academic <- sample(c("J.School",
                     "H.School",
                     "Undergraduate",
                     "Master",
                     "PhD"),50000,T)
Job <- ifelse (Academic=="J.School",
               sample(c("Office Boy/Office Girl", "kitchen helper",
                        "Customer Service", "Buruh",
                        "Cleaning Service", "Security",
                        "Packing Barang","Waiter/Waitress",
                        "Staff Gudang", "Driver")),
                      ifelse(Academic=="H.School",
                             sample(c("Staff Accounting", "Marketing",
                                      "Operator Produksi", "Guru Les",
                                      "Staff Administrasi", "Polisi",
                                      "Data Entry", "Kasir",
                                      "Back Office", "ABK")), 
                             ifelse(Academic=="Undergraduate",
                                    sample(c("Pengajar", "Pilot",
                                             "Pengacara Konsultan", "Nakhoda",
                                             "Software Developer", "Masinis",
                                             "Staff Desain Perencanaan", "PNS",
                                             "Market Researcher", "Dokter")), 
                                    ifelse(Academic=="Master", 
                                           sample(c("Aktuaris", "Data Analyst",
                                                    "Data Scientist", "Dokter Spesialis",
                                                    "Branch Manager", "Enterpreneur",
                                                    "Notaris", "Jaksa",
                                                    "Psikolog", "Supervisor")),
                                           sample(c("CEO", "Hakim",
                                                    "Researcher", "Manager",
                                                    "Komandan", "President",
                                                    "Menteri", "Neuroscientist",
                                                    "Rektor", "Kaprodi"))
                                           ))))
Grade <- sample(c("Novice", 
                  "Adv. Beginner", 
                  "Competent", 
                  "Proficient",
                  "Expert"),
                50000,
                T)
Salary_Function <- function(x,y){
  # Basic_Salary based on Academic
  J.School <- sample(c(500000:2000000))
  H.School <- sample(c(2000000:3000000))
  Undergraduate <- sample(c(3000000:5000000))
  Master <- sample(c(5000000:15000000))
  PhD <- sample(c(15000000:50000000))
  Basic_Salary<-ifelse(x=="J.School",
                       J.School,
                       ifelse(x=="H.School",
                              H.School,
                              ifelse(x=="Undergraduate",
                                     Undergraduate,
                                     ifelse(x=="Master",
                                            Master,
                                            PhD))))
  # Extra_Salary based on the Grade (Experience and Skill)
  Novice <- 1
  Adv.Beginner <- 1.3
  Competent <- 1.5
  Proficient <- 1.8
  Expert <- 2
  Extra_Salary <- ifelse(y=="Novice",
                         Novice,
                         ifelse(y=="Adv.Beginner",
                                Adv.Beginner,
                                ifelse(y=="Competent",
                                       Competent,
                                       ifelse(y=="Proficient",
                                              Proficient,
                                              Expert))))
  result <- round(Basic_Salary * Extra_Salary, digits = -3)
  return(result)
}
Income <- Salary_Function(Academic, Grade)
# Ideal spending is 75% of income, and save the rest
Spending <- 0.75*Income
Number_of_Children <- ifelse(Marital_Status=="yes",
                             sample(c(0:10)),
                             0)
# Requirement to be able to have a motorcycle is the amount of 24 monthly saving are more than 30,000,000 and the requirement to be able to have a car is the amount of 48 monthly saving are more than 100,000,000
Private_Vehicle <- ifelse(48*0.25*Income >= 100000000,
                          sample(c("Car", "Motorcycle", "Public")),
                          ifelse(24*0.25*Income >= 30000000,
                                 sample(c("Motorcycle", "Public")),
                                 "Public"))
Home <- sample(c("Rent", "Own", "Credit"),50000, T)
Customer <- data.frame(Marital_Status,
                       Address,
                       Work_Location,
                       Age,
                       Academic,
                       Job,
                       Grade,
                       Income,
                       Spending,
                       Number_of_Children,
                       Private_Vehicle,
                       Home)
library(DT)
datatable(Customer)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

2.2 Task 2

What kind of important summary statistics you can get from your data set?

lapply(Customer, summary)

## $Marital_Status
##    Length     Class      Mode 
##     50000 character character 
## 
## $Address
##    Length     Class      Mode 
##     50000 character character 
## 
## $Work_Location
##    Length     Class      Mode 
##     50000 character character 
## 
## $Age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   29.00   40.00   39.57   50.00   60.00 
## 
## $Academic
##    Length     Class      Mode 
##     50000 character character 
## 
## $Job
##    Length     Class      Mode 
##     50000 character character 
## 
## $Grade
##    Length     Class      Mode 
##     50000 character character 
## 
## $Income
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   500000  3530000  6641000 16749662 20754500 99929000 
## 
## $Spending
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   375000  2647500  4980750 12562247 15565875 74946750 
## 
## $Number_of_Children
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   2.497   5.000  10.000 
## 
## $Private_Vehicle
##    Length     Class      Mode 
##     50000 character character 
## 
## $Home
##    Length     Class      Mode 
##     50000 character character

Statistical information such as mean, mean, quartile, min and max from numerical data such as age, inclusion, expenditure, and number of children is important information to determine the distribution of data.
We can plan the best insurance by focusing on the mode and mean and keeping in mind the upper and lower quartiles in order to reach as many customers as possible with the maximum profit.

2.3 Task 3

According to your calculations and analysis, which customers are potential for you to maintain?

# Average insurance premium is Rp. 250,000. SO, will be best for us to maintain the customer that 30% of their savings is more equal than Rp. 250,000.
Customer$Maintain <- ifelse(0.3*0.25*Customer$Income >= 250000,
                            "yes",
                            "no")
library(DT)
library(dplyr)
subset(Customer, select = c(8, 13)) %>% datatable()

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

2.4 Task 4

The following types of insurance are available in Indonesia:

Life insurance
Health Insurance
Vehicle Insurance
Home and property insurance
Education insurance
Business Insurance
General insurance
Credit Insurance
Marine Insurance
Travel Insurance

Assume my company provide only 4 insurances below with premium Rp 250,000 every month

Life Insurance
Health Insurance
Vehicle Insurance
Property Insurance

Which insurance product will you provide to your customers?

# Calculation & Analysis (to make your answer below are reasonable)

# For people with vehicle and house, we will offer all of the product we have. We will assign product based on how much they have. If  30% of their savings is more equal than Rp. 250,000 we will assign only one product, and so on.

# For people with only vehicle, we will offer all of the product we have except for property Insurance. We will assign product based on how much they have. If  30% of their savings is more equal than Rp. 250,000 we will assign only one product, and so on.

# For people with only house, we will offer all of the product we have except for vehicle insurance. We will assign product based on how much they have. If  30% of their savings is more equal than Rp. 400,000 we will assign only one product, and so on.

# For people without either house and vehicle, we will offer only life/health insurance. We will assign product based on how much they have. If  30% of their savings is more equal than Rp. 400,000 we will assign only one product, and if their savings more than Rp. 800,000 we will assign both.

# The reason why we always assign Health and Life is because all of our customers are worker that risk their life to work everyday. So, our company decide that these two are the most important insurance after all. 

# In addition, we will not offer our product  neither agree the submission of people whose 30% of their savings is less than Rp. 250,000 because of the risk of being late or not even paying.

# Add new variable to your data set (assign product for each customers)
Customer$Insurance_Product <- ifelse(0.3*0.25*Customer$Income >= 1000000 &
                                       Customer$Private_Vehicle != "Public" &
                                       Customer$Home != "Rent", 
                                     paste(sample(c("Vehicle", "Property",
                                                    "Health", "Life"),
                                                  size = sample(c(1:4))),
                                           collapse = ", "),
                                     ifelse(0.3*0.25*Customer$Income >= 750000 &
                                              Customer$Private_Vehicle != "Public" &
                                              Customer$Home != "Rent",
                                            paste(sample(c("Vehicle", "Property",
                                                           "Health", "Life"),
                                                         size = sample(c(1:3))),
                                                  collapse = ", "),
                                            ifelse(0.3*0.25*Customer$Income >= 500000 &
                                                     Customer$Private_Vehicle != "Public" &
                                                     Customer$Home != "Rent",
                                                   paste(sample(c("Vehicle", "Property",
                                                                  "Health", "Life"),
                                                                size = sample(c(1:2))),
                                                         collapse = ", "),
                                                   ifelse(0.3*0.25*Customer$Income >= 250000 &
                                                            Customer$Private_Vehicle != "Public" &
                                                            Customer$Home != "Rent",
                                                          sample(c("Vehicle", "Property",
                                                                   "Health", "Life"),
                                                          size = 1),
                                                          ifelse(0.3*0.25*Customer$Income >= 750000 &
                                                                   Customer$Private_Vehicle != "Public" &
                                                                   Customer$Home == "Rent",
                                                                 paste(sample(c("Vehicle",
                                                                                "Health", 
                                                                                "Life"),
                                                                              size = sample(c(1:3))),
                                                                       collapse = ", "),
                                                                 ifelse(0.3*0.25*Customer$Income >= 500000 &
                                                                          Customer$Private_Vehicle != "Public" &
                                                                          Customer$Home == "Rent",
                                                                        paste(sample(c("Vehicle",
                                                                                       "Health",
                                                                                       "Life"),
                                                                                     size = sample(c(1:2))),
                                                                              collapse = ", "),
                                                                        ifelse(0.3*0.25*Customer$Income >= 250000 &
                                                                                 Customer$Private_Vehicle != "Public" &
                                                                                 Customer$Home == "Rent",
                                                                               sample(c("Vehicle",
                                                                                        "Health",
                                                                                        "Life"),
                                                                                      size = 1),
                                                                               ifelse(0.3*0.25*Customer$Income >= 750000 &
                                                                                        Customer$Private_Vehicle == "Public" &
                                                                                        Customer$Home != "Rent",
                                                                                      paste(sample(c("Property",
                                                                                                     "Health",
                                                                                                     "Life"),
                                                                                                   size = sample(c(1:3))),
                                                                                            collapse = ", "),
                                                                                      ifelse(0.3*0.25*Customer$Income >= 500000 &
                                                                                               Customer$Private_Vehicle == "Public" &
                                                                                               Customer$Home != "Rent",
                                                                                             paste(sample(c("Property",
                                                                                                            "Health",
                                                                                                            "Life"),
                                                                                                          size = sample(c(1:2))),
                                                                                                   collapse = ", "),
                                                                                             ifelse(0.3*0.25*Customer$Income >= 250000 &
                                                                                                      Customer$Private_Vehicle == "Public" &
                                                                                                      Customer$Home != "Rent",
                                                                                                    sample(c("Property",
                                                                                                             "Health",
                                                                                                             "Life"),
                                                                                                           size = 1),
                                                                                                    ifelse(0.3*0.25*Customer$Income >= 500000 &
                                                                                                             Customer$Private_Vehicle == "Public" &
                                                                                                             Customer$Home == "Rent",
                                                                                                           paste(sample(c("Health",
                                                                                                                          "Life"),
                                                                                                                        size = sample(c(1:2))),
                                                                                                                 collapse = ", "),
                                                                                                           ifelse(0.3*0.25*Customer$Income >= 250000 &
                                                                                                                    Customer$Private_Vehicle == "Public" &
                                                                                                                    Customer$Home == "Rent",
                                                                                                                  sample(c("Health",
                                                                                                                           "Life"),
                                                                                                                         size = 1),
                                                                                                                  "No Product"
                                                                                                    ))))))))))))
library(DT)
datatable(Customer)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Midterm Exam ~ Data Structures and Algorithms

Julian Salomo

October 29, 2020