1 Case 1

Assume you have collected some data set from ABC Property as we can see in the following table:

Id             <- (1:10000)
Marketing_Name <- rep(c("Angel","Sherly","Vanessa","Irene","Julian",
                        "Jeffry","Nikita","Kefas","Siana","Lala",
                        "Fallen","Ardifo","Kevin","Juen","Jerrel",
                        "Imelda","Widi","Theodora","Elvani","Jonathan",
                        "Sofia","Abraham","Siti","Niko","Sefli",
                        "Bene", "Diana", "Pupe", "Andi", "Tatha",
                        "Endri", "Monika", "Hans", "Debora","Hanifa",
                        "James", "Jihan", "Friska","Ardiwan", "Bakti",
                        "Anthon","Amry", "Wiwik", "Bastian", "Budi",
                        "Leo","Simon","Matius","Arry", "Eliando"), 200)
Work_Exp       <- rep(c(1.3,2.4,2.5,3.6,3.7,4.7,5.7,6.7,7.7,7.3,
                        5.3,5.3,10,9.3,3.3,3.3,3.4,3.4,3.5,5.6,
                        3.5,4.6,4.6,5.7,6.2,4.4,6.4,6.4,3.5,7.5,
                        4.6,3.7,4.7,4.3,5.2,6.3,7.4,2.4,3.4,8.2,
                        6.4,7.2,1.5,7.5,10,4.5,6.5,7.2,7.1,7.6),200)
City           <- sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),10000, replace = T)
Cluster        <- sample(c("Victoria","Palmyra","Winona","Tiara", "Narada",
                           "Peronia","Lavesh","Alindra","Sweethome", "Asera",
                           "Teradamai","Albasia", "Adara","Neon","Arana",
                           "Asoka", "Primadona", "Mutiara","Permata","Alamanda" ), 10000, replace=T)
Price          <- sample(c(7000:15000),10000, replace = T)
Date_Sales     <- sample(seq(as.Date("2018/01/01"), by = "day", length.out = 1000),10000, replace = T)
Advertisement  <- sample(c(1:20), 10000, replace = T)
Data           <- data.frame(Id, 
                             Marketing_Name,
                             Work_Exp,
                             City,
                             Cluster,
                             Price,
                             Date_Sales,
                             Advertisement)
library(DT)
datatable(Data)

1.1 Task 1

Categorize the Price into three groups:

$\text{High} > 1200$
$1000 \le \text{Medium} \le 12000$
$\text{Low} < 10000$

Assign it into a new variable called Class by using If and else if statement.

category_price<-ifelse(Data$Price>12000, "High"
                       , ifelse(Data$Price<10000, "Low",
                                "Medium"))
Data$Class<-category_price
print(head(Data, 5))

##   Id Marketing_Name Work_Exp    City  Cluster Price Date_Sales Advertisement
## 1  1          Angel      1.3   Depok  Albasia 10395 2018-10-26             8
## 2  2         Sherly      2.4  Bekasi Victoria 10250 2019-09-14             4
## 3  3        Vanessa      2.5  Bekasi    Asera 11458 2020-04-27             2
## 4  4          Irene      3.6 Jakarta   Lavesh  9610 2018-01-16            12
## 5  5         Julian      3.7 Jakarta    Asera  7978 2020-06-11            16
##    Class
## 1 Medium
## 2 Medium
## 3 Medium
## 4    Low
## 5    Low

1.2 Task 2

Categorize the Price into six groups:

5 % Booking fee if the $\text{Price} < 8000$
6 % Booking fee if the $8000 \le \text{Price} < 9000$
7 % Booking fee if the $9000 \le \text{Price} < 10000$
8 % Booking fee if the $10000 \le \text{Price} < 11000$
9 % Booking fee if the $11000 \le \text{Price} < 13000$
10 % Booking fee if the $13000 \le \text{Price} \le 15000$

Assign it into a new variable called Booking_fee by using if and else if statements.

Booking_price<-ifelse(Data$Price<8000, Data$Price*0.05
                       , ifelse(Data$Price<9000 & Data$Price>8000, Data$Price*0.06,
                          ifelse(Data$Price<10000 & Data$Price>9000, Data$Price*0.07,
                          ifelse(Data$Price<11000 & Data$Price>10000, Data$Price*0.08,
                          ifelse(Data$Price>11000 & Data$Price<13000, Data$Price*0.09,
                          Data$Price*0.1)))))
Data$Booking_fee<-Booking_price
print(head(Data, 5))

##   Id Marketing_Name Work_Exp    City  Cluster Price Date_Sales Advertisement
## 1  1          Angel      1.3   Depok  Albasia 10395 2018-10-26             8
## 2  2         Sherly      2.4  Bekasi Victoria 10250 2019-09-14             4
## 3  3        Vanessa      2.5  Bekasi    Asera 11458 2020-04-27             2
## 4  4          Irene      3.6 Jakarta   Lavesh  9610 2018-01-16            12
## 5  5         Julian      3.7 Jakarta    Asera  7978 2020-06-11            16
##    Class Booking_fee
## 1 Medium      831.60
## 2 Medium      820.00
## 3 Medium     1031.22
## 4    Low      672.70
## 5    Low      398.90

1.3 Task 3

According to the final date set that you have created at task 2, I assume you have been working as marketing at ABC Property company, how could you collected all information about your sales by using for statement.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

for (i in "Vanessa"){
  Data%>%filter(Marketing_Name==i)%>%head(10)%>%print()
  break
}

##     Id Marketing_Name Work_Exp      City   Cluster Price Date_Sales
## 1    3        Vanessa      2.5    Bekasi     Asera 11458 2020-04-27
## 2   53        Vanessa      2.5     Bogor   Mutiara  8507 2019-04-28
## 3  103        Vanessa      2.5 Tengerang Primadona 13339 2018-10-17
## 4  153        Vanessa      2.5    Bekasi     Adara  7053 2019-04-29
## 5  203        Vanessa      2.5   Jakarta  Victoria 14218 2019-08-20
## 6  253        Vanessa      2.5 Tengerang  Alamanda 14964 2020-05-19
## 7  303        Vanessa      2.5   Jakarta   Palmyra 13843 2020-01-31
## 8  353        Vanessa      2.5     Bogor      Neon  7953 2020-03-23
## 9  403        Vanessa      2.5 Tengerang    Narada 11171 2018-03-07
## 10 453        Vanessa      2.5     Bogor     Adara 13339 2019-07-28
##    Advertisement  Class Booking_fee
## 1              2 Medium     1031.22
## 2              9    Low      510.42
## 3             18   High     1333.90
## 4              2    Low      352.65
## 5             16   High     1421.80
## 6             11   High     1496.40
## 7             19   High     1384.30
## 8              4    Low      397.65
## 9              9 Medium     1005.39
## 10            10   High     1333.90

1.4 Task 4

If you will get pay 2% bonus of the Booking fee per-unit as marketing and also get an extra 1% bonus if you have been working at this company for more than 3 years. Please calculate the total bonus by using if, for, and break statements.

library(dplyr)
list<-c()
for (i in Data$Marketing_Name){
 Data$Bonus <- ifelse(Data$Work_Exp>3, Data$Booking_fee*0.03, Data$Booking_fee*0.02)
 list[[i]]<- aggregate(Bonus ~ Marketing_Name, data = Data, sum, T)
  break}
result<-list[[i]]
result      <- result[order(result$Bonus,decreasing=T),]
result%>%filter(Marketing_Name=="Vanessa")%>%print()

##   Marketing_Name    Bonus
## 1        Vanessa 3710.032

1.5 Task 5

In this section your are expected to be able to use all statements that you just learn earlier. So, please answer the following questions:

Who is the best marketing?

library(dplyr)
list_by_Price       <-aggregate(Price~ Marketing_Name , data = Data, sum)
result_price        <-list_by_Price[order(list_by_Price$Price,decreasing=T),]
list_by_adv         <-aggregate(Advertisement~ Marketing_Name , data = Data, sum)
Datasales <- data.frame(left_join(result_price, list_by_adv))%>%head(1)

## Joining, by = "Marketing_Name"

Datasales

##   Marketing_Name   Price Advertisement
## 1         Fallen 2249681          1988

print(paste("the best marketing", sep = " is ", Datasales$Marketing_Name ))

## [1] "the best marketing is Fallen"

Which City and Cluster is the most profitable?

library(dplyr)
Profitable_City_Cluster <- aggregate ( Price ~ Cluster + City, data= Data, sum)
Data_City_Cluster <-Profitable_City_Cluster[order(Profitable_City_Cluster$Price,decreasing=T),]%>%head(1)
Data_City_Cluster

##      Cluster  City   Price
## 36 Sweethome Bogor 1356433

print(paste(Data_City_Cluster$Cluster, "and", Data_City_Cluster$City, "is the most profitable"))

## [1] "Sweethome and Bogor is the most profitable"

Calculate your total advertisement cost, if you have to pay them $4 once.

library(dplyr)
list<-c()
for (i in "Vanessa"){
 list[[i]]<- aggregate(Advertisement ~ Marketing_Name, data = Data, sum, T)
  break}

result<-list[[i]]

Total.ADV<- function(x){
  Count <-result$Advertisement*4
  Total <- paste(Count, sep = "", "$")} 
result$Advertisement_cost<-Total.ADV(result$Advertisement)

result%>%filter(Marketing_Name=="Vanessa")%>%print()

##   Marketing_Name Advertisement Advertisement_cost
## 1        Vanessa          2196              8784$

Calculate the average advertisement cost for each marketing.

Data$Advertisement.cost <- Data$Advertisement*4
aggregate(Advertisement.cost ~ Marketing_Name, data= Data, mean)

##    Marketing_Name Advertisement.cost
## 1         Abraham              41.20
## 2            Amry              41.44
## 3            Andi              38.42
## 4           Angel              43.22
## 5          Anthon              42.98
## 6          Ardifo              44.44
## 7         Ardiwan              42.18
## 8            Arry              41.66
## 9           Bakti              42.18
## 10        Bastian              43.80
## 11           Bene              42.82
## 12           Budi              41.54
## 13         Debora              42.82
## 14          Diana              39.06
## 15        Eliando              42.10
## 16         Elvani              41.28
## 17          Endri              38.98
## 18         Fallen              39.76
## 19         Friska              40.22
## 20         Hanifa              40.86
## 21           Hans              41.02
## 22         Imelda              44.00
## 23          Irene              39.82
## 24          James              41.50
## 25         Jeffry              43.34
## 26         Jerrel              41.74
## 27          Jihan              43.40
## 28       Jonathan              45.32
## 29           Juen              40.74
## 30         Julian              40.48
## 31          Kefas              42.10
## 32          Kevin              40.48
## 33           Lala              41.82
## 34            Leo              45.44
## 35         Matius              40.72
## 36         Monika              44.62
## 37         Nikita              44.58
## 38           Niko              44.36
## 39           Pupe              41.80
## 40          Sefli              42.86
## 41         Sherly              42.66
## 42          Siana              38.92
## 43          Simon              43.50
## 44           Siti              43.92
## 45          Sofia              42.78
## 46          Tatha              40.96
## 47       Theodora              40.04
## 48        Vanessa              43.90
## 49           Widi              40.14
## 50          Wiwik              43.54

Calculate the Total Revenue (in Monthly)

library(dplyr)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.4.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Data$Revenue      <- Data$Price - Data$Booking_fee
Monthly_Rev       <- Data%>%separate(Date_Sales, c("Year", "Month", "Day"), sep = "-")%>%
                      select(Year, Month, Revenue)
Monthly.Rev        <-aggregate(Revenue ~ Month + Year, data = Monthly_Rev, sum)
Monthly.Rev

##    Month Year Revenue
## 1     01 2018 3154540
## 2     02 2018 2803359
## 3     03 2018 3158899
## 4     04 2018 3189671
## 5     05 2018 3158372
## 6     06 2018 2871184
## 7     07 2018 3065831
## 8     08 2018 3121437
## 9     09 2018 3091354
## 10    10 2018 3184341
## 11    11 2018 3072410
## 12    12 2018 3110241
## 13    01 2019 2992355
## 14    02 2019 2846555
## 15    03 2019 3115809
## 16    04 2019 3169959
## 17    05 2019 3046462
## 18    06 2019 2938620
## 19    07 2019 3262309
## 20    08 2019 3329690
## 21    09 2019 2878173
## 22    10 2019 2990967
## 23    11 2019 2851337
## 24    12 2019 3082581
## 25    01 2020 2935863
## 26    02 2020 2998973
## 27    03 2020 3269532
## 28    04 2020 3116267
## 29    05 2020 3245171
## 30    06 2020 3202455
## 31    07 2020 3022168
## 32    08 2020 3127824
## 33    09 2020 2511022

2 Case 2

Suppose you have a market research project to maintain some potential customers at your company. Let’s assume you are working at ABC insurance company. To do so, you want to collect the following data set:

Marital_Status : assign random marital status (“Yes”, “No”)
Address : assign random address (JABODETABEK)
Work_Location : assign random working location (JABODETABEK)
Age : assign a sequence of random numbers (from 19 to 60)
Academic : assign random academic levels (“J.School”,“H.School”, “Undergraduate”, “Master”, “Phd”)
Job : 10 random jobs for each academic levels
Grade : 5 random grades for each Jobs
Income : assign the possible income for each Jobs
Spending : assign the possible spending for each Jobs
Number_of_children: assign a random number in between 0 and 10 (according to marital status)
Private_vehicle : assign the possible private vehicle for each person (“Car”, “motorcycle”, “Public”)
Home : “Rent”, “Own”, “Credit”

2.1 Task 1

Please provide me a data set about the information of 50000 customers refers to each variable above!

marital_status  <-sample(c("Yes", "No"),50000, replace = T)
Address         <-sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),50000, replace = T)
Work_Location   <-sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),50000, replace = T)
Age             <-floor (runif(50000,19,60))
Academic        <-sample(c("J.School","H.School", "Undergraduate", "Master", 
                            "Phd"),50000, replace = T)
Job             <-ifelse(Academic=="J.School", 
                         sample(c("Cleaning Service", "Babysitter",
                                  "Pet Sitter", "Driver",
                                  "Yard Worker", "Barista",
                                  "Server", "Lifeguard",
                                  "Cashier", "House Cleaner"),   
                                    length(Academic=="J.School"), replace=T),
                   ifelse(Academic=="H.School", 
                          sample(c("Repaires", "House Cleaner",
                                   "Dog Walker", "Tutor",
                                   "Writer", "Bank teller",
                                   "Fisherman", "Food service worker",
                                   "Truck Driver", "Administrative Assistant"), 
                                    length(Academic=="H.School"), replace=T),
                    ifelse(Academic=="Undergraduate", 
                           sample(c("Nursing", "Nanny",
                                    "Virtual Assistant", "Warehouse Associate",
                                    "Dog Walker", "Home Health Aide",
                                    "Food service Worker", "Sales Associate",
                                    "Rideshare Driver", "Flight attendant"), 
                                    length(Academic=="Undergraduate"), 
                                      replace=T), 
                    ifelse(Academic=="Master", 
                           sample(c("Software Developer", "AI Scientist",
                                    "Marketing specialist","Business analyst", 
                                    "Psychotherapist", "Engineering", 
                                    "Pharmacist", "Data Analayst",
                                    "Actuary", "Biostatistician"), 
                                    length(Academic=="Master"), replace=T),
                    sample(c("Market Research Analyst", "Product Manager",
                             "Management Consulting", "Quantitative Analyst",
                             "Entrepreneurship", "Finance",
                             "Mathematicians", "Immunology",
                             "Psychology", "Pharmacology"), 
                            length(Academic=="Phd"), replace=T)))))
Grade           <-sample(c("Expert", "Master", "Intermediate", "Skilled",
                           "Newbie"), 50000, replace=T)
Income          <-sample(c(4000:10000),50000, replace = T)
Spending        <-sample(c(3000:7000), 50000, replace = T )
Number_of_children <- ifelse(marital_status=="Yes", sample((c(0:10)), 
                          length(marital_status=="Yes"), replace=T),
                                    "0")

Private_vehicle <- sample(c("Car", "Motorcycle", "Public"),
                          50000, replace=T)
Home            <- sample(c("Rent", "Own", "Credit"),
                          50000, replace=T)
Data2 <-data.frame (marital_status,
                              Address,
                              Work_Location,
                              Age,
                              Academic,
                              Job,
                              Grade,
                              Income,
                              Spending,
                              Number_of_children,
                              Private_vehicle,
                              Home
                              )
library(DT)
datatable(Data2)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

2.2 Task 2

What kind of important summary statistics you can get from your data set?

summary(Data2)

##  marital_status       Address          Work_Location           Age       
##  Length:50000       Length:50000       Length:50000       Min.   :19.00  
##  Class :character   Class :character   Class :character   1st Qu.:29.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :39.00  
##                                                           Mean   :39.01  
##                                                           3rd Qu.:49.00  
##                                                           Max.   :59.00  
##    Academic             Job               Grade               Income     
##  Length:50000       Length:50000       Length:50000       Min.   : 4000  
##  Class :character   Class :character   Class :character   1st Qu.: 5513  
##  Mode  :character   Mode  :character   Mode  :character   Median : 6996  
##                                                           Mean   : 7001  
##                                                           3rd Qu.: 8494  
##                                                           Max.   :10000  
##     Spending    Number_of_children Private_vehicle        Home          
##  Min.   :3000   Length:50000       Length:50000       Length:50000      
##  1st Qu.:3998   Class :character   Class :character   Class :character  
##  Median :5000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :4996                                                           
##  3rd Qu.:5988                                                           
##  Max.   :7000

The summary we can get is that we can calculate statistical information such as the average value, minimum value, maximum value, first quartile and third quartile from data on age, income, spending, and the number of children.

we can calculate the income, spending and age variables to offer the best insurance that the company can offer so that it gets maximum profit. company can measure the lowest and the highest insurances average offered to customers.

2.3 Task 3

According to your calculations and analysis, which customers are potential for you to maintain?

#average monthly cost for insurance 574$ for individual and family coverage averaged 1634$.  we need to maintain customers that 30% of their savings ( Income- Spending ) more than or equal to 574$ or 1634$

Data2$Customer_savings  <-  Data2$Income - Data2$Spending
Data2$Maintain <- ifelse(Data2$marital_status=="Yes" &
                           0.30*Data2$Customer_savings>1634,"Yes",
                         ifelse(Data2$marital_status=="No" &
                                  0.30*Data2$Customer_savings>574, "Yes",
                                "No"))
print(head(Data2, 5))

##   marital_status   Address Work_Location Age      Academic                  Job
## 1             No     Depok        Bekasi  49        Master Marketing specialist
## 2            Yes Tengerang        Bekasi  59        Master          Engineering
## 3            Yes    Bekasi         Depok  59 Undergraduate     Rideshare Driver
## 4            Yes    Bekasi         Bogor  19        Master              Actuary
## 5            Yes     Depok         Depok  39      J.School              Cashier
##     Grade Income Spending Number_of_children Private_vehicle   Home
## 1  Expert   4087     6241                  0             Car    Own
## 2 Skilled   8345     5993                  2             Car   Rent
## 3  Newbie   4044     3133                  9             Car   Rent
## 4 Skilled   4727     5461                  4             Car   Rent
## 5  Newbie   8039     3243                  4      Motorcycle Credit
##   Customer_savings Maintain
## 1            -2154       No
## 2             2352       No
## 3              911       No
## 4             -734       No
## 5             4796       No

2.4 Task 4

The following types of insurance are available in Indonesia:

Life insurance
Health Insurance
Vehicle Insurance
Home and property insurance
Education insurance
Business Insurance
General insurance
Credit Insurance
Marine Insurance
Travel Insurance

Which insurance product will you provide to your customers?

# Calculation & Analysis (to make your answer below are reasonable)
Insurances <- ifelse(Data2$Age>50 & 
                             Data2$Income>6000,
                             "Life", 
                       
                    ifelse(Data2$Age<50 & 
                          Data2$Income>4000,
                          "Health",  
                          
                    ifelse(Data2$Private_vehicle!="Public" ,
                            "Vehicle", 
                          
                    ifelse(Data2$Home=="Own" &
                          Data2$Private_vehicle!="Public",
                          "Home and property", 
                          
                    ifelse(Data2$Age>25 &
                          Data2$marital_status=="Yes" &
                          Data2$Number_of_children>1 &
                          Data2$Income<6000 &
                          Data2$Spending>3000,
                          "Education",
                         
                    ifelse(Data2$Home=="Credit" & 
                         Data2$Income<10000 &
                         Data2$Spending<7000,
                         "Credit",
                    
                    ifelse(Data2$Income<10000 &
                           Data2$Spending<7000,
                           "Business",
                           
                    ifelse(Data2$Income<10000 &
                             Data2$Job=="Fisherman",
                           "Marine",
                         
                          "General"))))))))

# Add new variable to your data set (assign product for each customers)
Data2$Insurances<-Insurances
print(head(Data2, 10))

##    marital_status   Address Work_Location Age      Academic
## 1              No     Depok        Bekasi  49        Master
## 2             Yes Tengerang        Bekasi  59        Master
## 3             Yes    Bekasi         Depok  59 Undergraduate
## 4             Yes    Bekasi         Bogor  19        Master
## 5             Yes     Depok         Depok  39      J.School
## 6             Yes     Depok        Bekasi  39      J.School
## 7             Yes     Depok       Jakarta  42      H.School
## 8             Yes   Jakarta        Bekasi  36 Undergraduate
## 9             Yes     Depok       Jakarta  50        Master
## 10             No    Bekasi         Depok  30           Phd
##                     Job   Grade Income Spending Number_of_children
## 1  Marketing specialist  Expert   4087     6241                  0
## 2           Engineering Skilled   8345     5993                  2
## 3      Rideshare Driver  Newbie   4044     3133                  9
## 4               Actuary Skilled   4727     5461                  4
## 5               Cashier  Newbie   8039     3243                  4
## 6         House Cleaner  Newbie   6311     6542                  7
## 7         House Cleaner  Newbie   6007     4135                  6
## 8                 Nanny  Master   8113     5683                  2
## 9  Marketing specialist  Newbie   8686     4861                 10
## 10           Immunology Skilled   4943     4478                  0
##    Private_vehicle   Home Customer_savings Maintain Insurances
## 1              Car    Own            -2154       No     Health
## 2              Car   Rent             2352       No       Life
## 3              Car   Rent              911       No    Vehicle
## 4              Car   Rent             -734       No     Health
## 5       Motorcycle Credit             4796       No     Health
## 6           Public    Own             -231       No     Health
## 7           Public Credit             1872       No     Health
## 8           Public    Own             2430       No     Health
## 9              Car   Rent             3825       No    Vehicle
## 10      Motorcycle    Own              465       No     Health

Midterm Exam ~ Data Structures and Algorithms

Vanessa Supit 20194920014

October 29, 2020