1 Case 1

Assume you have collected a data set from ABC Property, as shown in the following table:

# Simulated sales records for ABC Property: 10,000 transactions in total.
# The 50 marketing names and their 50 matching Work_Exp values are each
# repeated 200 times (50 * 200 = 10,000), so a marketer carries the same
# Work_Exp on every one of their rows. All other columns are drawn at random
# with replacement, so the table differs on every run.
Id             <- (1:10000)
Marketing_Name <- rep(c("Angel","Sherly","Vanessa","Irene","Julian",
                        "Jeffry","Nikita","Kefas","Siana","Lala",
                        "Fallen","Ardifo","Kevin","Juen","Jerrel",
                        "Imelda","Widi","Theodora","Elvani","Jonathan",
                        "Sofia","Abraham","Siti","Niko","Sefli",
                        "Bene", "Diana", "Pupe", "Andi", "Tatha",
                        "Endri", "Monika", "Hans", "Debora","Hanifa",
                        "James", "Jihan", "Friska","Ardiwan", "Bakti",
                        "Anthon","Amry", "Wiwik", "Bastian", "Budi",
                        "Leo","Simon","Matius","Arry", "Eliando"), 200)
Work_Exp       <- rep(c(1.3,2.4,2.5,3.6,3.7,4.7,5.7,6.7,7.7,7.3,
                        5.3,5.3,10,9.3,3.3,3.3,3.4,3.4,3.5,5.6,
                        3.5,4.6,4.6,5.7,6.2,4.4,6.4,6.4,3.5,7.5,
                        4.6,3.7,4.7,4.3,5.2,6.3,7.4,2.4,3.4,8.2,
                        6.4,7.2,1.5,7.5,10,4.5,6.5,7.2,7.1,7.6),200)
# "Tangerang" was previously misspelled "Tengerang"; the spelling now matches
# the city list used for the insurance customers in Case 2.
City           <- sample(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"),10000, replace = TRUE)
Cluster        <- sample(c("Victoria","Palmyra","Winona","Tiara", "Narada",
                           "Peronia","Lavesh","Alindra","Sweethome", "Asera",
                           "Teradamai","Albasia", "Adara","Neon","Arana",
                           "Asoka", "Primadona", "Mutiara","Permata","Alamanda" ), 10000, replace = TRUE)
# Prices are whole numbers between 7000 and 15000 (inclusive).
Price          <- sample(c(7000:15000),10000, replace = TRUE)
# Sale dates fall within the 1000 consecutive days starting on 2018-01-01.
Date_Sales     <- sample(seq(as.Date("2018/01/01"), by = "day", length.out = 1000),10000, replace = TRUE)
# Number of advertisements attached to the sale (1 to 20).
Advertisement  <- sample(c(1:20), 10000, replace = TRUE)
Data           <- data.frame(Id, 
                             Marketing_Name,
                             Work_Exp,
                             City,
                             Cluster,
                             Price,
                             Date_Sales,
                             Advertisement)
library(DT)
datatable(Data)

1.1 Task 1

Categorize the Price into three groups:

$\text{High} > 12000$
$10000 \le \text{Medium} \le 12000$
$\text{Low} < 10000$

Assign it to a new variable called Class by using if and else if statements.

# Price band of every sale: "Low" below 10000, "High" above 12000 and
# "Medium" for everything in between (both bounds included).
# The two comparisons add up to 0, 1 or 2, and adding 1 turns that into the
# position of the matching label.
Data$Class <- with(
  Data,
  c("Low", "Medium", "High")[1L + (Price >= 10000) + (Price > 12000)]
)
library(DT)
datatable(Data)

Categorize the Price into six groups:

5 % Booking fee if the $\text{Price} < 8000$
6 % Booking fee if the $8000 \le \text{Price} < 9000$
7 % Booking fee if the $9000 \le \text{Price} < 10000$
8 % Booking fee if the $10000 \le \text{Price} < 11000$
9 % Booking fee if the $11000 \le \text{Price} < 13000$
10 % Booking fee if the $13000 \le \text{Price} \le 15000$

Assign it into a new variable called Booking_fee by using if and else if statements.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Booking fee = Price multiplied by a rate that rises with the price band:
#   below 8000 -> 5%,  below 9000 -> 6%,  below 10000 -> 7%,
#   below 11000 -> 8%, below 13000 -> 9%, otherwise 10%.
# findInterval() counts how many of the lower bounds a price has reached
# (0 to 5), so adding 1 selects the matching entry of the rate table.
Data$BookingFee <- with(Data, {
  band <- findInterval(Price, c(8000, 9000, 10000, 11000, 13000))
  Price * c(0.05, 0.06, 0.07, 0.08, 0.09, 0.10)[band + 1L]
})
library(DT)
datatable(Data)

1.2 Task 3

According to the final data set that you created in Task 2, and assuming you work as a marketer at ABC Property, how could you collect all the information about your sales by using a for statement?

# Collect every sale made by one marketer. The loop runs over a single name
# ("Siana"), so Sales ends up holding exactly that marketer's rows.
for (ABC_Property in "Siana") {
  Sales <- Data %>%
    filter(Marketing_Name == ABC_Property)
}
library(DT)
datatable(Sales)

1.3 Task 4

As a marketer, you are paid a 2% bonus on the Booking fee per unit, plus an extra 1% bonus if you have worked at this company for more than 3 years. Please calculate the total bonus by using if, for, and break statements.

library(dplyr)
# Bonus per sale: 3% of the booking fee when the marketer has more than
# 3 years of experience, otherwise 2%.
# The assignment is vectorised and fills the whole column in the first pass,
# so the loop is left immediately with `break`.
for (ABC_Property in Data$Marketing_Name) {
  Data$Bonus <- Data$BookingFee * ifelse(Data$Work_Exp > 3, 0.03, 0.02)
  break
}

# Total bonus per marketer (Work_Exp is kept as a second grouping column so
# it appears in the result). Passing the one-column data frame Data["Bonus"]
# makes aggregate() name the summed column "Bonus" directly.
BonusPersonal <- aggregate(Data["Bonus"],
                           by = Data[c("Marketing_Name", "Work_Exp")],
                           FUN = sum)

# Keep only the row that belongs to "Siana".
MyBonus <- filter(BonusPersonal, Marketing_Name == "Siana")
library(DT)
datatable(MyBonus)

## Task 5

In this section you are expected to be able to use all the statements that you have just learned. So, please answer the following questions:

Who is the best marketing?

# Total sales value (sum of Price) per marketer.
Marketing <- aggregate(Data$Price,
                       by = list(Marketing_Name = Data$Marketing_Name),
                       FUN = sum)
# The best marketer is the one with the largest total; if several marketers
# share that maximum, all of them are kept.
Best_Marketing <- Marketing %>%
  filter(x == max(x)) %>%
  dplyr::rename("Total Price" = x)
print(Best_Marketing)

##   Marketing_Name Total Price
## 1          Angel     2286258

datatable(Best_Marketing)

Which City and Cluster is the most profitable?

# Total sales value (sum of Price) for every City / Cluster combination.
City_Cluster <- aggregate(Data$Price,
                          by = list(City = Data$City, Cluster = Data$Cluster),
                          FUN = sum)
# Keep the combination(s) whose total equals the overall maximum.
Best_CC <- City_Cluster %>%
  filter(x == max(x)) %>%
  dplyr::rename("Total Price" = x)
print(Best_CC)

##    City Cluster Total Price
## 1 Depok Alindra     1385192

datatable(Best_CC)

Calculate your total advertisement cost, if you have to pay them $4 once.

# Advertisement spend: each advertisement costs $4, so the cost of a sale is
# 4 times its Advertisement count. `Cost` and `Name` are kept as top-level
# objects because later chunks of this report read them.
Cost <- Data$Advertisement * 4
# Marketer names are taken from Data (not from a loose global vector) so the
# names and costs are guaranteed to come from the same rows.
Name <- data.frame(Marketing_Name = Data$Marketing_Name,
                   Cost)
# Total advertisement spend per marketer.
# The rename is applied to the aggregate() result directly: wrapping the
# renamed table in data.frame() would run the default check.names = TRUE and
# turn the "My Pay" header into "My.Pay".
Ads.Pay <- aggregate(Name$Cost,
                     by = list(Name = Name$Marketing_Name),
                     FUN = sum) %>%
  dplyr::rename("My Pay" = x)
# Keep only the row that belongs to "Siana".
MyPay <- Ads.Pay[Ads.Pay$Name == "Siana", ]
library(DT)
datatable(MyPay)

Calculate the average advertisement cost for each marketing.

# Mean advertisement cost per sale for every marketer.
Average_Ads.Pay <- aggregate(Name$Cost,
                             by = list(Name = Data$Marketing_Name),
                             FUN = mean)
# Give the aggregated column a readable header for the table.
Average_Ads.Pay <- dplyr::rename(Average_Ads.Pay, "Average Ads Cost" = x)
library(DT)
datatable(Average_Ads.Pay)

Calculate the Total Revenue (in Monthly)

library(dplyr)
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ readr   1.4.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Split every sale date into separate "year", "month" and "day" columns
# (the original Date_Sales column is replaced by these three).
Date <- separate(Data, Date_Sales, c("year", "month", "day"), sep = "-")
# Revenue of a sale: price plus booking fee, minus the marketer's bonus and
# the advertisement cost.
Date$Revenue <- Data$Price + Data$BookingFee - Data$Bonus - Name$Cost
# Revenue summed per marketer and calendar month.
Monthly <- aggregate(Date$Revenue,
                     by = list(Name = Data$Marketing_Name,
                               Month = Date$month,
                               Year = Date$year),
                     FUN = sum)
Monthly <- dplyr::rename(Monthly, "Revenue Monthly" = x)
library(DT)
datatable(Monthly)

2 Case 2

Suppose you have a market research project to maintain some potential customers at your company. Let’s assume you are working at ABC insurance company. To do so, you want to collect the following data set:

Marital_Status : assign random marital status (“Yes”, “No”)
Address : assign random address (JABODETABEK)
Work_Location : assign random working location (JABODETABEK)
Age : assign a sequence of random numbers (from 19 to 60)
Academic : assign random academic levels (“J.School”,“H.School”, “Undergraduate”, “Master”, “Phd”)
Job : 10 random jobs for each academic levels
Grade : 5 random grades for each Jobs
Income : assign the possible income for each Jobs
Spending : assign the possible spending for each Jobs
Number_of_children: assign a random number in between 0 and 10 (according to marital status)
Private_vehicle : assign the possible private vehicle for each person (“Car”, “motorcycle”, “Public”)
Home : “Rent”, “Own”, “Credit”

2.1 Task 1

Please provide a data set containing information on 50000 customers for each of the variables above!

library(plyr)

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:purrr':
## 
##     compact

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

# Synthetic customer base for ABC Insurance.
# Every column is drawn at random, so the table differs on every run.
# Jobs and Incomes depend on the Academic level; Spending depends on Incomes;
# Number_Of_Children depends on Marital_Status.

# Number of customers to generate; every column below has this length.
n_customers              <- 50000

Marital_Status           <- sample(c("Yes", "No"),
                                     n_customers, replace = TRUE)

Address                  <- sample(c("Jakarta",
                                     "Bogor",
                                     "Depok",
                                     "Tangerang",
                                     "Bekasi"),
                                     n_customers, replace = TRUE)

Work_Loc                 <- sample(c("Jakarta",
                                         "Bogor",
                                         "Depok",
                                         "Tangerang",
                                         "Bekasi"),
                                         n_customers, replace = TRUE)

Age                      <- sample(c(19:60),
                                     n_customers, replace = TRUE)

Academic                 <- sample(c("Junior School",
                                     "High School",
                                     "Undergraduate",
                                     "Master",
                                     "Ph.D"),
                                     n_customers, replace = TRUE)

# Each academic level has its own pool of 10 jobs. A full-length candidate
# vector is drawn per level and ifelse() keeps, for every customer, the
# candidate that belongs to that customer's level.
Jobs                     <- ifelse(Academic == "Junior School",
                                   sample(c("Tukang Parkir",      "OB",
                                            "Tukang Cuci Piring", "Pelayan",
                                            "Satpam",             "Pembantu",
                                            "Montir",             "Tukang Judi",
                                            "Bandar Narkoba",     "Tukang Potong"),
                                            n_customers, replace = TRUE),
                            ifelse(Academic == "High School",
                                   sample(c("Barber",             "Kasir",
                                            "Tukang Masak",       "Kurir",
                                            "Pelayan",            "Montir",
                                            "Admin",              "Staff",
                                            "Selebgram",          "Model"),
                                            n_customers, replace = TRUE),
                            ifelse(Academic == "Undergraduate",
                                   sample(c("Accounting",         "Staff",
                                            "HRD",                "Content Writer",
                                            "Teller",             "Customer Service",
                                            "Admin Sosmed",       "Youtuber",
                                            "Entrepreneur",       "Translator"),
                                            n_customers, replace = TRUE),
                            ifelse(Academic == "Master",
                                   sample(c("Dosen",              "Designer",
                                            "Guru",               "Presenter",
                                            "Komedian",           "Komikus",
                                            "CEO",                "Direktur Keuangan",
                                            "Tax Consultant",     "Pilot"),
                                            n_customers, replace = TRUE),
                                   sample(c("Rektor", "CEO",
                                            "Public Speaker", "Psikiater",
                                            "Dokter", "Surgeon",
                                            "Programmer", "Auditor",
                                            "Entrepreneur", "Ilmuan"),
                                            n_customers, replace = TRUE)))))

Grades                   <- sample(c("Low",
                                     "Beginner",
                                     "Middle",
                                     "High",
                                     "Pro"), n_customers, replace = TRUE)

# Income range per academic level, rounded to the nearest 100,000.
# The candidate vectors are sized with n_customers directly; the previous
# length(Academic == "...") was simply the length of the whole vector, not
# the number of customers at that level.
Incomes                  <- ifelse(Academic == "Junior School",
                                   round_any(runif(n_customers,
                                                   1000000,
                                                   1500000),
                                                   100000),
                            ifelse(Academic == "High School",
                                   round_any(runif(n_customers,
                                                   2000000,
                                                   3000000),
                                                   100000),
                            ifelse(Academic == "Undergraduate",
                                   round_any(runif(n_customers,
                                                   5000000,
                                                   7000000),
                                                   100000),
                            ifelse(Academic == "Master",
                                   round_any(runif(n_customers,
                                                   9000000,
                                                   12000000),
                                                   100000),
                            round_any(runif(n_customers,
                                                   15000000,
                                                   20000000),
                                                   100000)))))

# Spending is 50% to 60% of income (share rounded to whole percent).
Spending                 <- Incomes*round_any((runif(n_customers, 0.5, 0.6)), 0.01)

# Married customers get an independent random count between 0 and 10.
# A full-length draw with replacement is required here: a bare sample(0:10)
# is one 11-element permutation that ifelse() would recycle by row position.
# NOTE(review): unmarried customers are assigned 1 child, as in the original
# code; confirm whether 0 was intended.
Number_Of_Children       <- ifelse(Marital_Status == "Yes",
                                   sample(c(0:10), n_customers, replace = TRUE),
                                   1)

Private_Vehicles         <- sample(c("Car",
                                     "Motorcycle",
                                     "Public"),
                                     n_customers, replace = TRUE)

Home                     <- sample(c("Rent",
                                     "Own",
                                     "Credit"),
                                     n_customers, replace = TRUE)

ABCIns_Company           <- data.frame(Marital_Status,
                                       Address,
                                       Work_Loc,
                                       Age,
                                       Academic,
                                       Jobs,
                                       Grades,
                                       Incomes,
                                       Spending,
                                       Number_Of_Children,
                                       Private_Vehicles,
                                       Home)
library(DT)
datatable(ABCIns_Company)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

2.2 Task 2

What kind of important summary statistics can you get from your data set? Answer: The important summary statistics that we can get from the data set are: - Age: to hold insurance and remain a customer of ABC Insurance Company, a customer must be at least 19 and at most 60 years old. - Income: used to check whether a customer can apply for insurance; the minimum income is 1 million. - Spending: used for comparison with income; spending cannot be greater than income and is about half of it. Income and spending together show whether a customer can make stable payments. - Number of children: knowing how many children a customer has helps in choosing an insurance product, for example education insurance.

# Print a per-column summary of the customer table: length/class/mode for the
# character columns and min, quartiles, mean and max for the numeric ones.
summary(ABCIns_Company)

##  Marital_Status       Address            Work_Loc              Age       
##  Length:50000       Length:50000       Length:50000       Min.   :19.00  
##  Class :character   Class :character   Class :character   1st Qu.:29.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :40.00  
##                                                           Mean   :39.57  
##                                                           3rd Qu.:50.00  
##                                                           Max.   :60.00  
##    Academic             Jobs              Grades             Incomes        
##  Length:50000       Length:50000       Length:50000       Min.   : 1000000  
##  Class :character   Class :character   Class :character   1st Qu.: 2300000  
##  Mode  :character   Mode  :character   Mode  :character   Median : 6000000  
##                                                           Mean   : 7539210  
##                                                           3rd Qu.:11200000  
##                                                           Max.   :20000000  
##     Spending        Number_Of_Children Private_Vehicles       Home          
##  Min.   :  500000   Min.   : 0.000     Length:50000       Length:50000      
##  1st Qu.: 1239000   1st Qu.: 1.000     Class :character   Class :character  
##  Median : 3294000   Median : 1.000     Mode  :character   Mode  :character  
##  Mean   : 4146088   Mean   : 3.001                                          
##  3rd Qu.: 6160000   3rd Qu.: 5.000                                          
##  Max.   :12000000   Max.   :10.000

2.3 Task 3

According to your calculations and analysis, which customers have the potential to be retained? - A customer is worth retaining when 30% of their income is smaller than their income minus their spending.

# Flag a customer with "Yes" when what remains of the income after spending
# is larger than 30% of that income, and with "No" otherwise.
# The comparison is TRUE/FALSE, so adding 1 selects "Yes" or "No".
ABCIns_Company$Potential <- with(
  ABCIns_Company,
  c("No", "Yes")[1L + (0.3 * Incomes < Incomes - Spending)]
)
# Keep only the customers that were flagged "Yes".
Potential <- ABCIns_Company[ABCIns_Company[["Potential"]] == "Yes", ]
library(DT)
datatable(Potential)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

2.4 Task 4

The following types of insurance are available in Indonesia:

Life insurance
Health Insurance
Vehicle Insurance
Home and property insurance
Education insurance
Business Insurance
General insurance
Credit Insurance
Marine Insurance
Travel Insurance

Which insurance product will you provide to your customers?

# Calculation & Analysis (to make your answer below are reasonable)
# Every customer receives exactly one product. The rules are tried from top
# to bottom and the first one that matches wins; a customer matching none of
# them receives "Marine Insurance".
# NOTE(review): a customer aged exactly 50 matches neither the Life rule
# (Age > 50) nor the Health rule (Age < 50) and falls through to the later
# rules - confirm this is intended.
# NOTE(review): the Business rule needs Incomes < 1000000; it looks
# unreachable if incomes are never below 1,000,000 - verify against the data.
Customers_Insurances <- with(ABCIns_Company, {
  fits_life      <- Age > 50 & Incomes > 1000000
  fits_health    <- Age < 50 & Incomes > 2000000
  fits_vehicle   <- Private_Vehicles == "Car"
  fits_home      <- Home == "Own" & Private_Vehicles == "Motorcycle"
  fits_general   <- Age > 30 &
                    Marital_Status == "Yes" &
                    Number_Of_Children > 1 &
                    Incomes < 2000000
  fits_credit    <- Home == "Credit" &
                    Incomes < 3000000 &
                    Spending < 700000
  fits_business  <- Jobs == "Staff" & Incomes < 1000000
  fits_education <- Age < 25 & Incomes > 1000000

  ifelse(fits_life,      "Life Insurance",
  ifelse(fits_health,    "Health Insurance",
  ifelse(fits_vehicle,   "Vehicle Insurance",
  ifelse(fits_home,      "Home and property Insurance",
  ifelse(fits_general,   "General Insurance",
  ifelse(fits_credit,    "Credit Insurance",
  ifelse(fits_business,  "Business Insurance",
  ifelse(fits_education, "Education Insurance",
                         "Marine Insurance"))))))))
})

# Add new variable to your data set (assign product for each customers)
ABCIns_Company$Customers_Insurances <- Customers_Insurances
library(DT)
datatable(ABCIns_Company)

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Laptop saya sangat amat ngelag pak, sampe saya sangat amat sabar pak nungguinnya, ngefreeze bisa 20 menit, udah gitu ga cuma sekali pak, mohon dimaklumi ya pak :(, maaf saya curhat pak >-<

Midterm Exam ~ Data Structures and Algorithms

Siana Darma Putri - 20194920012

October 29, 2020