Assume you have collected a data set from ABC Property, as shown in the following table:
# Simulated sales records for ABC Property: 10,000 rows handled by 50
# marketing agents. The 50 names and the 50 matching work-experience values
# are each repeated 200 times, so row i and row i + 50 share the same agent.
Id <- seq_len(10000)
Marketing_Name <- rep(c("Angel", "Sherly", "Vanessa", "Irene", "Julian",
                        "Jeffry", "Nikita", "Kefas", "Siana", "Lala",
                        "Fallen", "Ardifo", "Kevin", "Juen", "Jerrel",
                        "Imelda", "Widi", "Theodora", "Elvani", "Jonathan",
                        "Sofia", "Abraham", "Siti", "Niko", "Sefli",
                        "Bene", "Diana", "Pupe", "Andi", "Tatha",
                        "Endri", "Monika", "Hans", "Debora", "Hanifa",
                        "James", "Jihan", "Friska", "Ardiwan", "Bakti",
                        "Anthon", "Amry", "Wiwik", "Bastian", "Budi",
                        "Leo", "Simon", "Matius", "Arry", "Eliando"),
                      times = 200)
Work_Exp <- rep(c(1.3, 2.4, 2.5, 3.6, 3.7, 4.7, 5.7, 6.7, 7.7, 7.3,
                  5.3, 5.3, 10, 9.3, 3.3, 3.3, 3.4, 3.4, 3.5, 5.6,
                  3.5, 4.6, 4.6, 5.7, 6.2, 4.4, 6.4, 6.4, 3.5, 7.5,
                  4.6, 3.7, 4.7, 4.3, 5.2, 6.3, 7.4, 2.4, 3.4, 8.2,
                  6.4, 7.2, 1.5, 7.5, 10, 4.5, 6.5, 7.2, 7.1, 7.6),
                times = 200)
# The remaining columns are drawn independently, with replacement, one value
# per row. TRUE is spelled out because T is an ordinary, reassignable variable.
# "Tangerang" is the correct spelling of the city (it was "Tengerang").
City <- sample(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
               size = 10000, replace = TRUE)
Cluster <- sample(c("Victoria", "Palmyra", "Winona", "Tiara", "Narada",
                    "Peronia", "Lavesh", "Alindra", "Sweethome", "Asera",
                    "Teradamai", "Albasia", "Adara", "Neon", "Arana",
                    "Asoka", "Primadona", "Mutiara", "Permata", "Alamanda"),
                  size = 10000, replace = TRUE)
Price <- sample(7000:15000, size = 10000, replace = TRUE)
# Sale dates fall on one of 1000 consecutive days starting 2018-01-01.
Date_Sales <- sample(seq(as.Date("2018/01/01"), by = "day", length.out = 1000),
                     size = 10000, replace = TRUE)
Advertisement <- sample(1:20, size = 10000, replace = TRUE)
Data <- data.frame(Id,
                   Marketing_Name,
                   Work_Exp,
                   City,
                   Cluster,
                   Price,
                   Date_Sales,
                   Advertisement)
library(DT)
datatable(Data)
Categorize the Price into three groups:
Assign the result to a new variable called Class by using if and else if statements.
Categorize the Price into six groups:
Assign it into a new variable called Booking_fee by using if and else if statements.
# Booking fee = Price times a rate that depends on the price tier.
# Tier lower bounds are inclusive:
#   Price <  8000 -> 5%     Price >= 10000 -> 8%
#   Price >= 8000 -> 6%     Price >= 11000 -> 9%
#   Price >= 9000 -> 7%     Price >= 13000 -> 10%
# findInterval() returns how many lower bounds each price has reached (0-5);
# adding 1 turns that count into an index into the rate vector.
Data$Booking_fee <- Data$Price *
  c(0.05, 0.06, 0.07, 0.08, 0.09, 0.1)[
    findInterval(Data$Price, c(8000, 9000, 10000, 11000, 13000)) + 1L
  ]
library(DT)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Based on the final data set that you created in Task 2, assume you have been working in marketing at ABC Property. How could you collect all the information about your sales by using a for statement?
Suppose you are paid a bonus of 2% of the Booking fee per unit as a marketer, plus an extra 1% bonus if you have been working at this company for more than 3 years. Please calculate the total bonus by using if, for, and break statements.
In this section you are expected to be able to use all the statements that you have just learned. So, please answer the following questions:
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Total sales value (sum of Price) for each marketing agent.
Marketing_Sales <- aggregate(Price ~ Marketing_Name, data = Data, FUN = sum)
# Rank agents from the highest to the lowest total and keep the first row.
# print() shows that row and returns it, so it is also stored.
Best_Marketing <- print(
  head(Marketing_Sales[order(Marketing_Sales$Price, decreasing = TRUE), ], 1)
)
## Marketing_Name Price
## 10 Bastian 2294323
library(dplyr)
# Total sales value (sum of Price) for each City / Cluster combination.
City_Cluster <- aggregate(Price ~ City + Cluster, data = Data, FUN = sum)
# Rank the combinations from the highest to the lowest total and keep the
# first row. print() shows that row and returns it, so it is also stored.
Best_City_Cluster <- print(
  head(City_Cluster[order(City_Cluster$Price, decreasing = TRUE), ], 1)
)
## City Cluster Price
## 80 Tengerang Sweethome 1448575
# Advertisement cost per sale: each advertisement is costed at 4
# (the currency unit is not stated in this file).
Data$Advertisement <- as.numeric(Data$Advertisement)
Data$Advertisement_Cost <- 4 * Data$Advertisement
# Total advertisement cost over all sales.
Total_Cost <- sum(Data$Advertisement_Cost)
paste("The total cost was", Total_Cost)
## [1] "The total cost was 420236"
# Average advertisement cost per sale for each marketing agent.
Marketing_Ads_Avg <- aggregate(Advertisement_Cost ~ Marketing_Name,
                               data = Data,
                               FUN = mean)
library(DT)
datatable(Marketing_Ads_Avg)
# This statement was fused onto the end of the previous line, which is a
# parse error; it needs a line of its own.
library(dplyr)
# Revenue per sale is the price plus its booking fee.
Data$Revenue <- Data$Price + Data$Booking_fee
# Split the sale date on "-" into Year / Month / Day columns and keep only
# the columns needed for the monthly roll-up.
Monthly_Revenue <- select(
  separate(Data, Date_Sales, into = c("Year", "Month", "Day"), sep = "-"),
  Year, Month, Revenue
)
# Sum revenue for every Year / Month pair.
Total_Monthly_Revenue <- aggregate(Revenue ~ Year + Month,
                                   data = Monthly_Revenue,
                                   FUN = sum)
# Sort the rows by Year, ascending (the default direction of order()).
Total_Monthly_Revenue <- Total_Monthly_Revenue[
  order(Total_Monthly_Revenue$Year),
]
library(DT)
datatable(Total_Monthly_Revenue)
Suppose you have a market research project to maintain some potential customers at your company. Let’s assume you are working at ABC insurance company. To do so, you want to collect the following data set:
Please provide a data set containing information on 50000 customers for each of the variables above!
# Simulated attributes for 50,000 insurance customers. Every attribute is
# drawn independently per customer, with replacement. TRUE is spelled out
# because T is an ordinary, reassignable variable.
Marital_Status <- sample(c("yes", "no"), size = 50000, replace = TRUE)
Address <- sample(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
                  size = 50000, replace = TRUE)
Work_Location <- sample(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
                        size = 50000, replace = TRUE)
Age <- sample(19:60, size = 50000, replace = TRUE)
Academic <- sample(c("J.School", "H.School", "Undergraduate", "Master", "PhD"),
                   size = 50000, replace = TRUE)
# Job is picked from a pool that depends on the education level. Each pool is
# sampled once per customer (size = length(Academic), with replacement).
# Calling sample() without a size returns a single permutation of the ten
# jobs, which ifelse() would recycle, tying each customer's job to the row
# position modulo 10.
Job <- ifelse(
  Academic == "J.School",
  sample(c("Office Boy/Office Girl", "kitchen helper",
           "Customer Service", "Buruh",
           "Cleaning Service", "Security",
           "Packing Barang", "Waiter/Waitress",
           "Staff Gudang", "Driver"),
         size = length(Academic), replace = TRUE),
  ifelse(
    Academic == "H.School",
    sample(c("Staff Accounting", "Marketing",
             "Operator Produksi", "Guru Les",
             "Staff Administrasi", "Polisi",
             "Data Entry", "Kasir",
             "Back Office", "ABK"),
           size = length(Academic), replace = TRUE),
    ifelse(
      Academic == "Undergraduate",
      sample(c("Pengajar", "Pilot",
               "Pengacara Konsultan", "Nakhoda",
               "Software Developer", "Masinis",
               "Staff Desain Perencanaan", "PNS",
               "Market Researcher", "Dokter"),
             size = length(Academic), replace = TRUE),
      ifelse(
        Academic == "Master",
        sample(c("Aktuaris", "Data Analyst",
                 "Data Scientist", "Dokter Spesialis",
                 "Branch Manager", "Enterpreneur",
                 "Notaris", "Jaksa",
                 "Psikolog", "Supervisor"),
               size = length(Academic), replace = TRUE),
        # Any remaining level (here: "PhD") uses the last pool.
        sample(c("CEO", "Hakim",
                 "Researcher", "Manager",
                 "Komandan", "President",
                 "Menteri", "Neuroscientist",
                 "Rektor", "Kaprodi"),
               size = length(Academic), replace = TRUE)
      )
    )
  )
)
Grade <- sample(c("Novice", "Adv. Beginner", "Competent", "Proficient", "Expert"),
                size = 50000, replace = TRUE)
#' Simulate an income from education level and skill grade
#'
#' Draws a random basic salary from a range selected by `x`, multiplies it by
#' a factor selected by `y`, and rounds the product to the nearest thousand.
#'
#' @param x Vector of education levels. "J.School", "H.School",
#'   "Undergraduate" and "Master" select their own salary range; any other
#'   non-missing value uses the "PhD" range.
#' @param y Vector of grades. "Novice", "Adv. Beginner" (the legacy spelling
#'   "Adv.Beginner" is also accepted), "Competent" and "Proficient" select
#'   their own multiplier; any other non-missing value uses the "Expert"
#'   multiplier.
#' @return Numeric vector of incomes rounded to the nearest 1000. A missing
#'   `x` or `y` yields `NA` for that element.
Salary_Function <- function(x, y) {
  n <- length(x)

  # One independent draw per element of x, uniform on the integers lo..hi.
  # sample.int() avoids building (and permuting) the whole salary range,
  # which spans up to 35 million values.
  draw_salary <- function(lo, hi) {
    lo + sample.int(hi - lo + 1L, size = n, replace = TRUE) - 1L
  }

  # Basic_Salary based on Academic
  Basic_Salary <- ifelse(
    x == "J.School", draw_salary(500000L, 2000000L),
    ifelse(
      x == "H.School", draw_salary(2000000L, 3000000L),
      ifelse(
        x == "Undergraduate", draw_salary(3000000L, 5000000L),
        ifelse(
          x == "Master", draw_salary(5000000L, 15000000L),
          draw_salary(15000000L, 50000000L)
        )
      )
    )
  )

  # Extra_Salary based on the Grade (Experience and Skill).
  # The grade is generated as "Adv. Beginner" (with a space). Matching only
  # "Adv.Beginner" sent those customers to the Expert multiplier.
  Extra_Salary <- ifelse(
    y == "Novice", 1,
    ifelse(
      y %in% c("Adv. Beginner", "Adv.Beginner"), 1.3,
      ifelse(
        y == "Competent", 1.5,
        ifelse(y == "Proficient", 1.8, 2)
      )
    )
  )

  round(Basic_Salary * Extra_Salary, digits = -3)
}
Income <- Salary_Function(Academic, Grade)
# Ideal spending is 75% of income; the remaining 25% is saved.
Spending <- 0.75 * Income
# Married customers get 0-10 children, drawn independently per customer;
# unmarried customers get 0. sample() is given an explicit size because
# without one it returns a single permutation of 0:10, which ifelse() would
# recycle over the rows.
Number_of_Children <- ifelse(Marital_Status == "yes",
                             sample(0:10,
                                    size = length(Marital_Status),
                                    replace = TRUE),
                             0)
# A customer can afford a car when 48 months of savings (25% of income) reach
# 100,000,000, and a motorcycle when 24 months of savings reach 30,000,000.
# Customers who can afford a car get Car, Motorcycle or Public at random;
# customers who can only afford a motorcycle get Motorcycle or Public;
# everyone else uses public transport. Each draw is per customer.
Private_Vehicle <- ifelse(
  48 * 0.25 * Income >= 100000000,
  sample(c("Car", "Motorcycle", "Public"),
         size = length(Income), replace = TRUE),
  ifelse(
    24 * 0.25 * Income >= 30000000,
    sample(c("Motorcycle", "Public"),
           size = length(Income), replace = TRUE),
    "Public"
  )
)
Home <- sample(c("Rent", "Own", "Credit"), size = 50000, replace = TRUE)
Customer <- data.frame(Marital_Status,
                       Address,
                       Work_Location,
                       Age,
                       Academic,
                       Job,
                       Grade,
                       Income,
                       Spending,
                       Number_of_Children,
                       Private_Vehicle,
                       Home)
library(DT)
datatable(Customer)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
What kind of important summary statistics can you get from your data set?
## $Marital_Status
## Length Class Mode
## 50000 character character
##
## $Address
## Length Class Mode
## 50000 character character
##
## $Work_Location
## Length Class Mode
## 50000 character character
##
## $Age
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 29.00 40.00 39.57 50.00 60.00
##
## $Academic
## Length Class Mode
## 50000 character character
##
## $Job
## Length Class Mode
## 50000 character character
##
## $Grade
## Length Class Mode
## 50000 character character
##
## $Income
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500000 3530000 6641000 16749662 20754500 99929000
##
## $Spending
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 375000 2647500 4980750 12562247 15565875 74946750
##
## $Number_of_Children
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 2.497 5.000 10.000
##
## $Private_Vehicle
## Length Class Mode
## 50000 character character
##
## $Home
## Length Class Mode
## 50000 character character
According to your calculations and analysis, which customers are worth maintaining?
# The average insurance premium is Rp 250,000, so we maintain the customers
# for whom 30% of their savings (savings = 25% of income) is at least
# Rp 250,000.
Customer$Maintain <- ifelse(0.3 * 0.25 * Customer$Income >= 250000,
                            "yes",
                            "no")
library(DT)
library(dplyr)
# Select the columns by name rather than by position (8 and 13), so the table
# stays correct if columns are added to or reordered in Customer.
subset(Customer, select = c(Income, Maintain)) %>% datatable()
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
The following types of insurance are available in Indonesia:
Assume my company provides only the 4 insurance products below, each with a premium of Rp 250,000 every month.
Which insurance product will you provide to your customers?
# Calculation & Analysis (to make the answer below reasonable)
# The premium budget of a customer is 30% of their savings (savings = 25% of
# income). Every product costs Rp 250,000, so a customer can afford one
# product per full Rp 250,000 of budget, up to the number of products we
# offer them:
# - Customers with a vehicle and a house are offered all four products
#   (Vehicle, Property, Health, Life).
# - Customers with only a vehicle are offered everything except Property.
# - Customers with only a house are offered everything except Vehicle.
# - Customers with neither are offered only Health and Life.
# Health and Life are always among the products offered because all of our
# customers are workers who risk their lives at work every day, so the
# company considers these two the most important.
# Customers whose budget is below Rp 250,000 are not offered anything, because
# of the risk of late or missed payments.

#' Pick insurance products for each customer
#'
#' For every customer, a random number of products between 1 and the most
#' they can afford is drawn, then that many products are drawn without
#' repetition from the products offered to them.
#'
#' @param income Numeric vector of incomes.
#' @param private_vehicle Character vector; any value other than "Public"
#'   counts as owning a vehicle.
#' @param home Character vector; any value other than "Rent" counts as having
#'   a house.
#' @param premium Price of one product (default 250000).
#' @return Character vector the same length as `income`: product names joined
#'   by ", ", "No Product" when not even one product is affordable, or `NA`
#'   when an input for that customer is missing.
assign_insurance_products <- function(income, private_vehicle, home,
                                      premium = 250000) {
  budget <- 0.3 * 0.25 * income
  # Number of products the budget covers (0-4), using the same >= thresholds
  # as the tiers described above.
  affordable <- (budget >= premium) + (budget >= 2 * premium) +
    (budget >= 3 * premium) + (budget >= 4 * premium)
  owns_vehicle <- private_vehicle != "Public"
  owns_home <- home != "Rent"

  # The draw is done per customer. A scalar sample() inside ifelse() would be
  # evaluated once and hand the same products to a whole tier.
  vapply(seq_along(income), function(i) {
    if (is.na(affordable[i]) || is.na(owns_vehicle[i]) || is.na(owns_home[i])) {
      return(NA_character_)
    }
    offered <- c(if (owns_vehicle[i]) "Vehicle",
                 if (owns_home[i]) "Property",
                 "Health",
                 "Life")
    most <- min(affordable[i], length(offered))
    if (most == 0) {
      return("No Product")
    }
    paste(sample(offered, size = sample.int(most, 1L)), collapse = ", ")
  }, character(1))
}

# Add new variable to your data set (assign product for each customers)
Customer$Insurance_Product <- assign_insurance_products(
  Customer$Income,
  Customer$Private_Vehicle,
  Customer$Home
)
library(DT)
datatable(Customer)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html