Assume you have collected some data set from ABC Property
as we can see in the following table:
Id <- (1:10000)
Marketing_Name <- rep(c("Angel","Sherly","Vanessa","Irene","Julian",
"Jeffry","Nikita","Kefas","Siana","Lala",
"Fallen","Ardifo","Kevin","Juen","Jerrel",
"Imelda","Widi","Theodora","Elvani","Jonathan",
"Sofia","Abraham","Siti","Niko","Sefli",
"Bene", "Diana", "Pupe", "Andi", "Tatha",
"Endri", "Monika", "Hans", "Debora","Hanifa",
"James", "Jihan", "Friska","Ardiwan", "Bakti",
"Anthon","Amry", "Wiwik", "Bastian", "Budi",
"Leo","Simon","Matius","Arry", "Eliando"), 200)
Work_Exp <- rep(c(1.3,2.4,2.5,3.6,3.7,4.7,5.7,6.7,7.7,7.3,
5.3,5.3,10,9.3,3.3,3.3,3.4,3.4,3.5,5.6,
3.5,4.6,4.6,5.7,6.2,4.4,6.4,6.4,3.5,7.5,
4.6,3.7,4.7,4.3,5.2,6.3,7.4,2.4,3.4,8.2,
6.4,7.2,1.5,7.5,10,4.5,6.5,7.2,7.1,7.6),200)
City <- sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),10000, replace = T)
Cluster <- sample(c("Victoria","Palmyra","Winona","Tiara", "Narada",
"Peronia","Lavesh","Alindra","Sweethome", "Asera",
"Teradamai","Albasia", "Adara","Neon","Arana",
"Asoka", "Primadona", "Mutiara","Permata","Alamanda" ), 10000, replace=T)
Price <- sample(c(7000:15000),10000, replace = T)
Date_Sales <- sample(seq(as.Date("2018/01/01"), by = "day", length.out = 1000),10000, replace = T)
Advertisement <- sample(c(1:20), 10000, replace = T)
Data <- data.frame(Id,
Marketing_Name,
Work_Exp,
City,
Cluster,
Price,
Date_Sales,
Advertisement)
library(DT)
datatable(Data)
Categorize the Price
into three groups:
Assign it into a new variable called Class
by using If
and else if
statement.
category_price<-ifelse(Data$Price>12000, "High"
, ifelse(Data$Price<10000, "Low",
"Medium"))
Data$Class<-category_price
print(head(Data, 5))
## Id Marketing_Name Work_Exp City Cluster Price Date_Sales Advertisement
## 1 1 Angel 1.3 Depok Albasia 10395 2018-10-26 8
## 2 2 Sherly 2.4 Bekasi Victoria 10250 2019-09-14 4
## 3 3 Vanessa 2.5 Bekasi Asera 11458 2020-04-27 2
## 4 4 Irene 3.6 Jakarta Lavesh 9610 2018-01-16 12
## 5 5 Julian 3.7 Jakarta Asera 7978 2020-06-11 16
## Class
## 1 Medium
## 2 Medium
## 3 Medium
## 4 Low
## 5 Low
Categorize the Price
into six groups:
Assign it into a new variable called Booking_fee
by using if
and else if
statements.
Booking_price<-ifelse(Data$Price<8000, Data$Price*0.05
, ifelse(Data$Price<9000 & Data$Price>8000, Data$Price*0.06,
ifelse(Data$Price<10000 & Data$Price>9000, Data$Price*0.07,
ifelse(Data$Price<11000 & Data$Price>10000, Data$Price*0.08,
ifelse(Data$Price>11000 & Data$Price<13000, Data$Price*0.09,
Data$Price*0.1)))))
Data$Booking_fee<-Booking_price
print(head(Data, 5))
## Id Marketing_Name Work_Exp City Cluster Price Date_Sales Advertisement
## 1 1 Angel 1.3 Depok Albasia 10395 2018-10-26 8
## 2 2 Sherly 2.4 Bekasi Victoria 10250 2019-09-14 4
## 3 3 Vanessa 2.5 Bekasi Asera 11458 2020-04-27 2
## 4 4 Irene 3.6 Jakarta Lavesh 9610 2018-01-16 12
## 5 5 Julian 3.7 Jakarta Asera 7978 2020-06-11 16
## Class Booking_fee
## 1 Medium 831.60
## 2 Medium 820.00
## 3 Medium 1031.22
## 4 Low 672.70
## 5 Low 398.90
According to the final date set that you have created at task 2, I assume you have been working as marketing at ABC Property
company, how could you collected all information about your sales by using for
statement.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Id Marketing_Name Work_Exp City Cluster Price Date_Sales
## 1 3 Vanessa 2.5 Bekasi Asera 11458 2020-04-27
## 2 53 Vanessa 2.5 Bogor Mutiara 8507 2019-04-28
## 3 103 Vanessa 2.5 Tengerang Primadona 13339 2018-10-17
## 4 153 Vanessa 2.5 Bekasi Adara 7053 2019-04-29
## 5 203 Vanessa 2.5 Jakarta Victoria 14218 2019-08-20
## 6 253 Vanessa 2.5 Tengerang Alamanda 14964 2020-05-19
## 7 303 Vanessa 2.5 Jakarta Palmyra 13843 2020-01-31
## 8 353 Vanessa 2.5 Bogor Neon 7953 2020-03-23
## 9 403 Vanessa 2.5 Tengerang Narada 11171 2018-03-07
## 10 453 Vanessa 2.5 Bogor Adara 13339 2019-07-28
## Advertisement Class Booking_fee
## 1 2 Medium 1031.22
## 2 9 Low 510.42
## 3 18 High 1333.90
## 4 2 Low 352.65
## 5 16 High 1421.80
## 6 11 High 1496.40
## 7 19 High 1384.30
## 8 4 Low 397.65
## 9 9 Medium 1005.39
## 10 10 High 1333.90
If you will get pay 2% bonus of the Booking fee
per-unit as marketing and also get an extra 1% bonus if you have been working at this company for more than 3 years. Please calculate the total bonus by using if, for, and break
statements.
library(dplyr)
list<-c()
for (i in Data$Marketing_Name){
Data$Bonus <- ifelse(Data$Work_Exp>3, Data$Booking_fee*0.03, Data$Booking_fee*0.02)
list[[i]]<- aggregate(Bonus ~ Marketing_Name, data = Data, sum, T)
break}
result<-list[[i]]
result <- result[order(result$Bonus,decreasing=T),]
result%>%filter(Marketing_Name=="Vanessa")%>%print()
## Marketing_Name Bonus
## 1 Vanessa 3710.032
In this section your are expected to be able to use all statements that you just learn earlier. So, please answer the following questions:
library(dplyr)
list_by_Price <-aggregate(Price~ Marketing_Name , data = Data, sum)
result_price <-list_by_Price[order(list_by_Price$Price,decreasing=T),]
list_by_adv <-aggregate(Advertisement~ Marketing_Name , data = Data, sum)
Datasales <- data.frame(left_join(result_price, list_by_adv))%>%head(1)
## Joining, by = "Marketing_Name"
## Marketing_Name Price Advertisement
## 1 Fallen 2249681 1988
## [1] "the best marketing is Fallen"
library(dplyr)
Profitable_City_Cluster <- aggregate ( Price ~ Cluster + City, data= Data, sum)
Data_City_Cluster <-Profitable_City_Cluster[order(Profitable_City_Cluster$Price,decreasing=T),]%>%head(1)
Data_City_Cluster
## Cluster City Price
## 36 Sweethome Bogor 1356433
## [1] "Sweethome and Bogor is the most profitable"
library(dplyr)
list<-c()
for (i in "Vanessa"){
list[[i]]<- aggregate(Advertisement ~ Marketing_Name, data = Data, sum, T)
break}
result<-list[[i]]
Total.ADV<- function(x){
Count <-result$Advertisement*4
Total <- paste(Count, sep = "", "$")}
result$Advertisement_cost<-Total.ADV(result$Advertisement)
result%>%filter(Marketing_Name=="Vanessa")%>%print()
## Marketing_Name Advertisement Advertisement_cost
## 1 Vanessa 2196 8784$
Data$Advertisement.cost <- Data$Advertisement*4
aggregate(Advertisement.cost ~ Marketing_Name, data= Data, mean)
## Marketing_Name Advertisement.cost
## 1 Abraham 41.20
## 2 Amry 41.44
## 3 Andi 38.42
## 4 Angel 43.22
## 5 Anthon 42.98
## 6 Ardifo 44.44
## 7 Ardiwan 42.18
## 8 Arry 41.66
## 9 Bakti 42.18
## 10 Bastian 43.80
## 11 Bene 42.82
## 12 Budi 41.54
## 13 Debora 42.82
## 14 Diana 39.06
## 15 Eliando 42.10
## 16 Elvani 41.28
## 17 Endri 38.98
## 18 Fallen 39.76
## 19 Friska 40.22
## 20 Hanifa 40.86
## 21 Hans 41.02
## 22 Imelda 44.00
## 23 Irene 39.82
## 24 James 41.50
## 25 Jeffry 43.34
## 26 Jerrel 41.74
## 27 Jihan 43.40
## 28 Jonathan 45.32
## 29 Juen 40.74
## 30 Julian 40.48
## 31 Kefas 42.10
## 32 Kevin 40.48
## 33 Lala 41.82
## 34 Leo 45.44
## 35 Matius 40.72
## 36 Monika 44.62
## 37 Nikita 44.58
## 38 Niko 44.36
## 39 Pupe 41.80
## 40 Sefli 42.86
## 41 Sherly 42.66
## 42 Siana 38.92
## 43 Simon 43.50
## 44 Siti 43.92
## 45 Sofia 42.78
## 46 Tatha 40.96
## 47 Theodora 40.04
## 48 Vanessa 43.90
## 49 Widi 40.14
## 50 Wiwik 43.54
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Data$Revenue <- Data$Price - Data$Booking_fee
Monthly_Rev <- Data%>%separate(Date_Sales, c("Year", "Month", "Day"), sep = "-")%>%
select(Year, Month, Revenue)
Monthly.Rev <-aggregate(Revenue ~ Month + Year, data = Monthly_Rev, sum)
Monthly.Rev
## Month Year Revenue
## 1 01 2018 3154540
## 2 02 2018 2803359
## 3 03 2018 3158899
## 4 04 2018 3189671
## 5 05 2018 3158372
## 6 06 2018 2871184
## 7 07 2018 3065831
## 8 08 2018 3121437
## 9 09 2018 3091354
## 10 10 2018 3184341
## 11 11 2018 3072410
## 12 12 2018 3110241
## 13 01 2019 2992355
## 14 02 2019 2846555
## 15 03 2019 3115809
## 16 04 2019 3169959
## 17 05 2019 3046462
## 18 06 2019 2938620
## 19 07 2019 3262309
## 20 08 2019 3329690
## 21 09 2019 2878173
## 22 10 2019 2990967
## 23 11 2019 2851337
## 24 12 2019 3082581
## 25 01 2020 2935863
## 26 02 2020 2998973
## 27 03 2020 3269532
## 28 04 2020 3116267
## 29 05 2020 3245171
## 30 06 2020 3202455
## 31 07 2020 3022168
## 32 08 2020 3127824
## 33 09 2020 2511022
Suppose you have a market research project to maintain some potential customers at your company. Let’s assume you are working at ABC insurance company. To do so, you want to collect the following data set:
Please provide me a data set about the information of 50000 customers refers to each variable above!
marital_status <-sample(c("Yes", "No"),50000, replace = T)
Address <-sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),50000, replace = T)
Work_Location <-sample(c("Jakarta","Bogor","Depok","Tengerang","Bekasi"),50000, replace = T)
Age <-floor (runif(50000,19,60))
Academic <-sample(c("J.School","H.School", "Undergraduate", "Master",
"Phd"),50000, replace = T)
Job <-ifelse(Academic=="J.School",
sample(c("Cleaning Service", "Babysitter",
"Pet Sitter", "Driver",
"Yard Worker", "Barista",
"Server", "Lifeguard",
"Cashier", "House Cleaner"),
length(Academic=="J.School"), replace=T),
ifelse(Academic=="H.School",
sample(c("Repaires", "House Cleaner",
"Dog Walker", "Tutor",
"Writer", "Bank teller",
"Fisherman", "Food service worker",
"Truck Driver", "Administrative Assistant"),
length(Academic=="H.School"), replace=T),
ifelse(Academic=="Undergraduate",
sample(c("Nursing", "Nanny",
"Virtual Assistant", "Warehouse Associate",
"Dog Walker", "Home Health Aide",
"Food service Worker", "Sales Associate",
"Rideshare Driver", "Flight attendant"),
length(Academic=="Undergraduate"),
replace=T),
ifelse(Academic=="Master",
sample(c("Software Developer", "AI Scientist",
"Marketing specialist","Business analyst",
"Psychotherapist", "Engineering",
"Pharmacist", "Data Analayst",
"Actuary", "Biostatistician"),
length(Academic=="Master"), replace=T),
sample(c("Market Research Analyst", "Product Manager",
"Management Consulting", "Quantitative Analyst",
"Entrepreneurship", "Finance",
"Mathematicians", "Immunology",
"Psychology", "Pharmacology"),
length(Academic=="Phd"), replace=T)))))
Grade <-sample(c("Expert", "Master", "Intermediate", "Skilled",
"Newbie"), 50000, replace=T)
Income <-sample(c(4000:10000),50000, replace = T)
Spending <-sample(c(3000:7000), 50000, replace = T )
Number_of_children <- ifelse(marital_status=="Yes", sample((c(0:10)),
length(marital_status=="Yes"), replace=T),
"0")
Private_vehicle <- sample(c("Car", "Motorcycle", "Public"),
50000, replace=T)
Home <- sample(c("Rent", "Own", "Credit"),
50000, replace=T)
Data2 <-data.frame (marital_status,
Address,
Work_Location,
Age,
Academic,
Job,
Grade,
Income,
Spending,
Number_of_children,
Private_vehicle,
Home
)
library(DT)
datatable(Data2)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
What kind of important summary statistics you can get from your data set?
## marital_status Address Work_Location Age
## Length:50000 Length:50000 Length:50000 Min. :19.00
## Class :character Class :character Class :character 1st Qu.:29.00
## Mode :character Mode :character Mode :character Median :39.00
## Mean :39.01
## 3rd Qu.:49.00
## Max. :59.00
## Academic Job Grade Income
## Length:50000 Length:50000 Length:50000 Min. : 4000
## Class :character Class :character Class :character 1st Qu.: 5513
## Mode :character Mode :character Mode :character Median : 6996
## Mean : 7001
## 3rd Qu.: 8494
## Max. :10000
## Spending Number_of_children Private_vehicle Home
## Min. :3000 Length:50000 Length:50000 Length:50000
## 1st Qu.:3998 Class :character Class :character Class :character
## Median :5000 Mode :character Mode :character Mode :character
## Mean :4996
## 3rd Qu.:5988
## Max. :7000
The summary we can get is that we can calculate statistical information such as the average value, minimum value, maximum value, first quartile and third quartile from data on age, income, spending, and the number of children.
we can calculate the income, spending and age variables to offer the best insurance that the company can offer so that it gets maximum profit. company can measure the lowest and the highest insurances average offered to customers.
According to your calculations and analysis, which customers are potential for you to maintain?
#average monthly cost for insurance 574$ for individual and family coverage averaged 1634$. we need to maintain customers that 30% of their savings ( Income- Spending ) more than or equal to 574$ or 1634$
Data2$Customer_savings <- Data2$Income - Data2$Spending
Data2$Maintain <- ifelse(Data2$marital_status=="Yes" &
0.30*Data2$Customer_savings>1634,"Yes",
ifelse(Data2$marital_status=="No" &
0.30*Data2$Customer_savings>574, "Yes",
"No"))
print(head(Data2, 5))
## marital_status Address Work_Location Age Academic Job
## 1 No Depok Bekasi 49 Master Marketing specialist
## 2 Yes Tengerang Bekasi 59 Master Engineering
## 3 Yes Bekasi Depok 59 Undergraduate Rideshare Driver
## 4 Yes Bekasi Bogor 19 Master Actuary
## 5 Yes Depok Depok 39 J.School Cashier
## Grade Income Spending Number_of_children Private_vehicle Home
## 1 Expert 4087 6241 0 Car Own
## 2 Skilled 8345 5993 2 Car Rent
## 3 Newbie 4044 3133 9 Car Rent
## 4 Skilled 4727 5461 4 Car Rent
## 5 Newbie 8039 3243 4 Motorcycle Credit
## Customer_savings Maintain
## 1 -2154 No
## 2 2352 No
## 3 911 No
## 4 -734 No
## 5 4796 No
The following types of insurance are available in Indonesia:
Which insurance product will you provide to your customers?
# Calculation & Analysis (to make your answer below are reasonable)
Insurances <- ifelse(Data2$Age>50 &
Data2$Income>6000,
"Life",
ifelse(Data2$Age<50 &
Data2$Income>4000,
"Health",
ifelse(Data2$Private_vehicle!="Public" ,
"Vehicle",
ifelse(Data2$Home=="Own" &
Data2$Private_vehicle!="Public",
"Home and property",
ifelse(Data2$Age>25 &
Data2$marital_status=="Yes" &
Data2$Number_of_children>1 &
Data2$Income<6000 &
Data2$Spending>3000,
"Education",
ifelse(Data2$Home=="Credit" &
Data2$Income<10000 &
Data2$Spending<7000,
"Credit",
ifelse(Data2$Income<10000 &
Data2$Spending<7000,
"Business",
ifelse(Data2$Income<10000 &
Data2$Job=="Fisherman",
"Marine",
"General"))))))))
# Add new variable to your data set (assign product for each customers)
Data2$Insurances<-Insurances
print(head(Data2, 10))
## marital_status Address Work_Location Age Academic
## 1 No Depok Bekasi 49 Master
## 2 Yes Tengerang Bekasi 59 Master
## 3 Yes Bekasi Depok 59 Undergraduate
## 4 Yes Bekasi Bogor 19 Master
## 5 Yes Depok Depok 39 J.School
## 6 Yes Depok Bekasi 39 J.School
## 7 Yes Depok Jakarta 42 H.School
## 8 Yes Jakarta Bekasi 36 Undergraduate
## 9 Yes Depok Jakarta 50 Master
## 10 No Bekasi Depok 30 Phd
## Job Grade Income Spending Number_of_children
## 1 Marketing specialist Expert 4087 6241 0
## 2 Engineering Skilled 8345 5993 2
## 3 Rideshare Driver Newbie 4044 3133 9
## 4 Actuary Skilled 4727 5461 4
## 5 Cashier Newbie 8039 3243 4
## 6 House Cleaner Newbie 6311 6542 7
## 7 House Cleaner Newbie 6007 4135 6
## 8 Nanny Master 8113 5683 2
## 9 Marketing specialist Newbie 8686 4861 10
## 10 Immunology Skilled 4943 4478 0
## Private_vehicle Home Customer_savings Maintain Insurances
## 1 Car Own -2154 No Health
## 2 Car Rent 2352 No Life
## 3 Car Rent 911 No Vehicle
## 4 Car Rent -734 No Health
## 5 Motorcycle Credit 4796 No Health
## 6 Public Own -231 No Health
## 7 Public Credit 1872 No Health
## 8 Public Own 2430 No Health
## 9 Car Rent 3825 No Vehicle
## 10 Motorcycle Own 465 No Health