Assume you have collected a data set from ABC Property, as shown in the following table:
# Synthetic sales records for ABC Property: 10,000 rows, one per sale.
# No random seed is set, so the sampled columns differ on every run.
Id <- seq_len(10000)

# 50 marketers, repeated in the same order 200 times (50 * 200 = 10,000 rows).
Marketing_Name <- rep(
  c("Angel", "Sherly", "Vanessa", "Irene", "Julian",
    "Jeffry", "Nikita", "Kefas", "Siana", "Lala",
    "Fallen", "Ardifo", "Kevin", "Juen", "Jerrel",
    "Imelda", "Widi", "Theodora", "Elvani", "Jonathan",
    "Sofia", "Abraham", "Siti", "Niko", "Sefli",
    "Bene", "Diana", "Pupe", "Andi", "Tatha",
    "Endri", "Monika", "Hans", "Debora", "Hanifa",
    "James", "Jihan", "Friska", "Ardiwan", "Bakti",
    "Anthon", "Amry", "Wiwik", "Bastian", "Budi",
    "Leo", "Simon", "Matius", "Arry", "Eliando"),
  times = 200
)

# Work experience in years, position-matched to the 50 names above, so every
# marketer keeps the same value on each of their rows.
Work_Exp <- rep(
  c(1.3, 2.4, 2.5, 3.6, 3.7, 4.7, 5.7, 6.7, 7.7, 7.3,
    5.3, 5.3, 10, 9.3, 3.3, 3.3, 3.4, 3.4, 3.5, 5.6,
    3.5, 4.6, 4.6, 5.7, 6.2, 4.4, 6.4, 6.4, 3.5, 7.5,
    4.6, 3.7, 4.7, 4.3, 5.2, 6.3, 7.4, 2.4, 3.4, 8.2,
    6.4, 7.2, 1.5, 7.5, 10, 4.5, 6.5, 7.2, 7.1, 7.6),
  times = 200
)

# The remaining columns are drawn independently and uniformly with replacement.
# "Tangerang" was previously misspelled "Tengerang".
City <- sample(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
               10000, replace = TRUE)
Cluster <- sample(
  c("Victoria", "Palmyra", "Winona", "Tiara", "Narada",
    "Peronia", "Lavesh", "Alindra", "Sweethome", "Asera",
    "Teradamai", "Albasia", "Adara", "Neon", "Arana",
    "Asoka", "Primadona", "Mutiara", "Permata", "Alamanda"),
  10000, replace = TRUE
)
Price <- sample(7000:15000, 10000, replace = TRUE)

# Sale dates fall within the 1000 consecutive days starting on 2018-01-01.
Date_Sales <- sample(
  seq(as.Date("2018/01/01"), by = "day", length.out = 1000),
  10000, replace = TRUE
)
Advertisement <- sample(1:20, 10000, replace = TRUE)

Data <- data.frame(Id,
                   Marketing_Name,
                   Work_Exp,
                   City,
                   Cluster,
                   Price,
                   Date_Sales,
                   Advertisement)
library(DT)
datatable(Data)
Categorize the Price into three groups:
Assign the result to a new variable called Class by using if and else if statements.
# Price class: "Low" below 10000, "High" above 12000, "Medium" for everything
# in between (both bounds included). Rows with a missing Price get NA.
Data$Class <- "Medium"
Data$Class[Data$Price < 10000] <- "Low"
Data$Class[Data$Price > 12000] <- "High"
Data$Class[is.na(Data$Price)] <- NA_character_
library(DT)
datatable(Data)
Categorize the Price into six groups:
Assign the result to a new variable called Booking_fee by using if and else if statements.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Using the final data set that you created in task 2, and assuming that you work as a marketer at ABC Property, how could you collect all the information about your sales by using a for statement?
As a marketer you are paid a bonus of 2% of the booking fee per unit, plus an extra 1% bonus if you have been working at this company for more than 3 years. Please calculate the total bonus by using if, for, and break statements.
library(dplyr)
# Bonus per sale: 3% of the booking fee when the marketer has more than
# 3 years of experience, otherwise 2%. The body is vectorised over every row,
# so the first pass fills the whole column and `break` ends the loop at once.
for (ABC_Property in Data$Marketing_Name) {
  Data$Bonus <- ifelse(
    Data$Work_Exp > 3,
    Data$BookingFee * 0.03,
    Data$BookingFee * 0.02
  )
  break
}

# Total bonus for each (marketer, work experience) pair. Passing one-column
# data frames keeps the column names, so the sum arrives already named "Bonus".
BonusPersonal <- aggregate(
  x = Data["Bonus"],
  by = Data[c("Marketing_Name", "Work_Exp")],
  FUN = sum
)

# Rows belonging to the marketer "Siana".
MyBonus <- dplyr::filter(BonusPersonal, Marketing_Name == "Siana")
library(DT)
datatable(MyBonus)
In this section you are expected to be able to use all the statements that you have just learned. So, please answer the following questions:
# Total sales value (sum of Price) per marketer; the total lands in column `x`.
Marketing <- aggregate(x = Data$Price, by = Data["Marketing_Name"], FUN = sum)

# Keep the marketer(s) with the largest total (ties are all retained),
# give the total a readable header and print the result.
Best_Marketing <- dplyr::rename(
  dplyr::filter(Marketing, x == max(Marketing$x)),
  "Total Price" = x
)
print(Best_Marketing)
## Marketing_Name Total Price
## 1 Angel 2286258
# Total sales value (sum of Price) for every City / Cluster combination;
# the total lands in column `x`.
City_Cluster <- aggregate(
  x = Data$Price,
  by = Data[c("City", "Cluster")],
  FUN = sum
)

# Keep the combination(s) with the largest total (ties are all retained),
# give the total a readable header and print the result.
Best_CC <- dplyr::rename(
  dplyr::filter(City_Cluster, x == max(City_Cluster$x)),
  "Total Price" = x
)
print(Best_CC)
## City Cluster Total Price
## 1 Depok Alindra 1385192
# Advertising cost per sale: the number of advertisements multiplied by 4.
Cost <- Data$Advertisement * 4
Name <- data.frame(Marketing_Name,
                   Cost)

# Total advertising cost per marketer. The rename is applied to the finished
# data frame: wrapping the renamed result in data.frame() runs make.names()
# on the headers and would turn "My Pay" into "My.Pay".
Ads.Pay <- aggregate(Name$Cost,
                     by = list(Name = Name$Marketing_Name),
                     FUN = sum) %>%
  dplyr::rename("My Pay" = x)

# Advertising bill of the marketer "Siana".
MyPay <- Ads.Pay[Ads.Pay$Name == "Siana", ]
library(DT)
datatable(MyPay)

# Average advertising cost per sale for each marketer. This statement used to
# share a line with the datatable() call above, which does not parse.
Average_Ads.Pay <- aggregate(Name$Cost,
                             by = list(Name = Name$Marketing_Name),
                             FUN = mean) %>%
  dplyr::rename("Average Ads Cost" = x)
library(DT)
datatable(Average_Ads.Pay)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ stringr 1.4.0
## ✓ tidyr 1.1.2 ✓ forcats 0.5.0
## ✓ readr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Split the sale date on "-" into character year / month / day columns;
# separate() replaces the original Date_Sales column with these three.
Date <- separate(Data, Date_Sales, into = c("year", "month", "day"), sep = "-")

# Revenue per sale: price plus booking fee, minus the marketer's bonus and the
# advertising cost. Data, Date and Name share the same row order.
Date$Revenue <- with(Data, Price + BookingFee - Bonus) - Name$Cost

# Revenue summed per marketer for every calendar month of every year.
Monthly <- aggregate(
  x = Date$Revenue,
  by = list(
    Name = Data$Marketing_Name,
    Month = Date$month,
    Year = Date$year
  ),
  FUN = sum
)
Monthly <- dplyr::rename(Monthly, "Revenue Monthly" = x)
library(DT)
datatable(Monthly)
Suppose you have a market research project to retain some potential customers at your company. Let’s assume you are working at ABC Insurance Company. To do so, you want to collect the following data set:
Please provide a data set containing the information of 50000 customers for each of the variables above!
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Synthetic customer data set for ABC Insurance: one row per customer.
# No random seed is set, so every run produces a different sample.
n_customers <- 50000

Marital_Status <- sample(c("Yes", "No"), n_customers, replace = TRUE)
Address <- sample(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
                  n_customers, replace = TRUE)
Work_Loc <- sample(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
                   n_customers, replace = TRUE)
Age <- sample(19:60, n_customers, replace = TRUE)
Academic <- sample(c("Junior School",
                     "High School",
                     "Undergraduate",
                     "Master",
                     "Ph.D"),
                   n_customers, replace = TRUE)

# Job titles that can be drawn for each academic level.
job_pool <- list(
  "Junior School" = c("Tukang Parkir", "OB",
                      "Tukang Cuci Piring", "Pelayan",
                      "Satpam", "Pembantu",
                      "Montir", "Tukang Judi",
                      "Bandar Narkoba", "Tukang Potong"),
  "High School" = c("Barber", "Kasir",
                    "Tukang Masak", "Kurir",
                    "Pelayan", "Montir",
                    "Admin", "Staff",
                    "Selebgram", "Model"),
  "Undergraduate" = c("Accounting", "Staff",
                      "HRD", "Content Writer",
                      "Teller", "Customer Service",
                      "Admin Sosmed", "Youtuber",
                      "Entrepreneur", "Translator"),
  "Master" = c("Dosen", "Designer",
               "Guru", "Presenter",
               "Komedian", "Komikus",
               "CEO", "Direktur Keuangan",
               "Tax Consultant", "Pilot"),
  "Ph.D" = c("Rektor", "CEO",
             "Public Speaker", "Psikiater",
             "Dokter", "Surgeon",
             "Programmer", "Auditor",
             "Entrepreneur", "Ilmuan")
)

# Every customer gets a job drawn uniformly from the pool of their own level.
Jobs <- character(n_customers)
for (level in names(job_pool)) {
  at_level <- Academic == level
  Jobs[at_level] <- sample(job_pool[[level]], sum(at_level), replace = TRUE)
}

Grades <- sample(c("Low", "Beginner", "Middle", "High", "Pro"),
                 n_customers, replace = TRUE)

# Income is uniform within a range that depends on the academic level and is
# rounded to the nearest 100,000. round(x / a) * a is the same arithmetic that
# plyr::round_any() performs, without needing plyr.
income_min <- c("Junior School" = 1000000, "High School" = 2000000,
                "Undergraduate" = 5000000, "Master" = 9000000,
                "Ph.D" = 15000000)
income_max <- c("Junior School" = 1500000, "High School" = 3000000,
                "Undergraduate" = 7000000, "Master" = 12000000,
                "Ph.D" = 20000000)
Incomes <- round(
  runif(n_customers,
        unname(income_min[Academic]),
        unname(income_max[Academic])) / 100000
) * 100000

# Spending is 50% to 60% of income, with the share rounded to whole percents.
Spending <- Incomes * (round(runif(n_customers, 0.5, 0.6) / 0.01) * 0.01)

# Married customers get an independent draw from 0..10 each. The previous
# sample(c(0:10)) produced one permutation of 11 values that ifelse() recycled
# down the column, so the counts repeated with period 11.
# NOTE(review): unmarried customers are given 1 child, as before; confirm
# whether 0 was intended.
Number_Of_Children <- ifelse(Marital_Status == "Yes",
                             sample(0:10, n_customers, replace = TRUE),
                             1)

Private_Vehicles <- sample(c("Car", "Motorcycle", "Public"),
                           n_customers, replace = TRUE)
Home <- sample(c("Rent", "Own", "Credit"),
               n_customers, replace = TRUE)

ABCIns_Company <- data.frame(Marital_Status,
                             Address,
                             Work_Loc,
                             Age,
                             Academic,
                             Jobs,
                             Grades,
                             Incomes,
                             Spending,
                             Number_Of_Children,
                             Private_Vehicles,
                             Home)
library(DT)
datatable(ABCIns_Company)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
What kind of important summary statistics can you get from your data set?
Answer: The important summary statistics that we can get from the data set are:
- Age: customers who hold insurance and are retained by ABC Insurance Company have a minimum age of 19 years and a maximum age of 60 years.
- Income: used to judge whether a customer can apply for insurance; customers have an income of at least 1 million.
- Spending: used for comparison with income, because expenditure cannot be greater than income; here it is about 1/2 of income. Income and spending together show whether a customer can make stable payments.
- Number of children: knowing how many children a customer has can be used when choosing an insurance product, for example education insurance.
## Marital_Status Address Work_Loc Age
## Length:50000 Length:50000 Length:50000 Min. :19.00
## Class :character Class :character Class :character 1st Qu.:29.00
## Mode :character Mode :character Mode :character Median :40.00
## Mean :39.57
## 3rd Qu.:50.00
## Max. :60.00
## Academic Jobs Grades Incomes
## Length:50000 Length:50000 Length:50000 Min. : 1000000
## Class :character Class :character Class :character 1st Qu.: 2300000
## Mode :character Mode :character Mode :character Median : 6000000
## Mean : 7539210
## 3rd Qu.:11200000
## Max. :20000000
## Spending Number_Of_Children Private_Vehicles Home
## Min. : 500000 Min. : 0.000 Length:50000 Length:50000
## 1st Qu.: 1239000 1st Qu.: 1.000 Class :character Class :character
## Median : 3294000 Median : 1.000 Mode :character Mode :character
## Mean : 4146088 Mean : 3.001
## 3rd Qu.: 6160000 3rd Qu.: 5.000
## Max. :12000000 Max. :10.000
According to your calculations and analysis, which customers have the potential to be retained? - Potential customers are those for whom 3% of income is smaller than income minus spending.
# A customer is worth retaining when 3% of their income is smaller than what
# is left after spending (income minus spending). The factor was 0.3 (30%),
# which contradicted the 3% rule stated for this analysis.
ABCIns_Company$Potential <- ifelse(
  0.03 * ABCIns_Company$Incomes <
    ABCIns_Company$Incomes - ABCIns_Company$Spending,
  "Yes",
  "No"
)

# %in% never yields NA, so a customer with a missing income or spending is
# left out instead of showing up as an all-NA row.
Potential <- ABCIns_Company[ABCIns_Company$Potential %in% "Yes", ]
library(DT)
datatable(Potential)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
The following types of insurance are available in Indonesia:
Which insurance product will you provide to your customers?
# Calculation & analysis: pick one insurance product for every customer.
# The rules are tried from top to bottom and the first match wins; customers
# matching no rule fall back to "Marine Insurance".
Customers_Insurances <- with(
  ABCIns_Company,
  ifelse(
    # Age 50 is included here. With `Age > 50` in this rule and `Age < 50` in
    # the next one, customers aged exactly 50 could never match either.
    Age >= 50 & Incomes > 1000000,
    "Life Insurance",
  ifelse(
    Age < 50 & Incomes > 2000000,
    "Health Insurance",
  ifelse(
    Private_Vehicles == "Car",
    "Vehicle Insurance",
  ifelse(
    Home == "Own" & Private_Vehicles == "Motorcycle",
    "Home and property Insurance",
  ifelse(
    Age > 30 & Marital_Status == "Yes" &
      Number_Of_Children > 1 & Incomes < 2000000,
    "General Insurance",
  ifelse(
    Home == "Credit" & Incomes < 3000000 & Spending < 700000,
    "Credit Insurance",
  ifelse(
    # NOTE(review): the income summary in this report shows a minimum of
    # 1000000, so `Incomes < 1000000` never holds for that data and this rule
    # cannot fire; confirm the intended threshold.
    Jobs == "Staff" & Incomes < 1000000,
    "Business Insurance",
  ifelse(
    Age < 25 & Incomes > 1000000,
    "Education Insurance",
    "Marine Insurance"
  ))))))))
)

# Add the new variable to the data set (one assigned product per customer).
ABCIns_Company$Customers_Insurances <- Customers_Insurances
library(DT)
datatable(ABCIns_Company)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
Laptop saya sangat amat ngelag pak, sampe saya sangat amat sabar pak nungguinnya, ngefreeze bisa 20 menit, udah gitu ga cuma sekali pak, mohon dimaklumi ya pak :(, maaf saya curhat pak >-<