# Load the customer data set from the working directory and print its
# structure (column names, types and leading values).
df <- read.csv(file = "ifood_df.csv")
str(df)
## 'data.frame': 2240 obs. of 39 variables:
## $ Income : int 58138 46344 71613 26646 58293 62513 55635 33454 30351 5648 ...
## $ Kidhome : int 0 1 0 1 1 0 0 1 1 1 ...
## $ Teenhome : int 0 1 0 0 0 1 1 0 0 1 ...
## $ Recency : int 58 38 26 26 94 16 34 32 19 68 ...
## $ MntWines : int 635 11 426 11 173 520 235 76 14 28 ...
## $ MntFruits : int 88 1 49 4 43 42 65 10 0 0 ...
## $ MntMeatProducts : int 546 6 127 20 118 98 164 56 24 6 ...
## $ MntFishProducts : int 172 2 111 10 46 0 50 3 3 1 ...
## $ MntSweetProducts : int 88 1 21 3 27 42 49 1 3 1 ...
## $ MntGoldProds : int 88 6 42 5 15 14 27 23 2 13 ...
## $ NumDealsPurchases : int 3 2 1 2 5 2 4 2 1 1 ...
## $ NumWebPurchases : int 8 1 8 2 5 6 7 4 3 1 ...
## $ NumCatalogPurchases : int 10 1 2 0 3 4 3 0 0 0 ...
## $ NumStorePurchases : int 4 2 10 4 6 10 7 4 2 0 ...
## $ NumWebVisitsMonth : int 7 5 4 6 5 6 6 8 9 20 ...
## $ AcceptedCmp3 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ AcceptedCmp4 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AcceptedCmp5 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AcceptedCmp1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AcceptedCmp2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Complain : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Z_CostContact : int 3 3 3 3 3 3 3 3 3 3 ...
## $ Z_Revenue : int 11 11 11 11 11 11 11 11 11 11 ...
## $ Response : int 1 0 0 0 0 0 0 0 1 0 ...
## $ Age : int 63 66 55 36 39 53 49 35 46 70 ...
## $ Customer_Days : int 2822 2272 2471 2298 2320 2452 2752 2576 2547 2267 ...
## $ marital_Divorced : int 0 0 0 0 0 0 1 0 0 0 ...
## $ marital_Married : int 0 0 0 0 1 0 0 1 0 0 ...
## $ marital_Single : int 1 1 0 0 0 0 0 0 0 0 ...
## $ marital_Together : int 0 0 1 1 0 1 0 0 1 1 ...
## $ marital_Widow : int 0 0 0 0 0 0 0 0 0 0 ...
## $ education_2n.Cycle : int 0 0 0 0 0 0 0 0 0 0 ...
## $ education_Basic : int 0 0 0 0 0 0 0 0 0 0 ...
## $ education_Graduation: int 1 1 1 1 0 0 1 0 0 0 ...
## $ education_Master : int 0 0 0 0 0 1 0 0 0 0 ...
## $ education_PhD : int 0 0 0 0 1 0 0 1 1 1 ...
## $ Spent : int 1617 27 776 53 422 716 590 169 46 49 ...
## $ AcceptedCmpOverall : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Dt_Customer : chr "9/4/12" "3/8/14" "8/21/13" "2/10/14" ...
Given the structure shown above, I need to convert some of these
variables to more appropriate types.
# Columns to convert to factors, selected by name rather than by position so
# that a change in the CSV column order cannot silently convert the wrong
# columns. These are the same columns the positions 16:21 and 27:36 refer to
# in the structure printed above.
accept_cols <- c(
  "AcceptedCmp3", "AcceptedCmp4", "AcceptedCmp5",
  "AcceptedCmp1", "AcceptedCmp2", "Complain"
)
# Marital-status and education dummy columns.
me_cols <- c(
  "marital_Divorced", "marital_Married", "marital_Single",
  "marital_Together", "marital_Widow",
  "education_2n.Cycle", "education_Basic", "education_Graduation",
  "education_Master", "education_PhD"
)
# Fail fast if the file does not contain the expected columns.
stopifnot(all(c(accept_cols, me_cols) %in% names(df)))
df[accept_cols] <- lapply(df[accept_cols], factor)
df[me_cols] <- lapply(df[me_cols], factor)
############### next create new variables and assign their value
# Derive per-customer summary variables, drop the columns that are no longer
# needed, and parse the enrollment date.
df <- df %>%
  mutate(
    # Per the original author's note, the Mnt* amounts cover two years, so the
    # total is halved to obtain an average per year.
    Spent = (MntFishProducts + MntMeatProducts + MntFruits +
      MntSweetProducts + MntWines + MntGoldProds) / 2,
    Total_purchases = NumCatalogPurchases + NumStorePurchases + NumWebPurchases,
    # "No" when the household has neither kids nor teens, otherwise "Yes".
    Has_minor = as.factor(ifelse(Kidhome + Teenhome == 0, "No", "Yes"))
  ) %>%
  select(-c(Z_CostContact, Z_Revenue, Kidhome, Teenhome, Response)) %>%
  # Dt_Customer is read as month/day/year text; convert it to a Date.
  mutate(Dt_Customer = mdy(Dt_Customer))
Now I want to go further. I will create more dummy and categorical
variables.
# Goal: collapse each family of 0/1 dummy columns (education, marital status,
# accepted campaign) into a single labelled column.

# Returns one label per row of `data`.
#   labels  - named character vector: name = dummy column, value = label,
#             listed in priority order (the first column equal to 1 wins).
#   default - label used when none of the listed columns equals 1.
# A missing value in a dummy column yields NA unless a higher-priority column
# equals 1, exactly as a nested ifelse() chain would.
first_flag_label <- function(data, labels, default) {
  out <- default
  # Fold from the lowest-priority column outward so earlier entries override
  # later ones.
  for (col in rev(names(labels))) {
    out <- ifelse(data[[col]] == 1, labels[[col]], out)
  }
  out
}

# "ERROR" marks rows where no education dummy is set.
df$Education <- first_flag_label(
  df,
  c(
    education_Master = "Master",
    education_Graduation = "Undergrad",
    education_Basic = "Diploma",
    education_PhD = "PhD",
    education_2n.Cycle = "Community"
  ),
  "ERROR"
)

# marital_Single is not tested: every row without another marital dummy set
# falls through to "Single".
df$Marital <- first_flag_label(
  df,
  c(
    marital_Divorced = "Divorced",
    marital_Married = "Married",
    marital_Together = "Domestic",
    marital_Widow = "Widow"
  ),
  "Single"
)

# A customer who accepted several campaigns is labelled with the first match
# in the order Cmp3, Cmp4, Cmp5, Cmp1, Cmp2.
df$AcceptedCmp <- first_flag_label(
  df,
  c(
    AcceptedCmp3 = "Cmp3",
    AcceptedCmp4 = "Cmp4",
    AcceptedCmp5 = "Cmp5",
    AcceptedCmp1 = "Cmp1",
    AcceptedCmp2 = "Cmp2"
  ),
  "Cmp_Denied"
)
# for regression
# column "Response" is not accurate, need to remove
# Two-level factors used as regressors:
#   Relationship - 0 for "Single", 1 for any other marital label
#   Campaign     - 1 when at least one campaign was accepted, else 0
#   College      - 0 for "Diploma", 1 for any other education label
#   AcceptedCmp0 - 1 when no campaign was accepted (inverse of Campaign)
df <- df %>%
  mutate(
    Relationship = as.factor(ifelse(Marital == "Single", 0, 1)),
    Campaign = as.factor(ifelse(AcceptedCmp == "Cmp_Denied", 0, 1)),
    College = as.factor(ifelse(Education == "Diploma", 0, 1)),
    AcceptedCmp0 = as.factor(ifelse(AcceptedCmp == "Cmp_Denied", 1, 0))
  )
str(df)
## 'data.frame': 2240 obs. of 43 variables:
## $ Income : int 58138 46344 71613 26646 58293 62513 55635 33454 30351 5648 ...
## $ Recency : int 58 38 26 26 94 16 34 32 19 68 ...
## $ MntWines : int 635 11 426 11 173 520 235 76 14 28 ...
## $ MntFruits : int 88 1 49 4 43 42 65 10 0 0 ...
## $ MntMeatProducts : int 546 6 127 20 118 98 164 56 24 6 ...
## $ MntFishProducts : int 172 2 111 10 46 0 50 3 3 1 ...
## $ MntSweetProducts : int 88 1 21 3 27 42 49 1 3 1 ...
## $ MntGoldProds : int 88 6 42 5 15 14 27 23 2 13 ...
## $ NumDealsPurchases : int 3 2 1 2 5 2 4 2 1 1 ...
## $ NumWebPurchases : int 8 1 8 2 5 6 7 4 3 1 ...
## $ NumCatalogPurchases : int 10 1 2 0 3 4 3 0 0 0 ...
## $ NumStorePurchases : int 4 2 10 4 6 10 7 4 2 0 ...
## $ NumWebVisitsMonth : int 7 5 4 6 5 6 6 8 9 20 ...
## $ AcceptedCmp3 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ AcceptedCmp4 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ AcceptedCmp5 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ AcceptedCmp1 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ AcceptedCmp2 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ Complain : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ Age : int 63 66 55 36 39 53 49 35 46 70 ...
## $ Customer_Days : int 2822 2272 2471 2298 2320 2452 2752 2576 2547 2267 ...
## $ marital_Divorced : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ marital_Married : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 1 1 ...
## $ marital_Single : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
## $ marital_Together : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 1 1 2 2 ...
## $ marital_Widow : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ education_2n.Cycle : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ education_Basic : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ education_Graduation: Factor w/ 2 levels "0","1": 2 2 2 2 1 1 2 1 1 1 ...
## $ education_Master : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ education_PhD : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 2 2 ...
## $ Spent : num 808.5 13.5 388 26.5 211 ...
## $ AcceptedCmpOverall : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Dt_Customer : Date, format: "2012-09-04" "2014-03-08" ...
## $ Total_purchases : int 22 4 20 6 14 20 17 8 5 1 ...
## $ Has_minor : Factor w/ 2 levels "No","Yes": 1 2 1 2 2 2 2 2 2 2 ...
## $ Education : chr "Undergrad" "Undergrad" "Undergrad" "Undergrad" ...
## $ Marital : chr "Single" "Single" "Domestic" "Domestic" ...
## $ AcceptedCmp : chr "Cmp_Denied" "Cmp_Denied" "Cmp_Denied" "Cmp_Denied" ...
## $ Relationship : Factor w/ 2 levels "0","1": 1 1 2 2 2 2 2 2 2 2 ...
## $ Campaign : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ College : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ AcceptedCmp0 : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
The new structure shows everything worked out. Now I can check for
missing data.
# Count the missing cells across the whole data frame (every column, every row).
sum(is.na(df)) #check for missing data
## [1] 1505
# Discard every row that contains at least one NA (the author reports these
# are empty rows at the bottom of the file).
df <- drop_na(df)
There are 1505 NA values. After viewing the data set, I found that most
of them come from empty rows, so those rows are removed.
Exploratory
Analysis
Customer Age
# Customer age distribution: a density-scaled histogram overlaid with a
# kernel density estimate.
df.age <- ggplot(df, aes(Age))
(df.age +
  # after_stat(density) replaces the `..density..` notation, which is
  # deprecated since ggplot2 3.4.0; the plot itself is unchanged.
  geom_histogram(aes(y = after_stat(density)), colour = "black", fill = "#CCCCFF") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ylab("Distribution") +
  ggtitle("Customer Age") +
  theme_classic())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The average age of our consumers within the given data set falls
between the ages of 40-50 years. We also have a wide range of customers
over the age of 60.
# Customer age distribution, one panel (row) per education level.
df.age <- ggplot(df, aes(Age))
(df.age +
  # after_stat(density) replaces the `..density..` notation, which is
  # deprecated since ggplot2 3.4.0; the plot itself is unchanged.
  geom_histogram(aes(y = after_stat(density)), colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ylab("Distribution") +
  ggtitle("Customer Age per Education") +
  theme_classic() +
  facet_grid(Education ~ .))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

It is observed that customers under the age of 40 typically carry a
diploma or have some college experience, but our most active customers
(40-60 years) have a BA/BS or above.
Education and
Income
# Income by education level as horizontal notched boxplots; outliers are drawn
# as large red stars. Note that the `df.age` name is reused here for the
# income plot.
df.age <- ggplot(
  data = df,
  mapping = aes(x = Education, y = Income, fill = Education)
)
(df.age +
  geom_boxplot(
    notch = TRUE,
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 6
  ) +
  ggtitle("Customer Income") +
  theme_bw() +
  coord_flip())

Customers with some college experience appear to have more disposable
income.
Product Types
# Exploring products: reshape the per-category spending columns into long
# format (one row per customer and product type).
# MntFishProducts is included so that the breakdown covers the same six
# categories that are summed into Spent; the columns are picked by name
# instead of by position (1:5), and pivot_longer() replaces the superseded
# gather().
products <- df %>%
  select(
    MntWines, MntFruits, MntMeatProducts, MntFishProducts,
    MntSweetProducts, MntGoldProds,
    Spent, Has_minor, Education, Age
  ) %>%
  pivot_longer(
    cols = starts_with("Mnt"),
    names_to = "Product_type",
    values_to = "Total_amount"
  )
# Total amount per product type, used as the pie-chart slices.
products_test <- products %>%
  group_by(Product_type) %>%
  summarise(SumProduct = sum(Total_amount))
PieChart(Product_type, SumProduct,
  data = products_test, hole = 0,
  values = "input"
)

Customers seem to favor wine products above all else.
Traffic Source

Most of our purchases are in-store but deal purchases have the
weakest conversion rate at 16 percent.
# Bar chart of the amount spent per product type, faceted by education (rows)
# and by whether the customer has a minor at home (columns). Bars stack the
# individual customer amounts, so bar height is the group total.
product.bp <- ggplot(
  data = products,
  mapping = aes(x = Product_type, y = Total_amount)
)
(product.bp +
  geom_col(width = 0.5, fill = "#cc3333") +
  facet_grid(Education ~ Has_minor) +
  labs(
    title = "Customer Purchases",
    x = "Product Type",
    y = "Total Amount Spent"
  ) +
  theme_linedraw())

We see that customers with a diploma rarely purchase any grocery
products. We can remove this set from the visuals.
# Same bar chart as before with the "Diploma" education group excluded.
# `products_t` ends up holding the base ggplot object built on the filtered
# data.
products_t <- ggplot(
  data = filter(products, Education != "Diploma"),
  mapping = aes(x = Product_type, y = Total_amount)
)
(products_t +
  geom_col(width = 0.5, fill = "steelblue") +
  facet_grid(Education ~ Has_minor) +
  labs(
    title = "Customer Purchases",
    x = "Product Type",
    y = "Total Amount Spent"
  ) +
  theme_linedraw())

Now with a compact set, we can see activity level given the education
and if the customer has a minor(yes/no).
Wine and meat products have the highest level of consumption for all
demographics.
Customer
Spending
# Scatter plot of mean yearly spending per age with a linear trend line.
# The mean is taken over the long-format `products` table, grouped by Age.
age.s <- summarize(group_by(products, Age), avg = mean(Spent))
product.s <- ggplot(data = age.s, mapping = aes(x = Age, y = avg))
(product.s +
  geom_point() +
  geom_smooth(
    method = "lm",
    linetype = "dashed",
    color = "darkred",
    fill = "blue"
  ) +
  labs(
    title = "Customer Spending per Age",
    x = "Age",
    y = "Average Expenditure"
  ) +
  theme_bw())
## `geom_smooth()` using formula = 'y ~ x'

We see that average spending on groceries is higher for customers under
40 and over 60, while average spending remains fairly consistent for those
aged 40-60.
Campaign Data
# Campaign analysis restricted to customers who accepted at least one campaign.
campaign <- df %>%
  select(
    Total_purchases, Spent, Education, Marital,
    AcceptedCmp, Has_minor, Age, Dt_Customer
  ) %>%
  filter(AcceptedCmp != "Cmp_Denied")
cmp1 <- ggplot(campaign, aes(x = AcceptedCmp, fill = Education))
cmp2 <- ggplot(campaign, aes(x = AcceptedCmp, y = Total_purchases))
# Left panel: number of accepting customers per campaign, stacked by education.
p1 <- (cmp1 +
  geom_bar(position = "stack", width = 0.5) +
  theme_linedraw() +
  ylab("Count") + xlab("Campaign") +
  scale_fill_manual("Education",
    values = c("#99cc00", "#003300", "#00c000", "#ccff00", "#66cc66", "#33ff33")
  ))
# Right panel: summed Total_purchases per campaign, one row per education
# level. Total_purchases is a count of purchases, not a monetary amount, so
# the axis is labelled accordingly (it was previously labelled "Revenue").
p2 <- (cmp2 +
  geom_bar(stat = "identity", width = 0.5, color = "#ccff33") +
  theme_linedraw() +
  facet_grid(Education ~ .) +
  ylab("Total Purchases") + xlab("Campaign"))
cowplot::plot_grid(p1, p2, labels = c("Count", "Sales"), label_size = 7)

Undergrads have been highly responsive to the campaigns issued.
Campaigns 3 and 4 have had the most success in this data frame and
should be focused on for profitable gains.
It seems that Campaign 2 failed to reach its target audience and
had little to no interaction in any subgroup.


Customer Last
Visit
# Mean Recency per education level, marital status and minor status.
avg.r.education <- summarize(group_by(df, Education), avg = mean(Recency))
avg.r.marital <- summarize(group_by(df, Marital), avg = mean(Recency))
avg.r.has_minor <- summarize(group_by(df, Has_minor), avg = mean(Recency))
# The three summaries have different key columns, so the stacked table has one
# key column per grouping, filled with NA for rows from the other groupings.
avg.r <- bind_rows(avg.r.education, avg.r.marital, avg.r.has_minor)
print(avg.r)
## # A tibble: 12 × 4
## Education avg Marital Has_minor
## <chr> <dbl> <chr> <fct>
## 1 Community 48.4 <NA> <NA>
## 2 Diploma 48.4 <NA> <NA>
## 3 Master 47.3 <NA> <NA>
## 4 PhD 48.5 <NA> <NA>
## 5 Undergrad 49.9 <NA> <NA>
## 6 <NA> 49.1 Divorced <NA>
## 7 <NA> 50.2 Domestic <NA>
## 8 <NA> 48.2 Married <NA>
## 9 <NA> 49.0 Single <NA>
## 10 <NA> 48.8 Widow <NA>
## 11 <NA> 48.9 <NA> No
## 12 <NA> 49.0 <NA> Yes
On average, about 47-50 days have passed since a customer's last
visit, regardless of education, marital status, or having a minor.
How Does Age
Factor?
# Mean Recency per age, plotted with a linear trend line.
age.r <- df %>%
  group_by(Age) %>%
  summarize(avg = mean(Recency))
avg_age_recency <- ggplot(age.r, aes(x = Age, y = avg))
(avg_age_recency +
  geom_point() +
  ylab("Avg. Days") + xlab("Age") +
  theme_bw() +
  # Title typo fixed: "Day's" -> "Days".
  ggtitle("Days Since Last Visit (Avg.)") +
  geom_smooth(
    method = lm, linetype = "dashed",
    color = "darkred", fill = "blue"
  ))
## `geom_smooth()` using formula = 'y ~ x'

Customer
Enrollment
# Density of enrollment dates, split by minor status, with a dashed line at
# the mean enrollment date. Built on `campaign`, i.e. only customers who
# accepted at least one campaign.
visit1 <- ggplot(
  data = campaign,
  mapping = aes(x = Dt_Customer, fill = Has_minor)
)
(visit1 +
  geom_density(alpha = 0.4) +
  geom_vline(
    aes(xintercept = mean(Dt_Customer)),
    color = "blue",
    linetype = "dashed"
  ) +
  scale_fill_manual("Has Minor", values = c("orange", "lightblue")) +
  labs(title = "Customer Enrollment", x = "Enrollment Date", y = "Density") +
  theme_classic() +
  theme(legend.position = "top"))

Customers enrolled at higher rates during the summer time.
Promotional offers should be focused on during this season.
# Mean Total_purchases for each combination of minor status, education level
# and enrollment date. The result stays grouped by Has_minor and Education.
visit_avg <- summarize(
  group_by(campaign, Has_minor, Education, Dt_Customer),
  avg = mean(Total_purchases)
)
## `summarise()` has grouped output by 'Has_minor', 'Education'. You can override
## using the `.groups` argument.
# Average purchases against enrollment date, coloured by minor status.
visit2 <- ggplot(
  data = visit_avg,
  mapping = aes(x = Dt_Customer, y = avg, color = Has_minor)
)
(visit2 +
  geom_point() +
  scale_color_manual("Has Minor", values = c("orange", "lightblue")) +
  labs(
    title = "Customer Enrollment vs. Avg Purchases",
    x = "Enrollment Date",
    y = "Avg Purchase"
  ))

Across enrollment dates, customers without children tend to make more
purchases on average than customers who have minors.
# Same data drawn as lines, one panel per education level (rows) and minor
# status (columns).
(visit2 +
  geom_line() +
  facet_grid(Education ~ Has_minor) +
  scale_color_manual(
    "Has Minor",
    labels = c("No", "Yes"),
    values = c("orange", "lightblue")
  ) +
  labs(x = "Enrollment Date", y = "Avg Purchase"))
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Testing
Correlation Test
# Pearson correlation tests of yearly spending against age and against income.
cor_test_Age_Spent <- cor.test(x = df$Age, y = df$Spent)
cor_test_Income_Spent <- cor.test(x = df$Income, y = df$Spent)
print(cor_test_Age_Spent)
##
## Pearson's product-moment correlation
##
## data: df$Age and df$Spent
## t = 5.5968, df = 2203, p-value = 0.00000002455
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.07704195 0.15936015
## sample estimates:
## cor
## 0.1184045
# Show the Income-vs-Spent Pearson correlation test computed earlier.
print(cor_test_Income_Spent)
##
## Pearson's product-moment correlation
##
## data: df$Income and df$Spent
## t = 68.201, df = 2203, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8098755 0.8367401
## sample estimates:
## cor
## 0.8237697
As expected, Income and Spent are strongly positively correlated
(r = 0.82), so Income should be a strong predictor in our models.
Primary Model
\[Spent_i = \beta_1 + \beta_2 Age_i +
\beta_3 Income_i + \beta_4 Relationship_i + \beta_5 College_i +
\beta_6 Campaign_i + \beta_7 Minor_i + \beta_8 Complain_i + u_i\] \[H_0: \beta_n = 0\] Null Hypothesis:
The coefficient has zero impact on spending. \[H_A: \beta_n \neq 0\] Alternative
Hypothesis: The coefficient has a nonzero impact on spending; we accept
this when the null is rejected.
Variables for the model:
\(Spent\): This will examine if
customer spending would increase or decrease given our
estimators.
\(Age\): Age of our
customers.
\(Income\): The customer's
income per year.
\(Relationship\): Dummy var
determining if a customer in a relationship impacts spending per
year.
\(College\): Dummy var
determining if a customer with higher education vs basic impacts
spending habits.
\(Campaign\): Dummy var
determining if campaigns work on increasing customer spending per
year.
\(Minor\): Dummy var determining
if having a minor impacts spending per year.
\(Complain\): Dummy var
determining if a customer having complained impacts spending per
year.
\(u\): Is our error term (Other
factor that affect Spending other than the estimators)
# Regression models of yearly spending (Spent).
# Primary model: age, income and the two-level dummies.
md1 <- lm(
  Spent ~ Age + Income + Relationship + Has_minor + College + Campaign +
    Complain,
  data = df
)
# Alternative specifications, compared on adjusted R^2.
# md2 replaces Relationship with the individual marital-status dummies.
md2 <- lm(
  Spent ~ Age + Income +
    marital_Married + marital_Divorced + marital_Widow + marital_Together +
    Campaign + College + Has_minor + Complain,
  data = df
)
# md3 replaces Campaign with the individual campaign-acceptance dummies.
md3 <- lm(
  Spent ~ Age + Income + Relationship + Has_minor + College +
    AcceptedCmp1 + AcceptedCmp5 + AcceptedCmp3 + AcceptedCmp4 + AcceptedCmp2 +
    Complain,
  data = df
)
# md4 replaces College with the individual education dummies.
md4 <- lm(
  Spent ~ Age + Income +
    education_PhD + education_Master + education_Graduation +
    education_2n.Cycle +
    Campaign + Has_minor + Complain,
  data = df
)
# Side-by-side coefficient table; label typo "Reltionship" corrected.
tab_model(md1, md2, md3, md4,
  dv.labels = c(
    "First Model", "Relationship Model", "Campaign Model", "Education Model"
  )
)
|  | First Model |  |  | Reltionship Model |  |  | Campaign Model |  |  | Education Model |  |  |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | -51.17 | -100.76 – -1.57 | 0.043 | -48.76 | -98.60 – 1.08 | 0.055 | -54.12 | -103.94 – -4.30 | 0.033 | -48.53 | -97.91 – 0.85 | 0.054 |
| Age | -0.94 | -1.51 – -0.37 | 0.001 | -0.99 | -1.57 – -0.41 | 0.001 | -0.74 | -1.32 – -0.17 | 0.012 | -0.94 | -1.51 – -0.36 | 0.001 |
| Income | 0.01 | 0.01 – 0.01 | <0.001 | 0.01 | 0.01 – 0.01 | <0.001 | 0.01 | 0.01 – 0.01 | <0.001 | 0.01 | 0.01 – 0.01 | <0.001 |
| Relationship [1] | 3.62 | -12.25 – 19.49 | 0.655 |  |  |  | 2.07 | -13.82 – 17.95 | 0.799 |  |  |  |
| Has minor [Yes] | -133.34 | -149.34 – -117.34 | <0.001 | -133.25 | -149.27 – -117.24 | <0.001 | -128.82 | -145.21 – -112.43 | <0.001 | -133.54 | -149.52 – -117.56 | <0.001 |
| College [1] | -60.95 | -104.41 – -17.49 | 0.006 | -60.97 | -104.46 – -17.47 | 0.006 | -59.23 | -102.70 – -15.77 | 0.008 |  |  |  |
| Campaign [1] | 94.43 | 77.40 – 111.47 | <0.001 | 94.56 | 77.52 – 111.60 | <0.001 |  |  |  | 94.52 | 77.49 – 111.55 | <0.001 |
| Complain [1] | -22.13 | -90.29 – 46.02 | 0.524 | -21.42 | -89.62 – 46.77 | 0.538 | -26.50 | -94.67 – 41.66 | 0.446 | -22.93 | -91.13 – 45.27 | 0.510 |
| marital Married [1] |  |  |  | -1.26 | -18.69 – 16.18 | 0.888 |  |  |  |  |  |  |
| marital Divorced [1] |  |  |  | 7.79 | -16.81 – 32.39 | 0.535 |  |  |  |  |  |  |
| marital Widow [1] |  |  |  | 13.34 | -24.84 – 51.51 | 0.493 |  |  |  |  |  |  |
| marital Together [1] |  |  |  | 8.52 | -10.49 – 27.53 | 0.379 |  |  |  |  |  |  |
| AcceptedCmp1 [1] |  |  |  |  |  |  | 49.19 | 19.13 – 79.25 | 0.001 |  |  |  |
| AcceptedCmp5 [1] |  |  |  |  |  |  | 88.41 | 58.10 – 118.72 | <0.001 |  |  |  |
| AcceptedCmp3 [1] |  |  |  |  |  |  | 58.65 | 33.51 – 83.80 | <0.001 |  |  |  |
| AcceptedCmp4 [1] |  |  |  |  |  |  | 50.32 | 22.94 – 77.70 | <0.001 |  |  |  |
| AcceptedCmp2 [1] |  |  |  |  |  |  | 9.25 | -50.02 – 68.51 | 0.760 |  |  |  |
| education PhD [1] |  |  |  |  |  |  |  |  |  | -61.72 | -107.26 – -16.18 | 0.008 |
| education Master [1] |  |  |  |  |  |  |  |  |  | -63.27 | -109.19 – -17.35 | 0.007 |
| education Graduation [1] |  |  |  |  |  |  |  |  |  | -55.31 | -99.17 – -11.45 | 0.013 |
| education 2n Cycle [1] |  |  |  |  |  |  |  |  |  | -76.87 | -124.43 – -29.30 | 0.002 |
| Observations | 2205 |  |  | 2205 |  |  | 2205 |  |  | 2205 |  |  |
| R2 / R2 adjusted | 0.737 / 0.736 |  |  | 0.737 / 0.736 |  |  | 0.737 / 0.736 |  |  | 0.737 / 0.736 |  |  |

Interpretation of
Results
Let's analyze the coefficients:
\(b_1 = -51.17\). This is the
intercept: the predicted yearly spending when Age and Income are zero and
every dummy variable is at its baseline level (single, no minor, diploma,
no accepted campaign, no complaint). It has little practical meaning on
its own. This estimate is significant at the 5% level.
\(b_2Age= -0.94\). This model
suggests that for each additional year of age, we can expect yearly
spending to drop by $0.94, holding other estimators fixed. This estimator
is significant at the 5% level.
\(b_3Income = 0.01\). Meaning we
can predict yearly spending to increase by about $0.01 for each additional
dollar of income, holding other estimators fixed. This estimator is
significant at the 5% level.
\(b_4Relationship = 3.62\).
Meaning customers in relationships tend to spend $3.62 more per year
than those that are single. However, the p-value of this estimator
(0.655) shows that the result is not significant, so we can disregard
this coefficient in our study.
\(b_5College= -60.95\). This
model suggests that customers with college experience
spend $60.95 less per year than those with a diploma, holding other
estimators fixed. This estimator is significant at the 5% level.
\(b_6Campaign = 94.43\). Meaning
we can predict customers who accepted at least one campaign to spend
$94.43 more per year than those who accepted none, holding other
estimators fixed. This estimator is significant at the 5% level.
\(b_7Minor= -133.34\). This
model suggests that customers with minors spend $133.34 less
per year when shopping, possibly due to other priorities such as child
expenses. This estimator is significant at the 5% level, holding other
estimators fixed.
\(b_8Complain = -22.13\).
Meaning we can predict customers who filed a complaint to spend $22.13
less per year than those who did not. However, the p-value of this
estimator (0.524) shows that the result is not significant, so we can
disregard this coefficient in our study.
\(R^{2} = 0.737\). Means that
73.7% of the variation in spending is explained by this model.