## Package setup.
## ggcorrplot is installed only when it is not already available, so that
## re-running the script does not reinstall it every time. The package name
## must be wrapped in plain ASCII double quotes; typographic quotes do not parse.
if (!requireNamespace("ggcorrplot", quietly = TRUE)) {
  install.packages("ggcorrplot")
}
library(readxl)
library(ggplot2)
library(plotly)
library(ggcorrplot)
Extracting the dataset from Excel
## Read the marketing-campaign workbook. The raw import keeps its original
## name; the analysis works on a plain data.frame copy called mktg_data.
## NOTE(review): the absolute path is specific to one machine - confirm it
## before running elsewhere.
Final_Assignment_datasets_marketing_campaign_SF <- read_excel(
  path = "C:/Users/Adiza Ojei/Desktop/2021 Hard drive Docs/HULT ONLINE CLASS/Data Analytics/Final Assignment _datasets_marketing_campaign_SF.xlsx"
)
mktg_data <- as.data.frame(Final_Assignment_datasets_marketing_campaign_SF)
Q1: What factors seem to drive web purchases?
Plotting Income vs Number of Web Purchases taking into account Education
## Income against number of web purchases, coloured by Education.
## The x-axis is limited to 1..170000, which leaves the one extreme Income
## value (about 666k) out of the plot.
chart1 <- ggplot(
  data = mktg_data,
  mapping = aes(x = Income, y = NumWebPurchases, colour = Education)
) +
  geom_point() +
  labs(x = "Income", y = "Number of Web Purchases") +
  xlim(1, 170000)
ggplotly(chart1)
Plotting age vs Number of Web Purchases taking into account Number of kids at home
### Derive an age column from Year_Birth.
### Age is measured against the fixed reference year 2021, so it does not
### change when the script is re-run in a later year.
### (The former `mktg_data$age <- c()` placeholder was dropped: assigning an
### empty value before the real assignment adds nothing.)
mktg_data$age <- 2021 - mktg_data$Year_Birth
## Age against number of web purchases, coloured by the number of kids at home
## (Kidhome is converted to a factor so it gets a discrete colour scale).
chart2 <- ggplot(
  data = mktg_data,
  mapping = aes(x = age, y = NumWebPurchases, colour = factor(Kidhome))
) +
  geom_point() +
  labs(x = "Age", y = "Number of Web Purchases")
ggplotly(chart2)
Analysis of WebPurchases vs Education shows that PhDs and Graduates do maximum webpurchases
## Column chart of web purchases per Education level.
## NOTE(review): there are many rows per Education level and the dodged columns
## are drawn on top of each other, so the visible bar height is presumably the
## largest single value in each group rather than a total - confirm intent.
chart3 <- ggplot(
  data = mktg_data,
  mapping = aes(x = Education, y = NumWebPurchases, fill = Education)
) +
  geom_col(position = "dodge")
chart3
Analysis of WebPurchases vs Marital Status shows that maximum Web Purchases come from Married Graduates and Single PhDs
## Column chart of web purchases per Marital_Status, with one dodged column
## per Education level inside each status.
## NOTE(review): rows sharing the same status and education overlap, so each
## visible bar presumably shows the group's largest value - confirm intent.
chart4 <- ggplot(
  data = mktg_data,
  mapping = aes(x = Marital_Status, y = NumWebPurchases, fill = Education)
) +
  geom_col(position = "dodge")
chart4
Q2: Is there a relation between web visits and web purchases?
## Simple linear regression: number of web purchases explained by the number
## of web visits per month. The model summary is printed right after fitting.
webvisit_vs_webpurchase <- lm(
  formula = NumWebPurchases ~ NumWebVisitsMonth,
  data = mktg_data
)
summary(webvisit_vs_webpurchase)
##
## Call:
## lm(formula = NumWebPurchases ~ NumWebVisitsMonth, data = mktg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4248 -2.0411 -0.3609 1.7670 22.6391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.42481 0.14123 31.330 <2e-16 ***
## NumWebVisitsMonth -0.06395 0.02417 -2.646 0.0082 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.775 on 2238 degrees of freedom
## Multiple R-squared: 0.003119, Adjusted R-squared: 0.002673
## F-statistic: 7.002 on 1 and 2238 DF, p-value: 0.0082
## Print the fitted intercept and slope of the web-visits model.
coef(webvisit_vs_webpurchase)
## (Intercept) NumWebVisitsMonth
## 4.42480627 -0.06394878
Scatter plot of Number of Web Visits vs. Web Purchases
## Scatter plot of monthly web visits against web purchases, shown as an
## interactive plotly widget.
chart5 <- ggplot(
  data = mktg_data,
  mapping = aes(x = NumWebVisitsMonth, y = NumWebPurchases)
) +
  geom_point() +
  labs(x = "Web Visits", y = "Web Purchase")
ggplotly(chart5)
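**Interpretation:** Web visits have a statistically significant but very weak negative relationship with web purchases (slope of about -0.064, p-value = 0.0082, R-squared of about 0.003).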
Q3: Is there a relation between geographical region and the success of a campaign?
## Linear model of web purchases on Country; the summary is printed after
## fitting.
## NOTE(review): the response here is NumWebPurchases, not a campaign
## acceptance column - confirm this is the intended measure of campaign success.
webvisit_vs_country <- lm(
  formula = NumWebPurchases ~ Country,
  data = mktg_data
)
summary(webvisit_vs_country)
##
## Call:
## lm(formula = NumWebPurchases ~ Country, data = mktg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4404 -2.0018 -0.4404 1.9125 22.9982
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.08750 0.21970 18.605 <2e-16 ***
## CountryCA 0.21847 0.27764 0.787 0.431
## CountryGER -0.11250 0.33559 -0.335 0.737
## CountryIND -0.14155 0.31693 -0.447 0.655
## CountryME 1.91250 1.61941 1.181 0.238
## CountrySA 0.05790 0.26680 0.217 0.828
## CountrySP -0.08567 0.23520 -0.364 0.716
## CountryUS 0.35287 0.34513 1.022 0.307
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.779 on 2232 degrees of freedom
## Multiple R-squared: 0.002948, Adjusted R-squared: -0.0001785
## F-statistic: 0.9429 on 7 and 2232 DF, p-value: 0.4719
## Print the fitted intercept and per-country coefficients.
coef(webvisit_vs_country)
## (Intercept) CountryCA CountryGER CountryIND CountryME CountrySA
## 4.08750000 0.21847015 -0.11250000 -0.14155405 1.91250000 0.05790059
## CountrySP CountryUS
## -0.08567352 0.35286697
## Web purchases by country.
## The ggplot object has to exist before it is passed to ggplotly(), so it is
## built first and converted afterwards. The former `type` and `lwd` arguments
## were removed: they were written with typographic quotes (which do not parse)
## and look like base-graphics options rather than ggplotly() settings.
chart5b <- ggplot(data = mktg_data, aes(x = Country, y = NumWebPurchases)) +
  geom_point() +
  xlab("Country") +
  ylab("Web Purchase")
ggplotly(chart5b)
chart5b
Interpretation: The geographical region is not statistically significant to the success of the campaign
Q4. Average amount spent on fruit products in the last two years is $26.30
## Mean of the MntFruits column, stored and then printed.
avg_amount_fruits <- mean(mktg_data[["MntFruits"]])
print(avg_amount_fruits)
## [1] 26.30223
Q5. Is the variation in the amount spent on fish and meat affected by any qualitative factors? If yes, which ones?
### Complain is treated as the qualitative factor for the fish and meat
### question. The model below finds no significant relation between complaints
### and the amounts spent on meat or fish.
### NOTE(review): the model uses Complain as the response and the spending
### columns as predictors, which is the reverse of how the question is
### phrased - confirm this is intended.
fishNmeat_vs_compain <- lm(
  formula = Complain ~ MntMeatProducts + MntFishProducts,
  data = mktg_data
)
summary(fishNmeat_vs_compain)
##
## Call:
## lm(formula = Complain ~ MntMeatProducts + MntFishProducts, data = mktg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.01133 -0.01112 -0.01056 -0.00813 0.99372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.134e-02 2.618e-03 4.330 1.55e-05 ***
## MntMeatProducts -7.301e-06 1.097e-05 -0.666 0.506
## MntFishProducts -1.982e-05 4.533e-05 -0.437 0.662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0964 on 2237 degrees of freedom
## Multiple R-squared: 0.0006369, Adjusted R-squared: -0.0002566
## F-statistic: 0.7128 on 2 and 2237 DF, p-value: 0.4904
Q6: Fish has Omega-3 fatty acids, which are good for the brain. Accordingly, do people with advanced degrees purchase more fish than others?
## Circular (polar) version of the Education vs. fish-spending column chart:
## the y values are mapped to the angle, starting at 0.
chart6 <- ggplot(
  data = mktg_data,
  mapping = aes(x = Education, y = MntFishProducts, fill = Education)
) +
  geom_col() +
  coord_polar(theta = "y", start = 0)
chart6
**The maximum number of fish purchases comes from people who are Graduates.**
Education v Fish Products bar chart
## Stacked column chart of the amount spent on fish per Education level.
chart7 <- ggplot(
  data = mktg_data,
  mapping = aes(x = Education, y = MntFishProducts, fill = Education)
) +
  geom_col()
chart7
Q7. Teenagers are fussy about food; which foods do families with teenagers spend the most on?
## Build a long-format data frame with the columns Teenhome, type and value:
## one slice per spending column, stacked in the order listed below. `type`
## holds the name of the spending column and `value` the amount from it.
df_bar <- do.call(
  rbind,
  lapply(
    c(
      "MntFishProducts",
      "MntMeatProducts",
      "MntFruits",
      "MntSweetProducts",
      "MntWines",
      "MntGoldProds"
    ),
    function(spend_col) {
      data.frame(
        Teenhome = mktg_data$Teenhome,
        type = spend_col,
        value = mktg_data[[spend_col]]
      )
    }
  )
)
# Share of spending per product type for each Teenhome value: the bars are
# rescaled to the same height (position "fill") so only the ratios are shown.
chart8 <- ggplot(
  data = df_bar,
  mapping = aes(x = Teenhome, y = value, fill = type)
) +
  geom_col(position = "fill")
chart8
Q8. Which marketing campaign is most successful?
## Total number of acceptances per campaign, one row per campaign column,
## followed by a bar chart of those totals.
## NOTE(review): only AcceptedCmp1 to AcceptedCmp4 are compared - confirm the
## data set has no further campaign columns that should be included.
df_bar2 <- local({
  campaign_cols <- c(
    "AcceptedCmp1",
    "AcceptedCmp2",
    "AcceptedCmp3",
    "AcceptedCmp4"
  )
  accepted_totals <- unlist(
    lapply(campaign_cols, function(cmp) sum(mktg_data[[cmp]]))
  )
  data.frame(AcceptedCmp = accepted_totals, name = campaign_cols)
})
ggplot(data = df_bar2, mapping = aes(x = name, y = AcceptedCmp, fill = name)) +
  geom_col()
Q9. What kind of customers are complaining?
### Keep only the customers who filed a complaint (Complain equal to 1).
complaint <- mktg_data[which(mktg_data$Complain == 1), ]
### Pie chart of complaining customers by Education.
### Each slice is sized by the number of complaining customers in that
### Education level (geom_bar counts rows by default). Mapping Education to y
### with stat = "identity" would size slices by the position of the category
### on the discrete scale rather than by how many customers it contains.
### NOTE(review): this reuses the name chart8, replacing the Q7 chart object.
chart8 <- ggplot(complaint, aes(x = "", fill = Education)) +
  geom_bar(width = 1) +
  coord_polar("y", start = 0) +
  labs(title = "Pie-chart of complaining customers")
chart8
Q10. Average age of customers in the given sample is 52 years
## Five-number summary and mean of the derived age column.
## NOTE(review): the reported maximum of 128 suggests some implausible birth
## years in the data - consider checking them.
summary(mktg_data[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.00 44.00 51.00 52.19 62.00 128.00
### Computing correlation coefficients and p-values between variables.
### First pair: Income and number of web purchases (Pearson correlation test).
cor.test(mktg_data$Income, mktg_data$NumWebPurchases)
##
## Pearson's product-moment correlation
##
## data: mktg_data$Income and mktg_data$NumWebPurchases
## t = 19.801, df = 2214, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3519222 0.4226905
## sample estimates:
## cor
## 0.3878778
### Pearson correlation test between age and number of web purchases.
cor.test(mktg_data$age, mktg_data$NumWebPurchases)
##
## Pearson's product-moment correlation
##
## data: mktg_data$age and mktg_data$NumWebPurchases
## t = 6.9348, df = 2238, p-value = 5.303e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1042505 0.1853426
## sample estimates:
## cor
## 0.1450401
### Pearson correlation test between monthly web visits and number of web
### purchases.
cor.test(mktg_data$NumWebVisitsMonth, mktg_data$NumWebPurchases)
##
## Pearson's product-moment correlation
##
## data: mktg_data$NumWebVisitsMonth and mktg_data$NumWebPurchases
## t = -2.6461, df = 2238, p-value = 0.0082
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09703774 -0.01446393
## sample estimates:
## cor
## -0.05584633
### Correlation heat map.
### corr_matrix1 is not created anywhere in the visible script, so it is built
### here when it does not already exist, from the variables used in the
### correlation tests: Income, age, NumWebVisitsMonth and NumWebPurchases.
### Pairwise-complete observations are used because the Income test reports
### fewer degrees of freedom than the others, which points to missing Income
### values.
if (!exists("corr_matrix1")) {
  corr_matrix1 <- cor(
    mktg_data[, c("Income", "age", "NumWebVisitsMonth", "NumWebPurchases")],
    use = "pairwise.complete.obs"
  )
}
ggcorrplot(corr_matrix1)