The project deals with analyzing marketing strategies for hospitals, categorized into small, medium, large, and private facilities. The goal is to identify which marketing tactics (e.g., email, flyers, phone calls, direct sales visits) are most effective for each type of hospital account.
This involves understanding organic sales generation and how additional sales can be driven through specific marketing activities. The analysis acknowledges that not all marketing tactics are equally effective across all account types, aiming to tailor strategies to maximize the impact on sales.
library(readr)
library(dplyr)
library(lubridate)
library(tidyr)
library(ggplot2)
library(tibble)
library(corrplot)
library(gt)
library(broom)
head(df)
## # A tibble: 6 × 17
## `Client ID` `Client Type` `Number of Customers` `Montly Target` `Zip Code`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 ID-987275 Medium Facility 2800 125 1003
## 2 ID-987275 Medium Facility 2800 125 1003
## 3 ID-987275 Medium Facility 2800 125 1003
## 4 ID-987275 Medium Facility 2800 125 1003
## 5 ID-987275 Medium Facility 2800 125 1003
## 6 ID-987275 Medium Facility 2800 125 1003
## # ℹ 12 more variables: Calendardate <chr>, `Amount Collected` <dbl>,
## # `Unit Sold` <dbl>, `Campaign (Email)` <dbl>, `Campaign (Flyer)` <dbl>,
## # `Campaign (Phone)` <dbl>, `Sales Contact 1` <dbl>, `Sales Contact 2` <dbl>,
## # `Sales Contact 3` <dbl>, `Sales Contact 4` <dbl>, `Sales Contact 5` <dbl>,
## # `Number of Competition` <chr>
str(df)
## spc_tbl_ [2,976 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Client ID : chr [1:2976] "ID-987275" "ID-987275" "ID-987275" "ID-987275" ...
## $ Client Type : chr [1:2976] "Medium Facility" "Medium Facility" "Medium Facility" "Medium Facility" ...
## $ Number of Customers : num [1:2976] 2800 2800 2800 2800 2800 2800 2800 2800 2800 2800 ...
## $ Montly Target : num [1:2976] 125 125 125 125 125 125 125 125 125 125 ...
## $ Zip Code : num [1:2976] 1003 1003 1003 1003 1003 ...
## $ Calendardate : chr [1:2976] "16-01-2014" "16-02-2014" "18-03-2014" "18-04-2014" ...
## $ Amount Collected : num [1:2976] 0 3409460 10228384 17047304 23866224 ...
## $ Unit Sold : num [1:2976] 0 24 75 123 171 198 123 123 123 198 ...
## $ Campaign (Email) : num [1:2976] 0 0 0 0 0 ...
## $ Campaign (Flyer) : num [1:2976] 0 0 0 0 0 ...
## $ Campaign (Phone) : num [1:2976] 0 0 0 0 0 0 0 0 0 0 ...
## $ Sales Contact 1 : num [1:2976] 0 0 0 0 0 0 0 0 0 0 ...
## $ Sales Contact 2 : num [1:2976] 0 0 0 3547500 0 ...
## $ Sales Contact 3 : num [1:2976] 0 0 0 1290000 0 ...
## $ Sales Contact 4 : num [1:2976] 0 0 0 0 0 0 0 0 0 0 ...
## $ Sales Contact 5 : num [1:2976] 0 322500 0 0 0 ...
## $ Number of Competition: chr [1:2976] "Low" "Low" "Low" "Low" ...
## - attr(*, "spec")=
## .. cols(
## .. `Client ID` = col_character(),
## .. `Client Type` = col_character(),
## .. `Number of Customers` = col_double(),
## .. `Montly Target` = col_double(),
## .. `Zip Code` = col_double(),
## .. Calendardate = col_character(),
## .. `Amount Collected` = col_double(),
## .. `Unit Sold` = col_double(),
## .. `Campaign (Email)` = col_double(),
## .. `Campaign (Flyer)` = col_double(),
## .. `Campaign (Phone)` = col_double(),
## .. `Sales Contact 1` = col_double(),
## .. `Sales Contact 2` = col_double(),
## .. `Sales Contact 3` = col_double(),
## .. `Sales Contact 4` = col_double(),
## .. `Sales Contact 5` = col_double(),
## .. `Number of Competition` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
## Cleaning the CalendarDate Column from (Chr) to Date.
df <- df %>%
mutate(Calendardate = as.Date(Calendardate, format = "%d-%m-%Y"),
Calendar_Month = month(Calendardate),
Calendar_Year = year(Calendardate))
# Replacing spaces with underscores in column names
colnames(df) <- gsub(" ", "_", colnames(df))
colnames(df) <- gsub("\\(|\\)", "", colnames(df))
# Check the updated column names
colnames(df)
## [1] "Client_ID" "Client_Type" "Number_of_Customers"
## [4] "Montly_Target" "Zip_Code" "Calendardate"
## [7] "Amount_Collected" "Unit_Sold" "Campaign_Email"
## [10] "Campaign_Flyer" "Campaign_Phone" "Sales_Contact_1"
## [13] "Sales_Contact_2" "Sales_Contact_3" "Sales_Contact_4"
## [16] "Sales_Contact_5" "Number_of_Competition" "Calendar_Month"
## [19] "Calendar_Year"
# Distribution of Client Types
df %>% count(Client_Type) %>% mutate(Percentage = n / sum(n) * 100)
## # A tibble: 4 × 3
## Client_Type n Percentage
## <chr> <int> <dbl>
## 1 Large Facility 1368 46.0
## 2 Medium Facility 504 16.9
## 3 Private Facility 264 8.87
## 4 Small Facility 840 28.2
# Mean Amount Collected by Client Type
df %>% group_by(Client_Type) %>% summarise(Avg_Amount_Collected = mean(Amount_Collected, na.rm = TRUE))
## # A tibble: 4 × 2
## Client_Type Avg_Amount_Collected
## <chr> <dbl>
## 1 Large Facility 19998805.
## 2 Medium Facility 40759968.
## 3 Private Facility 5030246.
## 4 Small Facility 1637759.
# Correlation between marketing strategies and Amount Collected
cor_matrix <- cor(df %>% select(Amount_Collected, Campaign_Email, Campaign_Flyer, Campaign_Phone, Sales_Contact_1, Sales_Contact_2, Sales_Contact_3, Sales_Contact_4, Sales_Contact_5), use = "complete.obs")
cor_matrix['Amount_Collected',]
## Amount_Collected Campaign_Email Campaign_Flyer Campaign_Phone
## 1.00000000 0.24823455 0.44433668 0.03485845
## Sales_Contact_1 Sales_Contact_2 Sales_Contact_3 Sales_Contact_4
## 0.27747840 0.55211154 0.35788696 0.23616507
## Sales_Contact_5
## 0.09579479
# Removing total 'Amount_collected', which would be a correlation of itself
cor_data <- cor_matrix['Amount_Collected', -1]
# Transform into a data frame for plotting
cor_df <- data.frame(
Strategy = names(cor_data),
Correlation = cor_data
)
# Create the bar plot
ggplot(cor_df, aes(x = reorder(Strategy, Correlation), y = Correlation, fill = Correlation)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip coordinates to make it horizontal
scale_fill_gradient(low = "red", high = "green") + # Use red to green gradient
labs(title = "Correlation of Marketing Strategies with Amount Collected",
x = "Marketing Strategy",
y = "Degree of Correlation") +
theme_minimal() +
theme(legend.position = "none") # Remove the legend for the fill
# Define a simple function to create a correlation plot for a given subset of data
create_correlation_plot <- function(data_subset, title_suffix) {
cor_matrix <- cor(data_subset %>%
select(Amount_Collected, Campaign_Email, Campaign_Flyer, Campaign_Phone,
Sales_Contact_1, Sales_Contact_2, Sales_Contact_3, Sales_Contact_4, Sales_Contact_5),
use = "complete.obs")
cor_data <- cor_matrix['Amount_Collected', -1] # Exclude self-correlation
cor_df <- data.frame(
Strategy = names(cor_data),
Correlation = cor_data
)
ggplot(cor_df, aes(x = reorder(Strategy, Correlation), y = Correlation, fill = Correlation)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip coordinates to make it horizontal
scale_fill_gradient(low = "red", high = "green") +
labs(title = paste("Correlation of Marketing Sales & Strategies -", title_suffix),
x = "Marketing Strategy", y = "Degree of Correlation") +
theme_minimal() +
theme(legend.position = "none")
}
# Create a list of facility types you want to plot
facility_types <- c("Small Facility", "Medium Facility", "Large Facility", "Private Facility")
# Loop through the facility types and create a plot for each
plots <- list()
for (type in facility_types) {
data_subset <- df %>% filter(Client_Type == type)
plots[[type]] <- create_correlation_plot(data_subset, type)
}
# Display the plots
plots$`Small Facility`
plots$`Medium Facility`
plots$`Large Facility`
plots$`Private Facility`
# Modeling a Linear Regression Analysis on Sales on Marketing Strategies & Sales Contacts
model <- lm(Amount_Collected ~ Campaign_Email + Campaign_Flyer + Campaign_Phone +
Sales_Contact_1 + Sales_Contact_2 + Sales_Contact_3 +
Sales_Contact_4 + Sales_Contact_5, data = df)
summary(model)
##
## Call:
## lm(formula = Amount_Collected ~ Campaign_Email + Campaign_Flyer +
## Campaign_Phone + Sales_Contact_1 + Sales_Contact_2 + Sales_Contact_3 +
## Sales_Contact_4 + Sales_Contact_5, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -111513076 -8521151 -1481206 2067059 147916803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.481e+06 5.123e+05 2.891 0.00387 **
## Campaign_Email 7.932e-01 5.968e-01 1.329 0.18388
## Campaign_Flyer 3.338e+00 2.601e-01 12.831 < 2e-16 ***
## Campaign_Phone 7.336e-02 1.053e+00 0.070 0.94444
## Sales_Contact_1 4.237e+00 4.151e-01 10.207 < 2e-16 ***
## Sales_Contact_2 3.638e+00 1.292e-01 28.155 < 2e-16 ***
## Sales_Contact_3 2.343e+00 1.307e-01 17.925 < 2e-16 ***
## Sales_Contact_4 1.095e+01 1.060e+00 10.331 < 2e-16 ***
## Sales_Contact_5 3.508e+00 4.549e+00 0.771 0.44072
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21850000 on 2967 degrees of freedom
## Multiple R-squared: 0.4799, Adjusted R-squared: 0.4785
## F-statistic: 342.1 on 8 and 2967 DF, p-value: < 2.2e-16
# Extracting significant coefficients where p-value is less than 0.05
coef_df <- broom::tidy(model) %>%
filter(p.value < 0.05) %>%
select(term, estimate)
# View the filtered coefficients dataframe
print(coef_df)
## # A tibble: 6 × 2
## term estimate
## <chr> <dbl>
## 1 (Intercept) 1481206.
## 2 Campaign_Flyer 3.34
## 3 Sales_Contact_1 4.24
## 4 Sales_Contact_2 3.64
## 5 Sales_Contact_3 2.34
## 6 Sales_Contact_4 10.9
# Modeling a Linear regression analysis By each Client Type (Small, Medium, Large, Private)
consolidated_summary <- df %>%
group_by(Client_Type) %>%
do({
model <- lm(Amount_Collected ~ Campaign_Email + Campaign_Flyer + Campaign_Phone +
Sales_Contact_1 + Sales_Contact_2 + Sales_Contact_3 +
Sales_Contact_4 + Sales_Contact_5, data = .)
tidy(model) %>%
filter(p.value < 0.05) %>%
select(term, estimate) %>%
arrange(desc(estimate))
}) %>%
ungroup() %>%
rename(Variable = term, `Coefficent (Impact)` = estimate)
print(consolidated_summary)
## # A tibble: 15 × 3
## Client_Type Variable `Coefficent (Impact)`
## <chr> <chr> <dbl>
## 1 Large Facility (Intercept) 2811701.
## 2 Large Facility Sales_Contact_1 11.7
## 3 Large Facility Sales_Contact_4 10.6
## 4 Large Facility Sales_Contact_2 4.00
## 5 Large Facility Campaign_Flyer 2.72
## 6 Large Facility Sales_Contact_3 2.03
## 7 Large Facility Campaign_Phone -3.54
## 8 Medium Facility (Intercept) 5681904.
## 9 Medium Facility Campaign_Flyer 4.11
## 10 Medium Facility Sales_Contact_2 3.58
## 11 Medium Facility Sales_Contact_1 3.14
## 12 Medium Facility Sales_Contact_3 2.12
## 13 Private Facility Sales_Contact_2 6.62
## 14 Small Facility (Intercept) 878923.
## 15 Small Facility Sales_Contact_2 0.810
# This will provide a more granular insight into each Marketing Strategy & Sales Contact from each facility.
# Editing Summarized Table for Business
consolidated_summary_modified <- consolidated_summary %>%
rename(Account_Type = Client_Type, `Return on Investment` = `Coefficent (Impact)`) %>% # Correcting the column names
mutate(`Return on Investment` = round(`Return on Investment`, 2)) %>% # Rounding the 'Return on Investment' to 2 decimal places
filter(Variable != "(Intercept)") # Removing unnecessary grand total rows with the variable "(Intercept)"
print(consolidated_summary_modified)
## # A tibble: 12 × 3
## Account_Type Variable `Return on Investment`
## <chr> <chr> <dbl>
## 1 Large Facility Sales_Contact_1 11.7
## 2 Large Facility Sales_Contact_4 10.6
## 3 Large Facility Sales_Contact_2 4
## 4 Large Facility Campaign_Flyer 2.72
## 5 Large Facility Sales_Contact_3 2.03
## 6 Large Facility Campaign_Phone -3.54
## 7 Medium Facility Campaign_Flyer 4.11
## 8 Medium Facility Sales_Contact_2 3.58
## 9 Medium Facility Sales_Contact_1 3.14
## 10 Medium Facility Sales_Contact_3 2.12
## 11 Private Facility Sales_Contact_2 6.62
## 12 Small Facility Sales_Contact_2 0.81