library(tidyverse)
library(plotly)
library(lubridate)
library(corrplot)
# Load the Nykaa campaign data set from a local CSV file into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is on a machine where this exact file exists.
dat <- read_csv(
  file = "C:/Users/LENOVO/Downloads/nykaa_campaign_data.csv"
)
## Rows: 55555 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Campaign_ID, Campaign_Type, Target_Audience, Channel_Used, Language...
## dbl (9): Duration, Impressions, Clicks, Leads, Conversions, Revenue, Acquisi...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the data
head(x = dat, n = 6L)
# Show each column's type together with a few sample values
str(object = dat)
## spc_tbl_ [55,555 × 16] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Campaign_ID : chr [1:55555] "NY-CMP-1000" "NY-CMP-1001" "NY-CMP-1002" "NY-CMP-1003" ...
## $ Campaign_Type : chr [1:55555] "Social Media" "Paid Ads" "Influencer" "Email" ...
## $ Target_Audience : chr [1:55555] "College Students" "Tier 2 City Customers" "Youth" "Working Women" ...
## $ Duration : num [1:55555] 21 18 23 18 10 26 21 6 27 11 ...
## $ Channel_Used : chr [1:55555] "WhatsApp, YouTube" "YouTube" "WhatsApp, Google, YouTube" "YouTube, Facebook, Instagram" ...
## $ Impressions : num [1:55555] 57804 91801 15536 88114 96871 ...
## $ Clicks : num [1:55555] 6156 3321 2182 8413 3743 ...
## $ Leads : num [1:55555] 3616 1971 952 2231 2060 ...
## $ Conversions : num [1:55555] 2355 1357 755 947 1258 ...
## $ Revenue : num [1:55555] 1867515 1046247 197055 376906 518296 ...
## $ Acquisition_Cost: num [1:55555] 111 180.8 90.6 249.1 228.6 ...
## $ ROI : num [1:55555] 6.14 3.26 1.88 0.6 0.8 3.09 1.17 1.4 3.73 1.61 ...
## $ Language : chr [1:55555] "Hindi" "Hindi" "English" "Hindi" ...
## $ Engagement_Score: num [1:55555] 20.98 7.24 25.03 13.15 7.29 ...
## $ Customer_Segment: chr [1:55555] "College Students" "College Students" "College Students" "College Students" ...
## $ Date : chr [1:55555] "29-04-2025" "06-04-2025" "14-01-2025" "04-06-2025" ...
## - attr(*, "spec")=
## .. cols(
## .. Campaign_ID = col_character(),
## .. Campaign_Type = col_character(),
## .. Target_Audience = col_character(),
## .. Duration = col_double(),
## .. Channel_Used = col_character(),
## .. Impressions = col_double(),
## .. Clicks = col_double(),
## .. Leads = col_double(),
## .. Conversions = col_double(),
## .. Revenue = col_double(),
## .. Acquisition_Cost = col_double(),
## .. ROI = col_double(),
## .. Language = col_character(),
## .. Engagement_Score = col_double(),
## .. Customer_Segment = col_character(),
## .. Date = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Convert date column
# The raw values are day-month-year strings such as "29-04-2025". Without an
# explicit format, as.Date() reads them as year-month-day and silently yields
# dates in years 0001-0031 (this is what the summary() transcript captured
# further down in this file shows). Passing the format parses them correctly.
dat$Date <- as.Date(dat$Date, format = "%d-%m-%Y")
# Check missing values
# Counts NA per column. Any Date string that does not match dd-mm-yyyy
# becomes NA in the conversion above and is therefore counted here.
colSums(is.na(dat))
## Campaign_ID Campaign_Type Target_Audience Duration
## 0 0 0 0
## Channel_Used Impressions Clicks Leads
## 0 0 0 0
## Conversions Revenue Acquisition_Cost ROI
## 0 0 0 0
## Language Engagement_Score Customer_Segment Date
## 0 0 0 0
# Per-column summary: quantiles and mean for numeric columns, length and
# class for character columns.
summary(object = dat)
## Campaign_ID Campaign_Type Target_Audience Duration
## Length:55555 Length:55555 Length:55555 Min. : 5.00
## Class :character Class :character Class :character 1st Qu.:11.00
## Mode :character Mode :character Mode :character Median :17.00
## Mean :17.45
## 3rd Qu.:24.00
## Max. :30.00
## Channel_Used Impressions Clicks Leads
## Length:55555 Min. : 10001 Min. : 202 Min. : 56
## Class :character 1st Qu.: 32680 1st Qu.: 2110 1st Qu.: 779
## Mode :character Median : 55182 Median : 3907 Median :1481
## Mean : 55088 Mean : 4688 Mean :1877
## 3rd Qu.: 77515 3rd Qu.: 6688 3rd Qu.:2605
## Max. :100000 Max. :14868 Max. :8876
## Conversions Revenue Acquisition_Cost ROI
## Min. : 19 Min. : 6183 Min. : 9.08 Min. :-0.970
## 1st Qu.: 400 1st Qu.: 177706 1st Qu.: 105.44 1st Qu.: 0.040
## Median : 779 Median : 360436 Median : 207.51 Median : 1.240
## Mean :1033 Mean : 515820 Mean : 377.35 Mean : 2.714
## 3rd Qu.:1414 3rd Qu.: 687423 3rd Qu.: 428.58 3rd Qu.: 3.630
## Max. :6686 Max. :4579910 Max. :15473.16 Max. :74.420
## Language Engagement_Score Customer_Segment Date
## Length:55555 Min. : 2.60 Length:55555 Min. :0001-01-20
## Class :character 1st Qu.: 8.36 Class :character 1st Qu.:0008-05-20
## Mode :character Median :13.60 Mode :character Median :0015-11-20
## Mean :13.78 Mean :0016-01-04
## 3rd Qu.:18.86 3rd Qu.:0023-07-20
## Max. :30.91 Max. :0031-12-20
# Visualization 1 - Average ROI by campaign type
# Reduce the data to one mean ROI per campaign type, then draw one bar per
# type whose height is that mean.
roi_plot <- dat %>%
  group_by(Campaign_Type) %>%
  summarise(avg_roi = mean(ROI, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = Campaign_Type, y = avg_roi)) +
  # geom_col() plots the y values as given (no counting).
  geom_col(mapping = aes(fill = Campaign_Type)) +
  labs(
    x = "Campaign Type",
    y = "Average ROI",
    title = "Average ROI by Campaign Type"
  ) +
  theme_minimal()
roi_plot
# Visualization 2 - Revenue trend over time
# Sum revenue over all campaigns sharing the same Date value, then draw the
# per-date totals as a single line.
revenue_plot <- dat %>%
  group_by(Date) %>%
  summarise(total_revenue = sum(Revenue, na.rm = TRUE)) %>%
  ggplot(aes(x = Date,
             y = total_revenue)) +
  # `linewidth` controls line thickness; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and triggers a warning.
  geom_line(linewidth = 1) +
  labs(
    title = "Revenue Trend Over Time",
    x = "Date",
    y = "Revenue"
  ) +
  theme_minimal()
revenue_plot
# Visualization 3 - Impressions vs. conversions
# One point per campaign row, coloured by campaign type. The points are
# semi-transparent so overlapping points remain visible.
scatter_plot <- dat %>%
  ggplot(mapping = aes(x = Impressions, y = Conversions)) +
  geom_point(mapping = aes(color = Campaign_Type), alpha = 0.7, size = 3) +
  labs(
    x = "Impressions",
    y = "Conversions",
    title = "Relationship Between Impressions and Conversions"
  ) +
  theme_minimal()
scatter_plot
# Visualization 4 - Engagement score distribution by language
# One box per language, filled with a colour keyed to the same language.
box_plot <- dat %>%
  ggplot(mapping = aes(x = Language, y = Engagement_Score)) +
  geom_boxplot(mapping = aes(fill = Language)) +
  labs(
    x = "Language",
    y = "Engagement Score",
    title = "Distribution of Engagement Scores Across Languages"
  ) +
  theme_minimal()
box_plot
# Visualization 5 - Correlation heatmap
# Keep only the numeric performance metrics that enter the correlation.
numeric_data <- select(
  dat,
  Impressions, Clicks, Leads, Conversions, Revenue, ROI, Engagement_Score
)
# Pairwise correlations between the selected metrics; rows containing any
# NA are dropped first ("complete.obs").
cor_matrix <- numeric_data %>%
  cor(use = "complete.obs")
# Draw the upper triangle of the matrix as coloured tiles, with slightly
# reduced label text size.
corrplot(
  corr = cor_matrix,
  method = "color",
  type = "upper",
  tl.cex = 0.8
)
# Visualization 6 - Customer segment mix within each campaign type
# geom_bar() counts rows per campaign type; the fill mapping splits each bar
# into stacked pieces, one per customer segment.
stacked_plot <- dat %>%
  ggplot() +
  geom_bar(mapping = aes(x = Campaign_Type, fill = Customer_Segment)) +
  labs(
    x = "Campaign Type",
    y = "Count",
    title = "Customer Segment Distribution by Campaign Type"
  ) +
  theme_minimal()
stacked_plot
# Visualization 7 - Interactive plotly visualization
# Scatter of impressions against revenue, coloured by campaign type. The
# `text` formula builds an HTML string per point from the campaign type, ROI
# and engagement score.
interactive_plot <- dat %>%
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~Impressions,
    y = ~Revenue,
    color = ~Campaign_Type,
    text = ~paste(
      "Campaign:", Campaign_Type,
      "<br>ROI:", ROI,
      "<br>Engagement:", Engagement_Score
    )
  )
interactive_plot
# Visualization 8 - Revenue distribution by channel
# One 20-bin revenue histogram per campaign type (facets); within each
# histogram the bars are filled by the Channel_Used value.
facet_plot <- dat %>%
  ggplot(mapping = aes(x = Revenue)) +
  geom_histogram(mapping = aes(fill = Channel_Used), bins = 20) +
  facet_wrap(facets = ~Campaign_Type) +
  labs(
    x = "Revenue",
    y = "Count",
    title = "Revenue Distribution Across Different Channels"
  ) +
  theme_minimal()
facet_plot
# Conclusion
##
## This project analyzed Nykaa marketing campaign performance using multiple visualization techniques.
## The analysis identified relationships between impressions, conversions, engagement, ROI, and revenue trends.
## Interactive and comparative visualizations provided insights into campaign effectiveness across customer segments and channels.