The objective of this project is to explore Coca-Cola’s monthly revenue data from 2007 to 2020, identify patterns, and analyze correlations between time and revenue using R and various data visualization techniques.
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded
# Load the monthly revenue workbook.
# NOTE: the path is absolute and machine-specific; adjust it to wherever the
# workbook lives before running this script elsewhere.
data <- read_excel("C:/Users/nishant/Desktop/New folder (3)/CocaCola_Full_Monthly_Revenue_2007_2020.xlsx")

# Make Month an ordered-by-calendar factor so plots list January..December
# instead of alphabetical order. Values not in `month.name` become NA.
data$Month <- factor(data$Month, levels = month.name)

# Build a first-of-month Date from Year and Month.
# The month number is taken from the factor's integer code (1-12) and parsed
# with the numeric `%m` specifier. Parsing the month *name* with `%B` depends
# on the session locale and yields NA everywhere on non-English systems.
data <- data %>%
  mutate(
    Date = as.Date(
      sprintf("%d-%02d-01", as.integer(Year), as.integer(Month)),
      format = "%Y-%m-%d"
    )
  )

# Surface rows whose Year/Month could not be turned into a date instead of
# letting them silently vanish from the time-based plots.
if (anyNA(data$Date)) {
  warning(
    "Some rows have a Year/Month that could not be converted to a Date; ",
    "check that Month holds full English month names.",
    call. = FALSE
  )
}
We first perform descriptive analysis to understand trends, distributions, and variations in Coca-Cola’s monthly revenues.
# Line chart of monthly net operating revenue against the first-of-month date.
data %>%
  ggplot(mapping = aes(Date, `Net Operating Revenues ($M)`)) +
  geom_line(colour = "blue") +
  ggtitle("Revenue Over Time") +
  xlab("Date") +
  ylab("Revenue ($M)") +
  theme_minimal()
# Bar chart of the mean revenue for each calendar month, averaged over all
# years in the data set. X-axis labels are tilted 45 degrees.
ggplot(
  data = summarise(
    group_by(data, Month),
    Average_Revenue = mean(`Net Operating Revenues ($M)`)
  ),
  mapping = aes(x = Month, y = Average_Revenue)
) +
  geom_col(fill = "skyblue") +
  ggtitle("Average Revenue by Month") +
  xlab("Month") +
  ylab("Average Revenue ($M)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Histogram of monthly revenue using bins of width 100 (in $M).
data %>%
  ggplot(mapping = aes(`Net Operating Revenues ($M)`)) +
  geom_histogram(colour = "black", fill = "orange", binwidth = 100) +
  ggtitle("Revenue Distribution") +
  xlab("Revenue ($M)") +
  ylab("Count") +
  theme_minimal()
# One box per year showing the spread of that year's monthly revenues.
# Year is converted to a factor so each year gets its own discrete box.
data %>%
  ggplot(mapping = aes(factor(Year), `Net Operating Revenues ($M)`)) +
  geom_boxplot(fill = "lightgreen") +
  ggtitle("Revenue Spread by Year") +
  xlab("Year") +
  ylab("Revenue ($M)") +
  theme_minimal()
# Total revenue per year.
yearly_sum <- summarise(
  group_by(data, Year),
  Total_Revenue = sum(`Net Operating Revenues ($M)`)
)

# Pie chart of each year's share of total revenue: a single stacked bar
# (empty x value) wrapped into polar coordinates along the y axis.
yearly_sum %>%
  ggplot(mapping = aes(x = "", y = Total_Revenue, fill = factor(Year))) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  ggtitle("Revenue Share by Year") +
  theme_void()
# Sort chronologically first so the running total accumulates in date order,
# then append it as the Cumulative_Revenue column.
data <- arrange(data, Date)
data <- mutate(
  data,
  Cumulative_Revenue = cumsum(`Net Operating Revenues ($M)`)
)
# Filled area chart of the running revenue total over time.
data %>%
  ggplot(mapping = aes(Date, Cumulative_Revenue)) +
  geom_area(fill = "lightblue") +
  ggtitle("Cumulative Revenue Over Time") +
  xlab("Date") +
  ylab("Cumulative Revenue ($M)") +
  theme_minimal()
# Violin plot of the revenue distribution within each calendar month.
# X-axis labels are tilted 45 degrees.
data %>%
  ggplot(mapping = aes(Month, `Net Operating Revenues ($M)`)) +
  geom_violin(fill = "violet") +
  ggtitle("Revenue Distribution by Month") +
  xlab("Month") +
  ylab("Revenue ($M)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Scatter of each month's revenue against its date (points only, no line).
data %>%
  ggplot(mapping = aes(Date, `Net Operating Revenues ($M)`)) +
  geom_point(colour = "red") +
  ggtitle("Revenue Dots Over Time") +
  xlab("Date") +
  ylab("Revenue ($M)") +
  theme_minimal()
Now we study how variables are correlated, particularly how revenue varies with year.
# Keep only the two columns used in the correlation analysis.
cor_data <- select(data, Year, `Net Operating Revenues ($M)`)
# Scatter of revenue against year with a fitted straight line (linear model,
# no confidence band) overlaid.
cor_data %>%
  ggplot(mapping = aes(Year, `Net Operating Revenues ($M)`)) +
  geom_point(colour = "darkgreen") +
  geom_smooth(colour = "black", se = FALSE, method = "lm") +
  ggtitle("Year vs Revenue") +
  xlab("Year") +
  ylab("Revenue ($M)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Correlation matrix of Year and revenue.
corr_matrix <- cor_data %>% cor()

# Upper-triangle view with circles.
corrplot(corr = corr_matrix, type = "upper", method = "circle")

# Full colour-tile view with the coefficients printed in black on each cell.
corrplot(
  corr = corr_matrix,
  method = "color",
  number.cex = 0.7,
  addCoef.col = "black"
)
# Same Year-vs-revenue relationship, but points are jittered horizontally
# (never vertically) so the observations sharing a year do not overlap.
# A linear fit is drawn in red.
cor_data %>%
  ggplot(mapping = aes(Year, `Net Operating Revenues ($M)`)) +
  geom_jitter(colour = "purple", height = 0, width = 0.3) +
  geom_smooth(colour = "red", method = "lm") +
  ggtitle("Revenue Trend Over Years (Jitter Plot)") +
  xlab("Year") +
  ylab("Revenue ($M)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'