Objective

The objective of this project is to explore Coca-Cola’s monthly revenue data from 2007 to 2020, identify patterns, and analyze correlations between time and revenue using R and various data visualization techniques.

Load Libraries

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded

Read the Data

# Location of the Coca-Cola monthly revenue workbook; this is an absolute,
# machine-specific path, so adjust it when running elsewhere.
revenue_workbook <- "C:/Users/nishant/Desktop/New folder (3)/CocaCola_Full_Monthly_Revenue_2007_2020.xlsx"

# Only the path is supplied, so read_excel()'s defaults choose the sheet/range.
data <- read_excel(path = revenue_workbook)

Project Overview

Data Preparation

# Prepare the time columns used by every plot below.
#  - Month becomes a factor ordered January..December (month.name), so axes
#    and groupings follow calendar order rather than alphabetical order.
#  - Date is the first day of each (Year, Month) pair.
data <- data %>%
  mutate(
    Month = factor(Month, levels = month.name),
    # Build the date from the month *number* (the factor's integer code, 1-12)
    # and parse it with %m. Parsing the month *name* with %B depends on the
    # session's LC_TIME locale and yields NA dates on non-English systems.
    Date = as.Date(
      paste(Year, as.integer(Month), 1, sep = "-"),
      format = "%Y-%m-%d"
    )
  )

Descriptive Visualizations

We first perform descriptive analysis to understand trends, distributions, and variations in Coca-Cola’s monthly revenues.

1. Line Chart - Revenue Over Time

# Time series of monthly revenue: a single blue line across all dates.
data %>%
  ggplot(mapping = aes(x = Date, y = `Net Operating Revenues ($M)`)) +
  geom_line(colour = "blue") +
  theme_minimal() +
  labs(
    title = "Revenue Over Time",
    x = "Date",
    y = "Revenue ($M)"
  )

2. Bar Plot - Average Revenue by Month

# Mean revenue for each calendar month, pooled over every year in the data.
monthly_average <- data %>%
  group_by(Month) %>%
  summarize(Average_Revenue = mean(`Net Operating Revenues ($M)`))

# One bar per month; the x labels are rotated 45 degrees so the month names
# do not overlap. theme() must follow theme_minimal() so the rotation is kept.
ggplot(monthly_average, aes(x = Month, y = Average_Revenue)) +
  geom_bar(fill = "skyblue", stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Average Revenue by Month",
    x = "Month",
    y = "Average Revenue ($M)"
  )

3. Histogram - Revenue Distribution

# Distribution of monthly revenue, counted in bins 100 units wide on the
# revenue ($M) axis.
data %>%
  ggplot(aes(x = `Net Operating Revenues ($M)`)) +
  geom_histogram(
    binwidth = 100,
    colour = "black",
    fill = "orange"
  ) +
  theme_minimal() +
  labs(
    title = "Revenue Distribution",
    x = "Revenue ($M)",
    y = "Count"
  )

4. Boxplot - Revenue by Year

# Spread of monthly revenue within each year. Year is converted to a factor
# so that every year gets its own box instead of one box over a numeric axis.
data %>%
  ggplot(aes(x = factor(Year), y = `Net Operating Revenues ($M)`)) +
  geom_boxplot(fill = "lightgreen") +
  theme_minimal() +
  labs(
    title = "Revenue Spread by Year",
    x = "Year",
    y = "Revenue ($M)"
  )

5. Pie Chart - Revenue Contribution by Year

# Total revenue per year (one row per Year).
yearly_sum <- data %>%
  group_by(Year) %>%
  summarize(Total_Revenue = sum(`Net Operating Revenues ($M)`))

# Pie chart: a single stacked bar (x = "") wrapped into polar coordinates,
# so each year's slice angle is proportional to its total revenue.
ggplot(yearly_sum, aes(x = "", y = Total_Revenue, fill = factor(Year))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  # Name the legend explicitly; otherwise its title is the raw expression
  # "factor(Year)".
  labs(title = "Revenue Share by Year", fill = "Year") +
  theme_void()

6. Area Chart - Cumulative Revenue

# Sort chronologically first: cumsum() accumulates in row order, so the
# running total is only meaningful once the rows are ordered by Date.
data <- mutate(
  arrange(data, Date),
  Cumulative_Revenue = cumsum(`Net Operating Revenues ($M)`)
)

# Filled area under the running total of revenue.
data %>%
  ggplot(aes(x = Date, y = Cumulative_Revenue)) +
  geom_area(fill = "lightblue") +
  theme_minimal() +
  labs(
    title = "Cumulative Revenue Over Time",
    x = "Date",
    y = "Cumulative Revenue ($M)"
  )

7. Violin Plot - Revenue by Month

# Shape of the revenue distribution for each calendar month. The x labels are
# rotated 45 degrees; theme() must follow theme_minimal() to take effect.
data %>%
  ggplot(aes(x = Month, y = `Net Operating Revenues ($M)`)) +
  geom_violin(fill = "violet") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Revenue Distribution by Month",
    x = "Month",
    y = "Revenue ($M)"
  )

8. Dot Plot - Revenue Over Time

# Each monthly observation drawn as an individual red point (no connecting line).
data %>%
  ggplot(mapping = aes(x = Date, y = `Net Operating Revenues ($M)`)) +
  geom_point(colour = "red") +
  theme_minimal() +
  labs(
    title = "Revenue Dots Over Time",
    x = "Date",
    y = "Revenue ($M)"
  )

Correlation Analysis

Next, we examine how the variables are correlated, focusing on how revenue changes from year to year.

Prepare Data for Correlation

# Keep only the two columns used in the correlation analysis: year and revenue.
cor_data <- select(
  data,
  Year,
  `Net Operating Revenues ($M)`
)

1. Scatter Plot - Year vs Revenue

# Revenue against year, with a straight least-squares (lm) trend line.
# se = FALSE hides the confidence band around the fit.
cor_data %>%
  ggplot(aes(x = Year, y = `Net Operating Revenues ($M)`)) +
  geom_point(colour = "darkgreen") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Year vs Revenue",
    x = "Year",
    y = "Revenue ($M)"
  )
## `geom_smooth()` using formula = 'y ~ x'

2. Correlation Matrix

# Pairwise correlations between the columns of cor_data (cor()'s default
# method), kept in corr_matrix for reuse by later plots.
corr_matrix <- cor_data %>%
  cor()

# Upper triangle only, with each coefficient drawn as a circle.
corrplot(
  corr_matrix,
  type = "upper",
  method = "circle"
)

3. Heatmap of Correlations

# Colour-tile view of the same correlation matrix, with each coefficient
# printed in black (at 0.7x text size) on top of its tile.
corrplot(
  corr_matrix,
  method = "color",
  number.cex = 0.7,
  addCoef.col = "black"
)

4. Jitter Plot - Revenue Trend Over Years

# Revenue by year with horizontal jitter, so the twelve monthly points that
# share a year do not sit on one vertical line. height = 0 leaves the revenue
# values themselves untouched. A red lm trend line (with its default
# confidence band) is drawn on top.
ggplot(cor_data, aes(x = Year, y = `Net Operating Revenues ($M)`)) +
  geom_jitter(
    # A fixed seed makes the random offsets, and therefore the rendered
    # figure, identical on every knit.
    position = position_jitter(width = 0.3, height = 0, seed = 2020),
    color = "purple"
  ) +
  geom_smooth(method = "lm", color = "red") +
  labs(title = "Revenue Trend Over Years (Jitter Plot)", x = "Year", y = "Revenue ($M)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'