This report presents an data analysis of Netflix stock data from 2022-2023. The analysis focuses on stock price movements, returns, volatility, and trading volumes.
# Load required libraries
library(tidyverse)
library(readxl)
library(lubridate)
library(reshape2)
library(zoo)
library(scales)
# Import data from Excel file
netflix_data <- read_excel("netflix_stock_data_20250210_155914.xlsx", sheet = "Data")
# Convert data to a proper data frame
netflix_df <- as.data.frame(netflix_data)
# Create and format Date column
netflix_df$Date <- as.Date(netflix_df$...1)
# Remove the original date column and reorganize
netflix_df <- netflix_df %>%
select(-...1) %>% # Remove the original date column
relocate(Date) # Move Date to first column
# Check for missing values
missing_count <- sum(is.na(netflix_df))
print(paste("Total missing values:", missing_count))
## [1] "Total missing values: 0"
# Rename columns for clarity
netflix_df <- netflix_df %>%
rename(
OpenPrice = open,
HighPrice = high,
LowPrice = low,
ClosePrice = close,
Volume = volume,
DailyReturn = daily_return,
Volatility = volatility
)
# Create additional features for analysis
netflix_df <- netflix_df %>%
mutate(
Month = month(Date),
Year = year(Date),
WeekDay = wday(Date, label = TRUE),
MonthName = factor(month.name[Month], levels = month.name)
)
# Display the first few rows
head(netflix_df)
## Date OpenPrice HighPrice LowPrice ClosePrice Volume DailyReturn
## 1 2022-01-03 605.61 609.99 590.56 597.37 3068808 -0.84
## 2 2022-01-04 599.91 600.41 581.60 591.15 4393135 -1.04
## 3 2022-01-05 592.00 592.84 566.88 567.52 4148749 -4.00
## 4 2022-01-06 554.34 563.36 542.01 553.29 5711795 -2.51
## 5 2022-01-07 549.46 553.43 538.22 541.06 3382873 -2.21
## 6 2022-01-10 538.49 543.69 526.32 539.85 4486145 -0.22
## Volatility ma_50 ma_200 open_outlier high_outlier low_outlier
## 1 1.37 640.6854 565.2527 FALSE FALSE FALSE
## 2 1.32 639.2128 565.5929 FALSE FALSE FALSE
## 3 1.46 637.1300 565.7551 FALSE FALSE FALSE
## 4 1.52 634.8254 565.9175 FALSE FALSE FALSE
## 5 1.48 632.3882 566.1085 FALSE FALSE FALSE
## 6 1.48 629.7042 566.2675 FALSE FALSE FALSE
## close_outlier price_consistency Month Year WeekDay MonthName
## 1 FALSE TRUE 1 2022 Mon January
## 2 FALSE TRUE 1 2022 Tue January
## 3 FALSE TRUE 1 2022 Wed January
## 4 FALSE TRUE 1 2022 Thu January
## 5 FALSE TRUE 1 2022 Fri January
## 6 FALSE TRUE 1 2022 Mon January
# Basic summary statistics
summary(netflix_df %>% select(ClosePrice, Volume, DailyReturn, Volatility))
## ClosePrice Volume DailyReturn Volatility
## Min. :166.4 Min. : 1404732 Min. :-35.12000 Min. :1.020
## 1st Qu.:249.3 1st Qu.: 4778118 1st Qu.: -1.73000 1st Qu.:2.040
## Median :342.5 Median : 6527391 Median : -0.08000 Median :2.860
## Mean :337.3 Mean : 8640660 Mean : 0.02433 Mean :3.168
## 3rd Qu.:409.4 3rd Qu.: 9538226 3rd Qu.: 1.58000 3rd Qu.:3.770
## Max. :597.4 Max. :132644607 Max. : 16.05000 Max. :8.890
ggplot(netflix_df, aes(x = Date, y = ClosePrice)) +
geom_line(color = "blue") +
labs(title = "Netflix Stock Price (2022-2023)",
x = "Date",
y = "Closing Price (USD)") +
theme_minimal()
ggplot(netflix_df, aes(x = DailyReturn)) +
geom_histogram(bins = 30, fill = "steelblue", color = "black") +
labs(title = "Distribution of Daily Returns",
x = "Daily Return (%)",
y = "Frequency") +
theme_minimal()
ggplot(netflix_df, aes(x = Volume, y = Volatility)) +
geom_point(alpha = 0.5, color = "purple") +
geom_smooth(method = "lm", color = "red") +
labs(title = "Relationship Between Trading Volume and Price Volatility",
x = "Trading Volume",
y = "Volatility (%)") +
theme_minimal()
cor_matrix <- cor(netflix_df[, c("OpenPrice", "HighPrice", "LowPrice",
"ClosePrice", "Volume", "DailyReturn",
"Volatility")], use = "complete.obs")
# Create heatmap
ggplot(data = melt(cor_matrix), aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Correlation Matrix of Netflix Stock Variables")
# Calculate moving averages
netflix_df <- netflix_df %>%
mutate(MA50 = rollmean(ClosePrice, k = 50, fill = NA),
MA200 = rollmean(ClosePrice, k = 200, fill = NA))
# Plot with moving averages
ggplot(netflix_df, aes(x = Date)) +
geom_line(aes(y = ClosePrice), color = "blue") +
geom_line(aes(y = MA50), color = "red", linewidth = 1) +
geom_line(aes(y = MA200), color = "green", linewidth = 1) +
labs(title = "Netflix Stock Price with Moving Averages",
x = "Date", y = "Price (USD)") +
theme_minimal() +
annotate("text", x = min(netflix_df$Date) + 30,
y = max(netflix_df$ClosePrice) * 0.9,
label = "Blue: Price, Red: 50-day MA, Green: 200-day MA")
ggplot(netflix_df, aes(x = MonthName, y = DailyReturn, fill = MonthName)) +
geom_boxplot() +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none") +
labs(title = "Distribution of Daily Returns by Month",
x = "Month", y = "Daily Return (%)") +
scale_fill_brewer(palette = "Set3")
ggplot(netflix_df, aes(x = Date, y = Volume)) +
geom_bar(stat = "identity", fill = "darkblue", alpha = 0.7) +
labs(title = "Netflix Trading Volume Over Time",
x = "Date", y = "Volume") +
theme_minimal() +
scale_y_continuous(labels = comma_format())
# Save as CSV
write.csv(netflix_df, "netflix_processed.csv", row.names = FALSE)
# Save as R data file
saveRDS(netflix_df, "netflix_processed.rds")
This data analysis reveals several interesting patterns in Netflix stock over 2022-2023. The stock price shows significant volatility with a notable decrease in early 2022 followed by a recovery trend. Where daily returns appear to follow a normal distribution but with some outliers. The correlation matrix shows strong positive correlations between price variables (open, high, low, close). Monthly analysis reveals that certain months experienced more volatile returns than others. The relationship between trading volume and volatility suggests that higher trading volumes are somewhat associated with increased volatility.