Introduction

This report presents an data analysis of Netflix stock data from 2022-2023. The analysis focuses on stock price movements, returns, volatility, and trading volumes.

Data Loading and Preprocessing

# Load required libraries
library(tidyverse)
library(readxl)
library(lubridate)
library(reshape2)
library(zoo)
library(scales)
# Import data from Excel file
netflix_data <- read_excel("netflix_stock_data_20250210_155914.xlsx", sheet = "Data")

# Convert data to a proper data frame
netflix_df <- as.data.frame(netflix_data)

# Create and format Date column
netflix_df$Date <- as.Date(netflix_df$...1)

# Remove the original date column and reorganize
netflix_df <- netflix_df %>%
  select(-...1) %>%  # Remove the original date column
  relocate(Date)     # Move Date to first column

# Check for missing values
missing_count <- sum(is.na(netflix_df))
print(paste("Total missing values:", missing_count))
## [1] "Total missing values: 0"
# Rename columns for clarity
netflix_df <- netflix_df %>%
  rename(
    OpenPrice = open,
    HighPrice = high,
    LowPrice = low,
    ClosePrice = close,
    Volume = volume,
    DailyReturn = daily_return,
    Volatility = volatility
  )

# Create additional features for analysis
netflix_df <- netflix_df %>%
  mutate(
    Month = month(Date),
    Year = year(Date),
    WeekDay = wday(Date, label = TRUE),
    MonthName = factor(month.name[Month], levels = month.name)
  )

# Display the first few rows
head(netflix_df)
##         Date OpenPrice HighPrice LowPrice ClosePrice  Volume DailyReturn
## 1 2022-01-03    605.61    609.99   590.56     597.37 3068808       -0.84
## 2 2022-01-04    599.91    600.41   581.60     591.15 4393135       -1.04
## 3 2022-01-05    592.00    592.84   566.88     567.52 4148749       -4.00
## 4 2022-01-06    554.34    563.36   542.01     553.29 5711795       -2.51
## 5 2022-01-07    549.46    553.43   538.22     541.06 3382873       -2.21
## 6 2022-01-10    538.49    543.69   526.32     539.85 4486145       -0.22
##   Volatility    ma_50   ma_200 open_outlier high_outlier low_outlier
## 1       1.37 640.6854 565.2527        FALSE        FALSE       FALSE
## 2       1.32 639.2128 565.5929        FALSE        FALSE       FALSE
## 3       1.46 637.1300 565.7551        FALSE        FALSE       FALSE
## 4       1.52 634.8254 565.9175        FALSE        FALSE       FALSE
## 5       1.48 632.3882 566.1085        FALSE        FALSE       FALSE
## 6       1.48 629.7042 566.2675        FALSE        FALSE       FALSE
##   close_outlier price_consistency Month Year WeekDay MonthName
## 1         FALSE              TRUE     1 2022     Mon   January
## 2         FALSE              TRUE     1 2022     Tue   January
## 3         FALSE              TRUE     1 2022     Wed   January
## 4         FALSE              TRUE     1 2022     Thu   January
## 5         FALSE              TRUE     1 2022     Fri   January
## 6         FALSE              TRUE     1 2022     Mon   January

Summary Statistics

# Basic summary statistics
summary(netflix_df %>% select(ClosePrice, Volume, DailyReturn, Volatility))
##    ClosePrice        Volume           DailyReturn          Volatility   
##  Min.   :166.4   Min.   :  1404732   Min.   :-35.12000   Min.   :1.020  
##  1st Qu.:249.3   1st Qu.:  4778118   1st Qu.: -1.73000   1st Qu.:2.040  
##  Median :342.5   Median :  6527391   Median : -0.08000   Median :2.860  
##  Mean   :337.3   Mean   :  8640660   Mean   :  0.02433   Mean   :3.168  
##  3rd Qu.:409.4   3rd Qu.:  9538226   3rd Qu.:  1.58000   3rd Qu.:3.770  
##  Max.   :597.4   Max.   :132644607   Max.   : 16.05000   Max.   :8.890

Exploratory Data Analysis

1. Stock Price Over Time

ggplot(netflix_df, aes(x = Date, y = ClosePrice)) +
  geom_line(color = "blue") +
  labs(title = "Netflix Stock Price (2022-2023)",
       x = "Date",
       y = "Closing Price (USD)") +
  theme_minimal()

2. Distribution of Daily Returns

ggplot(netflix_df, aes(x = DailyReturn)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "black") +
  labs(title = "Distribution of Daily Returns",
       x = "Daily Return (%)",
       y = "Frequency") +
  theme_minimal()

3. Relationship Between Trading Volume and Price Volatility

ggplot(netflix_df, aes(x = Volume, y = Volatility)) +
  geom_point(alpha = 0.5, color = "purple") +
  geom_smooth(method = "lm", color = "red") +
  labs(title = "Relationship Between Trading Volume and Price Volatility",
       x = "Trading Volume",
       y = "Volatility (%)") +
  theme_minimal()

4. Correlation Matrix Heatmap

cor_matrix <- cor(netflix_df[, c("OpenPrice", "HighPrice", "LowPrice", 
                               "ClosePrice", "Volume", "DailyReturn", 
                               "Volatility")], use = "complete.obs")

# Create heatmap
ggplot(data = melt(cor_matrix), aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                      midpoint = 0, limit = c(-1,1)) +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Matrix of Netflix Stock Variables")

5. Stock Price with Moving Averages

# Calculate moving averages
netflix_df <- netflix_df %>%
  mutate(MA50 = rollmean(ClosePrice, k = 50, fill = NA),
         MA200 = rollmean(ClosePrice, k = 200, fill = NA))

# Plot with moving averages
ggplot(netflix_df, aes(x = Date)) +
  geom_line(aes(y = ClosePrice), color = "blue") +
  geom_line(aes(y = MA50), color = "red", linewidth = 1) +
  geom_line(aes(y = MA200), color = "green", linewidth = 1) +
  labs(title = "Netflix Stock Price with Moving Averages",
       x = "Date", y = "Price (USD)") +
  theme_minimal() +
  annotate("text", x = min(netflix_df$Date) + 30, 
           y = max(netflix_df$ClosePrice) * 0.9, 
           label = "Blue: Price, Red: 50-day MA, Green: 200-day MA")

6. Daily Returns by Month

ggplot(netflix_df, aes(x = MonthName, y = DailyReturn, fill = MonthName)) +
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") +
  labs(title = "Distribution of Daily Returns by Month",
       x = "Month", y = "Daily Return (%)") +
  scale_fill_brewer(palette = "Set3")

7. Trading Volume Over Time

ggplot(netflix_df, aes(x = Date, y = Volume)) +
  geom_bar(stat = "identity", fill = "darkblue", alpha = 0.7) +
  labs(title = "Netflix Trading Volume Over Time",
       x = "Date", y = "Volume") +
  theme_minimal() +
  scale_y_continuous(labels = comma_format())

Saving the Processed Data

# Save as CSV
write.csv(netflix_df, "netflix_processed.csv", row.names = FALSE)

# Save as R data file
saveRDS(netflix_df, "netflix_processed.rds")

Conclusion

This data analysis reveals several interesting patterns in Netflix stock over 2022-2023. The stock price shows significant volatility with a notable decrease in early 2022 followed by a recovery trend. Where daily returns appear to follow a normal distribution but with some outliers. The correlation matrix shows strong positive correlations between price variables (open, high, low, close). Monthly analysis reveals that certain months experienced more volatile returns than others. The relationship between trading volume and volatility suggests that higher trading volumes are somewhat associated with increased volatility.