# Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(GGally)
library(tidyr)
# Load the Nifty 50 OHLCV history and derive the columns used by the analyses
# below: a parsed Date, the simple daily Return, and the calendar Year.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running on another machine.
data <- read.csv("/Users/akxhat879/Downloads/nifty50_25years_ohlcv_1999_2026.csv")

# Parse from the raw text exactly once. Calling as.Date() again on a column
# that is already of class Date returns it unchanged (the `format` argument is
# ignored), so a second pass can never repair an NA.
raw_dates <- as.character(data$Date)
parsed_dates <- as.Date(raw_dates, format = "%Y-%m-%d")
needs_retry <- is.na(parsed_dates) & !is.na(raw_dates)
if (any(needs_retry)) {
  # "%Y/%m/%d" is the other format as.Date() tries by default for text input.
  parsed_dates[needs_retry] <- as.Date(
    raw_dates[needs_retry],
    format = "%Y/%m/%d"
  )
}
if (length(parsed_dates) > 0 && all(is.na(parsed_dates))) {
  stop("No value in the Date column could be parsed as a date.", call. = FALSE)
}
if (anyNA(parsed_dates)) {
  warning(
    sum(is.na(parsed_dates)),
    " Date value(s) could not be parsed and were set to NA.",
    call. = FALSE
  )
}
data$Date <- parsed_dates

# Sort chronologically before computing returns: lag() works on row order.
data <- data %>%
  arrange(Date) %>%
  mutate(
    Return = (Close - lag(Close)) / lag(Close),
    Year = format(Date, "%Y")
  )
# Q1: Long-Term Trend
# Closing price over the whole sample. `linewidth` is used for the line
# thickness because ggplot2 deprecated the `size` aesthetic for lines in 3.4.0.
ggplot(data, aes(x = Date, y = Close)) +
  geom_line(color = "darkblue", linewidth = 1) +
  labs(title = "Nifty 50 Long-Term Trend", y = "Closing Price") +
  theme_minimal()

# Q2: Moving Average
# Equal-weight 50-observation mean of Close. With sides = 1 the window covers
# the current and preceding values only, so the earliest rows have no value.
data$MA50 <- stats::filter(
  x = data$Close,
  filter = rep(1 / 50, times = 50),
  sides = 1
)
ggplot(data = data, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = Close), colour = "blue") +
  geom_line(mapping = aes(y = MA50), colour = "red") +
  theme_minimal()
## Warning: Removed 49 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Q3: Risk vs Return
# Daily simple return plus the standard deviation of those returns within each
# calendar year. ave() repeats the yearly value on every row of that year, so
# Volatility is constant within a year.
data <- data %>%
  mutate(
    Return = (Close - lag(Close)) / lag(Close),
    Year = format(Date, "%Y"),
    Volatility = ave(Return, Year, FUN = function(r) sd(r, na.rm = TRUE))
  )
ggplot(data = data, mapping = aes(x = Return, y = Volatility)) +
  geom_point(colour = "purple") +
  geom_smooth(method = "lm", colour = "orange") +
  theme_light()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Q4: Volatility Over Time
# Plots the Volatility column against Date as a line.
ggplot(data = data, mapping = aes(x = Date, y = Volatility)) +
  geom_line(colour = "darkgreen") +
  theme_minimal()

# Q5: Volatility Clustering
# Magnitude of each daily return, regardless of sign, plotted through time.
ggplot(data = data, mapping = aes(x = Date, y = abs(Return))) +
  geom_line(colour = "brown") +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

# Q6: Return Distribution
# Kernel density estimate of the Return column.
ggplot(data = data, mapping = aes(x = Return)) +
  geom_density(fill = "skyblue", alpha = 0.6) +
  theme_minimal()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).

# Q7: Extreme Events
# Daily returns as a grey line, with days whose absolute return exceeds 0.05
# overlaid as red points. which() skips rows where Return is NA.
extreme_days <- data[which(abs(data$Return) > 0.05), ]
ggplot(data = data, mapping = aes(x = Date, y = Return)) +
  geom_line(colour = "grey") +
  geom_point(data = extreme_days, colour = "red") +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

# Q8: Regression Model
# Linear model of Close on the Open, High and Low columns of the same row,
# followed by its coefficient and fit summary.
model <- lm(formula = Close ~ Open + High + Low, data = data)
summary(model)
##
## Call:
## lm(formula = Close ~ Open + High + Low, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -310.72 -11.95 1.02 12.47 609.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.930330 0.721102 -4.064 4.89e-05 ***
## Open -0.632574 0.008981 -70.434 < 2e-16 ***
## High 0.905325 0.008120 111.499 < 2e-16 ***
## Low 0.727027 0.006584 110.420 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.51 on 6282 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 8.384e+07 on 3 and 6282 DF, p-value: < 2.2e-16
# Q9: Prediction Accuracy
# Model predictions for the same rows the model was fitted on, plotted against
# the actual Close. The red reference line is y = x.
data$Pred <- predict(model, newdata = data)
ggplot(data = data, mapping = aes(x = Close, y = Pred)) +
  geom_point(colour = "blue") +
  geom_abline(intercept = 0, slope = 1, colour = "red") +
  theme_light()

# Q10: Pair Plot
# Pairwise plot matrix of the four price columns.
ggpairs(select(data, Open, High, Low, Close))

# Q11: Positive vs Negative Returns
# Share of days with a positive return versus all other days. Rows whose
# Return is NA (the first row has no previous close) are dropped so they do
# not appear as a third slice. A return of exactly zero counts as "Negative".
pie_data1 <- data %>%
  mutate(Return = (Close - lag(Close)) / lag(Close)) %>%
  dplyr::filter(!is.na(Return)) %>%
  mutate(Category = ifelse(Return > 0, "Positive", "Negative")) %>%
  count(Category)

# The colours are named on purpose: unnamed values are handed out in sorted
# category order, which would paint "Negative" green and "Positive" red.
ggplot(pie_data1, aes(x = "", y = n, fill = Category)) +
  geom_bar(stat = "identity") +
  coord_polar("y") +
  scale_fill_manual(values = c(Positive = "green", Negative = "red")) +
  theme_void()

# Q12: Return Categories
# Buckets each daily return: above +2% is "High", below -2% is "Low", and
# everything in between is "Medium". The lower bound is written `< -0.02`
# with a space: `Return <- 0.02` is parsed as an assignment, which made every
# non-"High" day "Low" and left "Medium" empty.
# Rows whose Return is NA (the first row has no previous close) are dropped.
pie_data2 <- data %>%
  mutate(Return = (Close - lag(Close)) / lag(Close)) %>%
  dplyr::filter(!is.na(Return)) %>%
  mutate(
    Category = case_when(
      Return > 0.02 ~ "High",
      Return < -0.02 ~ "Low",
      TRUE ~ "Medium"
    )
  ) %>%
  count(Category)

# Colours are matched by name; unnamed values would follow sorted category
# order (High, Low, Medium) and give "Low" yellow and "Medium" red.
ggplot(pie_data2, aes(x = "", y = n, fill = Category)) +
  geom_bar(stat = "identity") +
  coord_polar("y") +
  scale_fill_manual(
    values = c(High = "green", Medium = "yellow", Low = "red")
  ) +
  theme_void()

# Q13: Maximum Drawdown
# Cum_Max is the highest Close seen up to each row; Drawdown is the fractional
# distance of Close below that running peak (0 at a new peak, else negative).
data[["Cum_Max"]] <- cummax(data[["Close"]])
data[["Drawdown"]] <- with(data, (Close - Cum_Max) / Cum_Max)
ggplot(data = data, mapping = aes(x = Date, y = Drawdown)) +
  geom_line(colour = "red") +
  theme_minimal()

# Q14: Growth of ₹1 Investment
# Compounds the daily simple returns into a cumulative growth factor.
# dplyr::lag is spelled out so the call cannot resolve to stats::lag.
data$Return <- (data$Close - dplyr::lag(data$Close)) / dplyr::lag(data$Close)
# Rows with no computable return (the first row has no previous close) are
# treated as a 0% return so cumprod() does not propagate NA.
# Note that this overwrites data$Return in place.
data$Return[is.na(data$Return)] <- 0
data$Cum_Return <- cumprod(1 + data$Return)
# `linewidth` replaces the `size` aesthetic, deprecated for lines in
# ggplot2 3.4.0.
ggplot(data, aes(x = Date, y = Cum_Return)) +
  geom_line(color = "darkgreen", linewidth = 1) +
  labs(title = "Growth of ₹1 Investment", y = "Value") +
  theme_minimal()
