Student Details

Hemanth Rangaswamy(s4069811)

Problem Statement

My objective in this assignment is to analyze the daily price fluctuations of the S&P 500 index and Bitcoin to assess whether they follow a normal distribution. This analysis is essential for risk assessment and precise price forecasting. By examining and comparing how their prices change over time, I hope to uncover how they behave and what that means for making smart financial decisions.

# Loading necessary packages
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Data Preparation:

# Load the raw price series.
# NOTE(review): both paths are absolute paths on the author's machine; the
# files must exist at exactly these locations for the script to run.
sp_data <- read.csv("C:/Users/Hemanth Gowda/Downloads/S&P 500-1.csv")
b_data <- read.csv("C:/Users/Hemanth Gowda/Downloads/BTC-USD-1.csv")
head(sp_data)
head(b_data)
# Show the column names of each file so the price columns can be identified.
print("Column names in S&P 500 Data:")
## [1] "Column names in S&P 500 Data:"
print(names(sp_data))
## [1] "Date"  "Price"
print("Column names in Bitcoin Data:")
## [1] "Column names in Bitcoin Data:"
print(names(b_data))
## [1] "Date"      "Adj.Close"
# Names of the closing-price columns; the rest of the script looks prices up
# through these two constants.
s500_CC <- "Price"  # S&P 500 close prices
bit_CC <- "Adj.Close"  # Bitcoin adjusted close prices

# --- S&P 500 ---------------------------------------------------------------
# Stop early when the expected price column is absent.
if (!(s500_CC %in% names(sp_data))) {
  stop("The 'Price' column does not exist in the S&P 500 dataset.")
}
# Strip thousands separators ("3,826.55" -> "3826.55") before converting.
sp_data[[s500_CC]] <- as.numeric(gsub(",", "", sp_data[[s500_CC]]))
# Parse day/month/year strings. Values that do not match the format become NA
# and would be dropped silently by na.omit() below, so report them.
sp_dates <- as.Date(sp_data$Date, format = "%d/%m/%Y")
if (anyNA(sp_dates)) {
  warning(
    sum(is.na(sp_dates)),
    " S&P 500 date(s) could not be parsed as %d/%m/%Y; ",
    "those rows are removed by na.omit().",
    call. = FALSE
  )
}
sp_data$Date <- sp_dates

# --- Bitcoin ---------------------------------------------------------------
if (!(bit_CC %in% names(b_data))) {
  stop("The 'Adj.Close' column does not exist in the Bitcoin dataset.")
}
b_data[[bit_CC]] <- as.numeric(b_data[[bit_CC]])
b_dates <- as.Date(b_data$Date, format = "%d/%m/%Y")
if (anyNA(b_dates)) {
  warning(
    sum(is.na(b_dates)),
    " Bitcoin date(s) could not be parsed as %d/%m/%Y; ",
    "those rows are removed by na.omit().",
    call. = FALSE
  )
}
b_data$Date <- b_dates

# Count prices that became NA during the numeric conversion.
print("Checking for missing values after converting the S&P 500 data.")
## [1] "Checking for missing values after converting the S&P 500 data."
print(sum(is.na(sp_data[[s500_CC]])))
## [1] 0
print("Checking for missing values after converting the Bitcoin Data:")
## [1] "Checking for missing values after converting the Bitcoin Data:"
print(sum(is.na(b_data[[bit_CC]])))
## [1] 0
# Drop every row that has an NA in any column (price or date).
sp_data <- na.omit(sp_data)
b_data <- na.omit(b_data)

Task 1

# Descriptive statistics for the S&P 500 closing prices.
# na.rm = TRUE makes every summary ignore missing prices.
s500_mean <- mean(sp_data[[s500_CC]], na.rm = TRUE)
s500_median <- median(sp_data[[s500_CC]], na.rm = TRUE)
s500_sd <- sd(sp_data[[s500_CC]], na.rm = TRUE)
s500_variance <- var(sp_data[[s500_CC]], na.rm = TRUE)
s500_min <- min(sp_data[[s500_CC]], na.rm = TRUE)
s500_max <- max(sp_data[[s500_CC]], na.rm = TRUE)
s500_range <- range(sp_data[[s500_CC]], na.rm = TRUE)  # c(min, max)

# Descriptive statistics for the Bitcoin adjusted closing prices.
# The median and the maximum are stored under separate names: storing both in
# `b_m` let the maximum overwrite the median, so the maximum was reported as
# the median.
b_mean <- mean(b_data[[bit_CC]], na.rm = TRUE)
b_median <- median(b_data[[bit_CC]], na.rm = TRUE)
b_sd <- sd(b_data[[bit_CC]], na.rm = TRUE)
b_v <- var(b_data[[bit_CC]], na.rm = TRUE)
b_min <- min(b_data[[bit_CC]], na.rm = TRUE)
b_max <- max(b_data[[bit_CC]], na.rm = TRUE)
b_range <- range(b_data[[bit_CC]], na.rm = TRUE)  # c(min, max)
# `b_m` is kept as an alias of the maximum (the value it ended up holding
# before) so that any later code still reading `b_m` sees the same number.
b_m <- b_max

# Labels and the matching values, in the same order, for printing.
stats <- c("Mean", "Median", "Standard Deviation", "Variance", "Minimum", "Maximum")
sp500_values <- c(s500_mean, s500_median, s500_sd, s500_variance, s500_min, s500_max)
btc_values <- c(b_mean, b_median, b_sd, b_v, b_min, b_max)

# Print each statistic for both assets, S&P 500 first.
# seq_along() is safe even if `stats` were empty (1:length() would give 1, 0).
for (i in seq_along(stats)) {
  print(paste("S&P 500", stats[i], ":", sp500_values[i]))
  print(paste("Bitcoin", stats[i], ":", btc_values[i]))
}
## [1] "S&P 500 Mean : 3826.55545695364"
## [1] "Bitcoin Mean : 26747.5258149608"
## [1] "S&P 500 Median : 3930.69"
## [1] "Bitcoin Median : 73083.5"
## [1] "S&P 500 Standard Deviation : 776.833654736138"
## [1] "Bitcoin Standard Deviation : 19162.9553157239"
## [1] "S&P 500 Variance : 603470.527130706"
## [1] "Bitcoin Variance : 367218856.432431"
## [1] "S&P 500 Minimum : 2237.4"
## [1] "Bitcoin Minimum : 3236.761719"
## [1] "S&P 500 Maximum : 5667.2"
## [1] "Bitcoin Maximum : 73083.5"
# Print the observed price range (minimum to maximum) of each asset.
print(paste("S&P 500 Range:", s500_range[1], "to", s500_range[2]))
## [1] "S&P 500 Range: 2237.4 to 5667.2"
print(paste("Bitcoin Range:", b_range[1], "to", b_range[2]))
## [1] "Bitcoin Range: 3236.761719 to 73083.5"
# Side-by-side table of the descriptive statistics of both assets.
# The Bitcoin median and maximum are computed here directly from the data
# instead of being read from `b_m`: that single variable cannot hold both
# values, which made the "Median" row show the maximum.
bitcoin_median <- median(b_data[[bit_CC]], na.rm = TRUE)
bitcoin_max <- max(b_data[[bit_CC]], na.rm = TRUE)
comparison <- data.frame(
  Measure = c(
    "Mean", "Median", "Standard Deviation", "Variance",
    "Minimum", "Maximum", "Range (Min)", "Range (Max)"
  ),
  S_P_500 = c(
    s500_mean, s500_median, s500_sd, s500_variance,
    s500_min, s500_max, s500_range[1], s500_range[2]
  ),
  Bitcoin = c(
    b_mean, bitcoin_median, b_sd, b_v,
    b_min, bitcoin_max, b_range[1], b_range[2]
  )
)
# Show the table.
print("Comparison of S&P 500 and Bitcoin Data:")
## [1] "Comparison of S&P 500 and Bitcoin Data:"
print(comparison)
##              Measure     S_P_500      Bitcoin
## 1               Mean   3826.5555 2.674753e+04
## 2             Median   3930.6900 7.308350e+04
## 3 Standard Deviation    776.8337 1.916296e+04
## 4           Variance 603470.5271 3.672189e+08
## 5            Minimum   2237.4000 3.236762e+03
## 6            Maximum   5667.2000 7.308350e+04
## 7        Range (Min)   2237.4000 3.236762e+03
## 8        Range (Max)   5667.2000 7.308350e+04

Descriptive analysis:

Key Differences:

Volatility: Bitcoin is more volatile than the S&P 500.

Price Range: Bitcoin: $3,236.76 to $73,083.50 , S&P 500: $2,237.40 to $5,667.20.

Mean: Bitcoin: $26,747.53 , S&P 500: $3,826.56.

Standard Deviation: Bitcoin: $19,162.96 , S&P 500: $776.83.

My Insight: Bitcoin’s price fluctuates more than the S&P 500, shown by its higher standard deviation and wider price range.

Task 2

# Resample both series to 6-month periods, keeping the last non-NA price of
# each period. Rows are sorted by Date first so that "last" means the most
# recent observation in the period regardless of the row order of the file.
b_resample <- b_data %>%
  arrange(Date) %>%
  group_by(Date = floor_date(Date, "6 months")) %>%
  summarise(Adj_Close = tail(.data[[bit_CC]][!is.na(.data[[bit_CC]])], 1))
s_resample <- sp_data %>%
  arrange(Date) %>%
  group_by(Date = floor_date(Date, "6 months")) %>%
  summarise(Price = tail(.data[[s500_CC]][!is.na(.data[[s500_CC]])], 1))

# Keep only the 6-month periods present in both resampled series.
merge_d <- inner_join(s_resample, b_resample, by = "Date")

# Pearson correlation of two vectors over their pairwise-complete values.
# Returns NA when fewer than two complete pairs exist, because a correlation
# is undefined there and cor(use = "complete.obs") would raise an error.
safe_cor <- function(x, y) {
  paired <- !is.na(x) & !is.na(y)
  if (sum(paired) < 2) {
    return(NA_real_)
  }
  cor(x[paired], y[paired])
}

# Correlation between the two assets within each 6-month period, computed
# from the daily prices that share a date. Calling cor() once on the whole
# resampled table would yield a single number repeated in every row, which
# plots as a flat line rather than a per-period correlation.
period_cor <- inner_join(sp_data, b_data, by = "Date") %>%
  group_by(Date = floor_date(Date, "6 months")) %>%
  summarise(Correlation = safe_cor(.data[[s500_CC]], .data[[bit_CC]]))

# Attach the per-period correlation; periods without common dates get NA.
merge_d <- merge_d %>%
  left_join(period_cor, by = "Date")

# Price trend of each asset over the full sample.
s500_plot <- ggplot(sp_data, aes(x = Date, y = .data[[s500_CC]])) +
  geom_line(color = "royalblue") +
  ggtitle("Six-Year S&P 500 Price Trend") +
  xlab("Date") + ylab("S&P 500 Price") +
  theme_minimal()
b_plot <- ggplot(b_data, aes(x = Date, y = .data[[bit_CC]])) +
  geom_line(color = "orange") +
  ggtitle("Six-Year Bitcoin Price Trend") +
  xlab("Date") + ylab("Bitcoin’s Adjusted Closing Value") +
  theme_minimal()
# Correlation of the two assets for each 6-month period.
c_plot <- ggplot(merge_d, aes(x = Date, y = Correlation)) +
  geom_line(color = "red") +
  ggtitle("Half-Year Correlation Analysis: S&P 500 and Bitcoin") +
  xlab("Date") + ylab("Correlation") +
  theme_minimal()
# Draw the plots. ggplot2's print method ignores `width`/`height` arguments,
# so they are not passed; set the figure size through the knitr chunk options
# (fig.width / fig.height) instead.
print(s500_plot)

print(b_plot)

print(c_plot)

Visual analysis:

S&P 500 Price Trend Over Six Years:

Bitcoin Price Trend Over Six Years:

6-Month Rolling Correlation Between S&P 500 and Bitcoin:

Task 3

# Align the two daily series: keep only the dates present in both datasets
# (an inner join on Date; no resampling takes place here).
merge_d <- sp_data %>%
  inner_join(b_data, by = "Date")
# Confirm which columns the joined table carries.
print(colnames(merge_d))
## [1] "Date"      "Price"     "Adj.Close"
# Pearson correlation between the two price columns, using only rows where
# both prices are present.
correlation_coefficient <- cor(
  merge_d[[s500_CC]],
  merge_d[[bit_CC]],
  use = "complete.obs"
)
# Report the coefficient.
print(paste("Correlation Coefficient between S&P 500 and Bitcoin:", correlation_coefficient))
## [1] "Correlation Coefficient between S&P 500 and Bitcoin: 0.897674934899167"
# Scatter plot: one point per common date, S&P 500 on x and Bitcoin on y.
s_plot <- ggplot(merge_d, aes(x = .data[[s500_CC]], y = .data[[bit_CC]])) +
  geom_point(color = "brown") +
  labs(
    title = "Market Behavior: S&P 500 vs. Bitcoin",
    x = "S&P 500 Price",
    y = "Bitcoin Price"
  ) +
  theme_minimal()
# Draw the scatter plot.
print(s_plot, width = 4, height = 2)

Analysis of Relationships:

Key Insights from Scatter Plot and Correlation Data:

Task 4

# Names of the closing-price columns used by the normality plots.
s500_CC <- "Price"
bit_CC <- "Adj.Close"

# Draw a histogram with a density curve next to a normal Q-Q plot for one
# numeric column, arranged side by side with gridExtra::grid.arrange().
#
# Arguments:
#   data      data frame holding the column to inspect.
#   col_name  name of the column, as a single character string.
#   title     asset name appended to both plot titles.
#   subtitle  subtitle shown on both plots.
#   binwidth  histogram bin width in the units of the column. Defaults to 50,
#             the width that was previously fixed inside the function; pass a
#             larger value for series that span a much wider range.
#
# Returns the value of grid.arrange(); the plots are drawn as a side effect.
normality_p <- function(data, col_name, title, subtitle, binwidth = 50) {
  # Fail with a clear message instead of an obscure ggplot2 error.
  stopifnot(
    is.data.frame(data),
    is.character(col_name),
    length(col_name) == 1L,
    col_name %in% names(data)
  )

  # Histogram scaled to density so the density curve shares its y axis.
  # `.data[[col_name]]` replaces the deprecated aes_string(), and `linewidth`
  # replaces the deprecated `size` aesthetic for lines.
  h_plot <- ggplot(data, aes(x = .data[[col_name]])) +
    geom_histogram(
      aes(y = after_stat(density)),
      binwidth = binwidth, fill = "white", color = "black", alpha = 0.7
    ) +
    geom_density(color = "orange", linewidth = 1) +
    labs(
      title = paste("Density and Distribution Plot", title),
      subtitle = subtitle,
      x = col_name,
      y = "Density"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 12),
      plot.subtitle = element_text(size = 10)
    )

  # Q-Q plot: sample quantiles against normal quantiles with a reference
  # line; points far from the line indicate departure from normality.
  Qplot <- ggplot(data, aes(sample = .data[[col_name]])) +
    geom_qq() +
    geom_qq_line(color = "orange") +
    labs(
      title = paste(" Q-Q Plot of", title),
      subtitle = subtitle,
      x = " Expected Quantiles",
      y = " Actual Quantiles"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 12),
      plot.subtitle = element_text(size = 10)
    )

  # Place the two plots side by side in equally wide columns.
  grid.arrange(h_plot, Qplot, ncol = 2, widths = c(3, 3))
}
# Histogram/density and Q-Q plots for the S&P 500 closing prices.
normality_p(
  data = sp_data,
  col_name = s500_CC,
  title = "S&P 500",
  subtitle = "Traditional Stock Market Index"
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Histogram/density and Q-Q plots for the Bitcoin adjusted closing prices.
normality_p(
  data = b_data,
  col_name = bit_CC,
  title = "Bitcoin",
  subtitle = "Cryptocurrency Market"
)

Fitting a Normal Distribution

S&P 500 Analysis:

Bitcoin Analysis:

Comparing the Two:

The S&P 500 is much closer to a normal distribution than Bitcoin. Bitcoin displays far more extreme price behavior, especially on the upside. The S&P 500’s distribution suggests more stable and predictable price movements, while Bitcoin’s distribution is volatile and skewed. Both assets deviate from normality, but Bitcoin’s deviation is far more pronounced.

References:

[1] Module 1, astral-theory-157510.appspot.com, 2024. [Online]. Available: https://astral-theory-157510.appspot.com

[2] Module 2, astral-theory-157510.appspot.com, 2024. [Online]. Available: https://astral-theory-157510.appspot.com

[3] Module 4, astral-theory-157510.appspot.com, 2024. [Online]. Available: https://astral-theory-157510.appspot.com

[4] QuillBot, “Paraphrasing Tool for Enhanced Writing,” 2024. [Online]. Available: https://quillbot.com

[5] R Project, “Other R Documentation,” The R Project for Statistical Computing. [Online]. Available: https://www.r-project.org/other-docs.html.

[6] RMIT University, “Assignment Cover Sheet,” RMIT University, Melbourne, Australia, 2024. [Online]. Available: https://www.rmit.edu.au/content/dam/rmit/au/en/students/documents/my-course/assessment-results/assignment-cover-sheet.pdf.

[7] R. Kabacoff, “Data Visualization Advice - Y-Axis Scaling,” 2024. [Online]. Available: https://rkabacoff.github.io/datavis/Advice.html#y-axis-scaling

[8] RMIT University, “MATH1324 Assignment 1 Datasets,” RMIT Canvas, 2024. [Online]. Available: https://rmit.instructure.com/courses/124219/assignments/969585

[9] W3Schools, “Statistics - Normal Distribution,” W3Schools.com. https://www.w3schools.com/statistics/statistics_normal_distribution.php.

[10] W3Schools, “Statistics - Descriptive Statistics,” W3Schools.com. https://www.w3schools.com/statistics/statistics_descriptive_statistics.php.

[11] W3Schools, “R - Scatter Plot,” 2024. [Online]. Available: https://www.w3schools.com/r/r_graph_scatterplot.asp