# Install required packages (if not already installed).
# requireNamespace() only checks availability; unlike require() it does not
# attach the package as a side effect of the check.
required_packages <- c("quantmod", "tidyverse", "lubridate")
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# Load the libraries (library() stops with an error if a package is missing)
library(quantmod)
library(tidyverse)
library(lubridate)
# Ticker universe grouped by sector (69 tickers in total).
# `stock_list` is the flat, unnamed character vector of all tickers, in the
# order the sectors are listed here.
sector_tickers <- list(
  # 12 tickers
  consumer_staples = c(
    "INGR", "TR", "FLO", "STKL", "JJSF", "WILC",
    "TSN", "CHD", "GIS", "ADM", "EL", "HSY"
  ),
  # 11 tickers
  energy = c(
    "NFG", "SBR", "NRT", "LEU", "REPX", "SHEL",
    "TTE", "XOM", "EOG", "WMB", "VLO"
  ),
  # 12 tickers
  financials = c(
    "ESGR", "WTM", "RLI", "BUSE", "CNOB", "GL",
    "RY", "AFL", "BK", "USB", "ING", "BLK"
  ),
  # 12 tickers
  healthcare = c(
    "DGX", "UTMD", "ATR", "ERNA", "PETS", "RMD",
    "ABT", "MDT", "COR", "NVO", "BIO", "RDNT"
  ),
  # 12 tickers
  industrials = c(
    "BBSI", "CWST", "MMS", "AIT", "EBF", "AAON",
    "ITW", "RSG", "CP", "BA", "LUV", "CHRW"
  ),
  # 10 tickers
  technology = c(
    "DOX", "ADTN", "POWI", "CTLP", "ROP",
    "TDY", "MSI", "GLW", "MU", "STM"
  )
)
# Flatten to a plain character vector; drop the sector-derived names
stock_list <- unlist(sector_tickers, use.names = FALSE)
# Destination folder for the per-ticker CSV files (Windows-style path);
# it is created, including parent folders, when it does not exist yet.
output_folder <- "C:/Users/User/Desktop/Master Thesis/Imported 69 Stocks Datasets"
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}
# Requested date window for the download.
# NOTE(review): the summary output in this file shows the saved data ending on
# 2025-04-30, so the end date appears to be treated as exclusive - confirm.
start_date <- as.Date("2000-01-01")
end_date <- as.Date("2025-05-01")
# Empty list reserved for stock data
# NOTE(review): not referenced by any code visible in this file
stock_data_list <- list()
# Fetch the price history of every ticker in `stock_list` from Yahoo and save
# it as <output_folder>/<SYMBOL>.csv with a leading Date column.
#
# - auto.assign = FALSE makes getSymbols() return the series directly, so no
#   objects named after tickers are created in (or removed from) the global
#   environment.
# - The pause runs in `finally`, i.e. after failures as well, so a rate-limit
#   error is not followed by an immediate new request.
# - `fetch_ok` records success per ticker; `failed_symbols` lists the tickers
#   that could not be saved, and a warning is raised if there are any.
fetch_ok <- setNames(logical(length(stock_list)), stock_list)
for (symbol in stock_list) {
  cat("📥 Fetching:", symbol, "\n")
  fetch_ok[symbol] <- tryCatch(
    {
      # Fetch data
      prices <- getSymbols(
        symbol,
        src = "yahoo",
        from = start_date,
        to = end_date,
        auto.assign = FALSE
      )
      # Treat an empty result as a failure instead of writing an empty CSV
      if (NROW(prices) == 0) {
        stop("no rows returned", call. = FALSE)
      }
      # Convert to data.frame and add Date column
      df <- data.frame(Date = index(prices), coredata(prices))
      # Save to CSV
      file_path <- file.path(output_folder, paste0(symbol, ".csv"))
      write.csv(df, file = file_path, row.names = FALSE)
      TRUE
    },
    error = function(e) {
      cat("❌ Failed:", symbol, "—", conditionMessage(e), "\n")
      FALSE
    },
    # Wait to avoid being rate-limited (also after a failed request)
    finally = Sys.sleep(2)
  )
}
# Tickers that were not saved; empty when every download succeeded
failed_symbols <- names(fetch_ok)[!fetch_ok]
if (length(failed_symbols) > 0) {
  warning(
    "Download failed for: ", paste(failed_symbols, collapse = ", "),
    call. = FALSE
  )
}
## 📥 Fetching: INGR
## 📥 Fetching: TR
## 📥 Fetching: FLO
## 📥 Fetching: STKL
## 📥 Fetching: JJSF
## 📥 Fetching: WILC
## 📥 Fetching: TSN
## 📥 Fetching: CHD
## 📥 Fetching: GIS
## 📥 Fetching: ADM
## 📥 Fetching: EL
## 📥 Fetching: HSY
## 📥 Fetching: NFG
## 📥 Fetching: SBR
## 📥 Fetching: NRT
## 📥 Fetching: LEU
## 📥 Fetching: REPX
## 📥 Fetching: SHEL
## 📥 Fetching: TTE
## 📥 Fetching: XOM
## 📥 Fetching: EOG
## 📥 Fetching: WMB
## 📥 Fetching: VLO
## 📥 Fetching: ESGR
## 📥 Fetching: WTM
## 📥 Fetching: RLI
## 📥 Fetching: BUSE
## 📥 Fetching: CNOB
## 📥 Fetching: GL
## 📥 Fetching: RY
## 📥 Fetching: AFL
## 📥 Fetching: BK
## 📥 Fetching: USB
## 📥 Fetching: ING
## 📥 Fetching: BLK
## 📥 Fetching: DGX
## 📥 Fetching: UTMD
## 📥 Fetching: ATR
## 📥 Fetching: ERNA
## 📥 Fetching: PETS
## 📥 Fetching: RMD
## 📥 Fetching: ABT
## 📥 Fetching: MDT
## 📥 Fetching: COR
## 📥 Fetching: NVO
## 📥 Fetching: BIO
## 📥 Fetching: RDNT
## 📥 Fetching: BBSI
## 📥 Fetching: CWST
## 📥 Fetching: MMS
## 📥 Fetching: AIT
## 📥 Fetching: EBF
## 📥 Fetching: AAON
## 📥 Fetching: ITW
## 📥 Fetching: RSG
## 📥 Fetching: CP
## 📥 Fetching: BA
## 📥 Fetching: LUV
## 📥 Fetching: CHRW
## 📥 Fetching: DOX
## 📥 Fetching: ADTN
## 📥 Fetching: POWI
## 📥 Fetching: CTLP
## 📥 Fetching: ROP
## 📥 Fetching: TDY
## 📥 Fetching: MSI
## 📥 Fetching: GLW
## 📥 Fetching: MU
## 📥 Fetching: STM
# Spot-check some of the saved CSV files by printing their first rows.
# Folder that holds the per-ticker CSV files
data_path <- "C:/Users/User/Desktop/Master Thesis/Imported 69 Stocks Datasets"
# Example tickers to inspect (replace with any symbols you've saved)
tickers_to_check <- c("AAON", "XOM")
for (ticker in tickers_to_check) {
  csv_file <- file.path(data_path, paste0(ticker, ".csv"))
  # Report a missing file and move on to the next ticker
  if (!file.exists(csv_file)) {
    cat("❌ File not found for:", ticker, "\n")
    next
  }
  cat("🔎 Head of:", ticker, "\n")
  print(head(read.csv(csv_file)))
  cat("\n----------------------------\n")
}
## 🔎 Head of: AAON
## Date AAON.Open AAON.High AAON.Low AAON.Close AAON.Volume AAON.Adjusted
## 1 2000-01-03 0.863283 0.863283 0.790123 0.790123 1026865 0.6525961
## 2 2000-01-04 0.855967 0.877915 0.797439 0.823045 997819 0.6797879
## 3 2000-01-05 0.841335 0.841335 0.812071 0.830361 7205140 0.6858303
## 4 2000-01-06 0.855967 0.863283 0.826703 0.834019 1515523 0.6888517
## 5 2000-01-07 0.826703 0.826703 0.804755 0.804755 350262 0.6646813
## 6 2000-01-10 0.804755 0.819387 0.804755 0.804755 102516 0.6646813
##
## ----------------------------
## 🔎 Head of: XOM
## Date XOM.Open XOM.High XOM.Low XOM.Close XOM.Volume XOM.Adjusted
## 1 2000-01-03 39.75000 40.37500 38.93750 39.15625 13458200 17.73023
## 2 2000-01-04 38.68750 39.09375 38.25000 38.40625 14510800 17.39063
## 3 2000-01-05 39.00000 40.87500 38.90625 40.50000 17485000 18.33869
## 4 2000-01-06 40.31250 42.90625 40.09375 42.59375 19461600 19.28675
## 5 2000-01-07 42.96875 43.12500 42.00000 42.46875 16603800 19.23016
## 6 2000-01-10 42.09375 42.31250 41.78125 41.87500 8545400 18.96131
##
## ----------------------------
# Build one metadata row per saved CSV file: symbol (taken from the file name),
# first and last date, and the number of rows and columns.
data_path <- "C:/Users/User/Desktop/Master Thesis/Imported 69 Stocks Datasets"
# Full paths of all CSV files in the folder
file_list <- list.files(data_path, pattern = "\\.csv$", full.names = TRUE)
# One single-row data frame per file
stock_summary <- lapply(file_list, function(csv_path) {
  prices <- read.csv(csv_path)
  # Dates are read as text; convert so min/max compare as dates
  prices$Date <- as.Date(prices$Date)
  data.frame(
    Symbol = tools::file_path_sans_ext(basename(csv_path)),
    Start_Date = min(prices$Date, na.rm = TRUE),
    End_Date = max(prices$Date, na.rm = TRUE),
    Num_Rows = nrow(prices),
    Num_Columns = ncol(prices)
  )
})
# Name the entries by symbol so the combined table gets symbol row names
names(stock_summary) <- tools::file_path_sans_ext(basename(file_list))
# Stack the rows into one table and show it
summary_table <- do.call(rbind, stock_summary)
print(summary_table)
## Symbol Start_Date End_Date Num_Rows Num_Columns
## AAON AAON 2000-01-03 2025-04-30 6370 7
## ABT ABT 2000-01-03 2025-04-30 6370 7
## ADM ADM 2000-01-03 2025-04-30 6370 7
## ADTN ADTN 2000-01-03 2025-04-30 6370 7
## AFL AFL 2000-01-03 2025-04-30 6370 7
## AIT AIT 2000-01-03 2025-04-30 6370 7
## ATR ATR 2000-01-03 2025-04-30 6370 7
## BA BA 2000-01-03 2025-04-30 6370 7
## BBSI BBSI 2000-01-03 2025-04-30 6370 7
## BIO BIO 2000-01-03 2025-04-30 6370 7
## BK BK 2000-01-03 2025-04-30 6370 7
## BLK BLK 2000-01-03 2025-04-30 6370 7
## BUSE BUSE 2000-01-03 2025-04-30 6370 7
## CHD CHD 2000-01-03 2025-04-30 6370 7
## CHRW CHRW 2000-01-03 2025-04-30 6370 7
## CNOB CNOB 2000-01-03 2025-04-30 6370 7
## COR COR 2000-01-03 2025-04-30 6370 7
## CP CP 2000-01-03 2025-04-30 6370 7
## CTLP CTLP 2000-01-03 2025-04-30 6370 7
## CWST CWST 2000-01-03 2025-04-30 6370 7
## DGX DGX 2000-01-03 2025-04-30 6370 7
## DOX DOX 2000-01-03 2025-04-30 6370 7
## EBF EBF 2000-01-03 2025-04-30 6370 7
## EL EL 2000-01-03 2025-04-30 6370 7
## EOG EOG 2000-01-03 2025-04-30 6370 7
## ERNA ERNA 2000-01-03 2025-04-30 6370 7
## ESGR ESGR 2000-01-03 2025-04-30 6370 7
## FLO FLO 2000-01-03 2025-04-30 6370 7
## GIS GIS 2000-01-03 2025-04-30 6370 7
## GL GL 2000-01-03 2025-04-30 6370 7
## GLW GLW 2000-01-03 2025-04-30 6370 7
## HSY HSY 2000-01-03 2025-04-30 6370 7
## ING ING 2000-01-03 2025-04-30 6370 7
## INGR INGR 2000-01-03 2025-04-30 6370 7
## ITW ITW 2000-01-03 2025-04-30 6370 7
## JJSF JJSF 2000-01-03 2025-04-30 6370 7
## LEU LEU 2000-01-03 2025-04-30 6370 7
## LUV LUV 2000-01-03 2025-04-30 6370 7
## MDT MDT 2000-01-03 2025-04-30 6370 7
## MMS MMS 2000-01-03 2025-04-30 6370 7
## MSI MSI 2000-01-03 2025-04-30 6370 7
## MU MU 2000-01-03 2025-04-30 6370 7
## NFG NFG 2000-01-03 2025-04-30 6370 7
## NRT NRT 2000-01-03 2025-04-30 6370 7
## NVO NVO 2000-01-03 2025-04-30 6370 7
## PETS PETS 2000-01-03 2025-04-30 6370 7
## POWI POWI 2000-01-03 2025-04-30 6370 7
## RDNT RDNT 2000-01-03 2025-04-30 6370 7
## REPX REPX 2000-01-03 2025-04-30 6370 7
## RLI RLI 2000-01-03 2025-04-30 6370 7
## RMD RMD 2000-01-03 2025-04-30 6370 7
## ROP ROP 2000-01-03 2025-04-30 6370 7
## RSG RSG 2000-01-03 2025-04-30 6370 7
## RY RY 2000-01-03 2025-04-30 6370 7
## SBR SBR 2000-01-03 2025-04-30 6370 7
## SHEL SHEL 2000-01-03 2025-04-30 6370 7
## STKL STKL 2000-01-03 2025-04-30 6370 7
## STM STM 2000-01-03 2025-04-30 6370 7
## TDY TDY 2000-01-03 2025-04-30 6370 7
## TR TR 2000-01-03 2025-04-30 6370 7
## TSN TSN 2000-01-03 2025-04-30 6370 7
## TTE TTE 2000-01-03 2025-04-30 6370 7
## USB USB 2000-01-03 2025-04-30 6370 7
## UTMD UTMD 2000-01-03 2025-04-30 6370 7
## VLO VLO 2000-01-03 2025-04-30 6370 7
## WILC WILC 2000-01-03 2025-04-30 6370 7
## WMB WMB 2000-01-03 2025-04-30 6370 7
## WTM WTM 2000-01-03 2025-04-30 6370 7
## XOM XOM 2000-01-03 2025-04-30 6370 7
# Optionally: save to Excel or CSV
# write.csv(summary_table, "C:/Users/User/Desktop/Master Thesis/Stock_Metadata_Summary.csv", row.names = FALSE)
# Standard deviation of the adjusted-close column for every saved CSV file.
# Files without exactly one column whose name contains "Adjusted" are skipped
# with a warning.
sd_list <- list()
for (csv_path in file_list) {
  prices <- read.csv(csv_path)
  ticker <- tools::file_path_sans_ext(basename(csv_path))
  # Locate the adjusted-close column by name
  adjusted_name <- colnames(prices)[grepl("Adjusted", colnames(prices))]
  if (length(adjusted_name) != 1) {
    warning(paste("Adjusted column not found in", ticker))
    next
  }
  sd_list[[ticker]] <- data.frame(
    Symbol = ticker,
    SD_AdjClose = sd(prices[[adjusted_name]], na.rm = TRUE)
  )
}
# Stack the per-ticker rows into one table and show it
sd_table <- do.call(rbind, sd_list)
print(sd_table)
## Symbol SD_AdjClose
## AAON AAON 24.364666
## ABT ABT 36.154382
## ADM ADM 19.321522
## ADTN ADTN 5.625170
## AFL AFL 22.723403
## AIT AIT 51.945713
## ATR ATR 42.266309
## BA BA 94.934750
## BBSI BBSI 9.377171
## BIO BIO 172.324655
## BK BK 13.069509
## BLK BLK 256.927560
## BUSE BUSE 7.250348
## CHD CHD 32.551500
## CHRW CHRW 27.828924
## CNOB CNOB 6.924966
## COR COR 60.929048
## CP CP 25.804406
## CTLP CTLP 43.913098
## CWST CWST 29.683199
## DGX DGX 39.668342
## DOX DOX 22.521396
## EBF EBF 4.672930
## EL EL 79.858653
## EOG EOG 34.826448
## ERNA ERNA 1849.020196
## ESGR ESGR 82.268402
## FLO FLO 7.421347
## GIS GIS 19.191397
## GL GL 34.417994
## GLW GLW 12.069166
## HSY HSY 60.804775
## ING ING 4.992329
## INGR INGR 36.056674
## ITW ITW 74.266849
## JJSF JJSF 54.599152
## LEU LEU 1234.243261
## LUV LUV 15.441272
## MDT MDT 25.508350
## MMS MMS 28.327260
## MSI MSI 98.998095
## MU MU 27.759921
## NFG NFG 15.749758
## NRT NRT 2.706772
## NVO NVO 28.580310
## PETS PETS 8.615493
## POWI POWI 24.690676
## RDNT RDNT 14.039897
## REPX REPX 271.000588
## RLI RLI 21.141405
## RMD RMD 76.456254
## ROP ROP 169.382860
## RSG RSG 51.898932
## RY RY 30.195858
## SBR SBR 16.489780
## SHEL SHEL 13.685024
## STKL STKL 3.215211
## STM STM 12.633095
## TDY TDY 150.274834
## TR TR 6.059904
## TSN TSN 23.510750
## TTE TTE 13.745085
## USB USB 12.362910
## UTMD UTMD 29.333319
## VLO VLO 38.320732
## WILC WILC 4.529483
## WMB WMB 10.685359
## WTM WTM 413.716242
## XOM XOM 24.621506
# Count the NA cells in each saved CSV file (all columns together).
# For a per-column breakdown, colSums(is.na(...)) could be used instead.
csv_symbols <- tools::file_path_sans_ext(basename(file_list))
missing_list <- setNames(
  lapply(seq_along(file_list), function(i) {
    prices <- read.csv(file_list[[i]])
    data.frame(
      Symbol = csv_symbols[[i]],
      Missing_Values = sum(is.na(prices))
    )
  }),
  csv_symbols
)
# Stack the per-ticker rows into one table and show it
missing_table <- do.call(rbind, missing_list)
print(missing_table)
## Symbol Missing_Values
## AAON AAON 0
## ABT ABT 0
## ADM ADM 0
## ADTN ADTN 0
## AFL AFL 0
## AIT AIT 0
## ATR ATR 0
## BA BA 0
## BBSI BBSI 0
## BIO BIO 0
## BK BK 0
## BLK BLK 0
## BUSE BUSE 0
## CHD CHD 0
## CHRW CHRW 0
## CNOB CNOB 0
## COR COR 0
## CP CP 0
## CTLP CTLP 0
## CWST CWST 0
## DGX DGX 0
## DOX DOX 0
## EBF EBF 0
## EL EL 0
## EOG EOG 0
## ERNA ERNA 0
## ESGR ESGR 0
## FLO FLO 0
## GIS GIS 0
## GL GL 0
## GLW GLW 0
## HSY HSY 0
## ING ING 0
## INGR INGR 0
## ITW ITW 0
## JJSF JJSF 0
## LEU LEU 0
## LUV LUV 0
## MDT MDT 0
## MMS MMS 0
## MSI MSI 0
## MU MU 0
## NFG NFG 0
## NRT NRT 0
## NVO NVO 0
## PETS PETS 0
## POWI POWI 0
## RDNT RDNT 0
## REPX REPX 0
## RLI RLI 0
## RMD RMD 0
## ROP ROP 0
## RSG RSG 0
## RY RY 0
## SBR SBR 0
## SHEL SHEL 0
## STKL STKL 0
## STM STM 0
## TDY TDY 0
## TR TR 0
## TSN TSN 0
## TTE TTE 0
## USB USB 0
## UTMD UTMD 0
## VLO VLO 0
## WILC WILC 0
## WMB WMB 0
## WTM WTM 0
## XOM XOM 0
# All 69 stocks were fetched successfully from Yahoo Finance and are ready for analysis. The datasets are free from missing values and sufficient for analysis.