Magistrska naloga final

R Markdown

library(readxl)

## Warning: package 'readxl' was built under R version 4.3.2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

us_sample <- read_excel("~/IMB/Magistrska naloga/Sample/Sample/US sample - končni.xlsx")
View(us_sample)

# (če si že prej bral v objektu z znakom č, ga lahko premapiraš)
# us_sample <- `US_sample_končni`

# 2) Prvi stolpec so datumi
#    Poskusi pretvorbo v Date, ohrani prvotni stolpec 'Date'
if (!inherits(us_sample[[1]], "Date")) {
  # Poskusi avtomatsko pretvorbo
  us_sample[[1]] <- as.Date(us_sample[[1]])
}
colnames(us_sample)[1] <- "Date"

# 3) Definiraj Y in X po pozicijah:
#    - Y = prvih 80 stolpcev po datumu (tj. stolpci 2:81)
#    - X = zadnjih 6 stolpcev
y_cols <- colnames(us_sample)[2:81]
x_cols <- tail(colnames(us_sample), 6)

# 4) Izpisi, kar želiš:
cat("Ime stolpca z datumi:", colnames(us_sample)[1], "\n\n")

## Ime stolpca z datumi: Date

cat("Število Y spremenljivk:", length(y_cols), "\n")

## Število Y spremenljivk: 80

cat("Prva Y spremenljivka:", y_cols[1], "\n")

## Prva Y spremenljivka: DODGX

cat("Zadnja Y spremenljivka:", y_cols[length(y_cols)], "\n\n")

## Zadnja Y spremenljivka: LSVQX

cat("Število X spremenljivk:", length(x_cols), "\n")

## Število X spremenljivk: 6

cat("Imena X spremenljivk:\n")

## Imena X spremenljivk:

print(x_cols)

## [1] "Russell_1000_Growth" "Russell_1000_Value"  "Russell_2000_Growth"
## [4] "Russell_2000_Value"  "Bond_US"             "T-Bill_US"

# (neobvezno) pripravi matrike za nadaljevanje analize
Y_mat <- as.matrix(us_sample[, y_cols])
X_mat <- as.matrix(us_sample[, x_cols])

# Hitri preverbi dimenzij
cat("\nDimenzije Y matrike:", paste(dim(Y_mat), collapse = " x "), "\n")

## 
## Dimenzije Y matrike: 108 x 80

cat("Dimenzije X matrike:", paste(dim(X_mat), collapse = " x "), "\n")

## Dimenzije X matrike: 108 x 6

# Predpostavlja, da imaš objekt `us_sample` in vektor `y_cols` iz prejšnjega koraka
# y_cols = names(us_sample)[2:81]

# 1) Indeksi skupin (po 20 stolpcev)
idx_Large_Value  <- 1:20
idx_Large_Growth <- 21:40
idx_Small_Growth <- 41:60
idx_Small_Value  <- 61:80

# 2) Vzemi tickers po skupinah
Large_Value  <- y_cols[idx_Large_Value]
Large_Growth <- y_cols[idx_Large_Growth]
Small_Growth <- y_cols[idx_Small_Growth]
Small_Value  <- y_cols[idx_Small_Value]



cat("\n")

# 4) Izpis skladov v vsaki skupini
cat("Skupina Large_Value (20):\n")

## Skupina Large_Value (20):

print(Large_Value); cat("\n")

##  [1] "DODGX" "AMRMX" "VWNAX" "MEIAX" "LBSAX" "OIEJX" "PEQSX" "TRPIX" "BBVLX"
## [10] "OAKMX" "NBHIX" "MUBFX" "SSHVX" "EIUTX" "PEQIX" "IEDIX" "EITVX" "CGRNX"
## [19] "PCPAX" "FLUIX"

cat("Skupina Large_Growth (20):\n")

## Skupina Large_Growth (20):

print(Large_Growth); cat("\n")

##  [1] "RGAGX" "FCNKX" "SEEGX" "TBCIX" "VWUSX" "MFEIX" "FMGKX" "BBGLX" "HACAX"
## [10] "FDTRX" "NVLIX" "PLCIX" "JAGRX" "BFTIX" "TGCEX" "WPSIX" "ALVOX" "MMDEX"
## [19] "CCWRX" "CMLIX"

cat("Skupina Small_Growth (20):\n")

## Skupina Small_Growth (20):

print(Small_Growth); cat("\n")

##  [1] "VSGAX" "NBGIX" "OTIIX" "FCAGX" "JSMGX" "ODIYX" "BSFIX" "FKAIX" "JVTNX"
## [10] "QUAIX" "WBSIX" "AGOZX" "BAFSX" "VLEIX" "PXQSX" "MRRGX" "OISGX" "PSGIX"
## [19] "ALFYX" "WSCGX"

cat("Skupina Small_Value (20):\n")

## Skupina Small_Value (20):

print(Small_Value); cat("\n")

##  [1] "DFSVX" "UBVSX" "VSOIX" "FCVIX" "ESPNX" "ABSYX" "FVADX" "DEVIX" "NDVIX"
## [10] "ARAIX" "RTRIX" "AFDZX" "DHMYX" "CRRYX" "HNTVX" "GSITX" "HWVIX" "AVALX"
## [19] "BPSIX" "LSVQX"

# (neobvezno) Če želiš tabelo preslikave sklad -> skupina:
library(tibble)
mapping_tbl <- tibble(
  fund = c(Large_Value, Large_Growth, Small_Growth, Small_Value),
  group = c(rep("Large_Value",  length(Large_Value)),
            rep("Large_Growth", length(Large_Growth)),
            rep("Small_Growth", length(Small_Growth)),
            rep("Small_Value",  length(Small_Value)))
)
# View(mapping_tbl)  # odkomentiraj, če želiš pregled v RStudio

# 1) Izlušči samo X spremenljivke
X_data <- us_sample[, x_cols]

# 2) Izračun korelacijske matrike (Pearson)
cor_mat <- cor(X_data, use = "pairwise.complete.obs", method = "pearson")

# 3) Zaokroži na 3 decimalke
cor_mat_round <- round(cor_mat, 3)

# 4) Izpis
print(cor_mat_round)

##                     Russell_1000_Growth Russell_1000_Value Russell_2000_Growth
## Russell_1000_Growth               1.000              0.789               0.824
## Russell_1000_Value                0.789              1.000               0.854
## Russell_2000_Growth               0.824              0.854               1.000
## Russell_2000_Value                0.693              0.917               0.912
## Bond_US                           0.444              0.327               0.359
## T-Bill_US                         0.040             -0.070              -0.047
##                     Russell_2000_Value Bond_US T-Bill_US
## Russell_1000_Growth              0.693   0.444     0.040
## Russell_1000_Value               0.917   0.327    -0.070
## Russell_2000_Growth              0.912   0.359    -0.047
## Russell_2000_Value               1.000   0.263    -0.131
## Bond_US                          0.263   1.000     0.051
## T-Bill_US                       -0.131   0.051     1.000

# (neobvezno) lepša vizualizacija z grafom:
# install.packages("corrplot") # če še nimaš
library(corrplot)

## Warning: package 'corrplot' was built under R version 4.3.2

## corrplot 0.92 loaded

corrplot(cor_mat, method = "color", type = "upper", 
         tl.col = "black", tl.srt = 45, addCoef.col = "black")

# RBSA (QP) za VSGAX, 36 mesecev: 2016-01-31 .. 2018-12-31
# Predpogoji v okolju: us_sample, x_cols
# OPOMBA: v solve.QP mora biti ENAKOSTNA omejitev PRVA kolona v Amat!

library(quadprog)

# 1) Izberi striktno vrstice 2:37 (tvoj Excel/zahteva)
sub <- us_sample[2:37, ]

# 2) Pripravi y in X (brez intercepta)
fund <- "VSGAX"
y <- as.numeric(sub[[fund]])
X <- as.matrix(sub[, x_cols, drop = FALSE])

# 3) Odstrani morebitne NA vrstice
ok <- stats::complete.cases(cbind(y, X))
y  <- y[ok]
X  <- X[ok, , drop = FALSE]

# 4) Kvadratno programiranje: min ||y - X w||^2  s.t. w >= 0, 1' w = 1
K    <- ncol(X)
Dmat <- t(X) %*% X
dvec <- t(X) %*% y

# ENAKOST (1' w = 1) kot PRVA kolona, nato neenakosti w_i >= 0
Amat <- cbind(rep(1, K), diag(K))      # (K+1) stolpcev omejitev
bvec <- c(1, rep(0, K))
meq  <- 1

# Stabilizacija (če je XtX skoraj singularen)
Dmat <- as.matrix(Dmat + diag(1e-8, K))

fit <- solve.QP(Dmat = Dmat, dvec = as.numeric(dvec),
                Amat = Amat, bvec = bvec, meq = meq)

w <- as.numeric(fit$solution)
names(w) <- colnames(X)

# (varnostna normalizacija proti numeričnemu šumu)
w[w < 0] <- 0
w <- w / sum(w)

# 5) Metri ke prileganja (R^2 brez intercepta)
y_hat <- as.vector(X %*% w)
SSR   <- sum((y - y_hat)^2)
SSTO  <- sum((y - mean(y))^2)
R2    <- 1 - SSR / SSTO

# 6) Izpis rezultatov
cat("RBSA —", fund, "(2016-01-31 do 2018-12-31)\n")

## RBSA — VSGAX (2016-01-31 do 2018-12-31)

cat("Opazovanj:", length(y), "\n")

## Opazovanj: 36

cat("Vsota uteži:", round(sum(w), 10), " | Min utež:", round(min(w), 10), "\n")

## Vsota uteži: 1  | Min utež: 0

cat("R^2:", round(R2, 6), "\n\n")

## R^2: 0.979123

cat("Uteži po indeksih:\n")

## Uteži po indeksih:

print(round(w, 6))

## Russell_1000_Growth  Russell_1000_Value Russell_2000_Growth  Russell_2000_Value 
##            0.229516            0.000000            0.759583            0.000000 
##             Bond_US           T-Bill_US 
##            0.010901            0.000000

library(quadprog)
library(dplyr)
library(purrr)
library(tibble)
library(openxlsx)   # recommended Excel writer

## Warning: package 'openxlsx' was built under R version 4.3.3

# 1) Select the 36-month window
sub <- us_sample %>%
  filter(Date >= as.Date("2016-01-31") & Date <= as.Date("2018-12-31"))
# alternatively: sub <- us_sample[2:37, ]

# 2) Function for RBSA on one fund
rbsa_one <- function(fund, data, x_cols) {
  y <- as.numeric(data[[fund]])
  X <- as.matrix(data[, x_cols, drop = FALSE])
  
  ok <- complete.cases(cbind(y, X))
  y  <- y[ok]; X <- X[ok, , drop = FALSE]
  
  K <- ncol(X)
  Dmat <- t(X) %*% X
  dvec <- t(X) %*% y
  
  Amat <- cbind(rep(1, K), diag(K))
  bvec <- c(1, rep(0, K))
  meq  <- 1
  
  Dmat <- as.matrix(Dmat + diag(1e-8, K))
  
  fit <- solve.QP(Dmat = Dmat, dvec = as.numeric(dvec),
                  Amat = Amat, bvec = bvec, meq = meq)
  
  w <- as.numeric(fit$solution)
  w[w < 0] <- 0
  w <- w / sum(w)
  names(w) <- x_cols
  
  y_hat <- as.vector(X %*% w)
  SSR   <- sum((y - y_hat)^2)
  SSTO  <- sum((y - mean(y))^2)
  R2    <- 1 - SSR/SSTO
  RMSE  <- sqrt(mean((y - y_hat)^2))
  
  # round everything to 4 decimals
  tibble(
    Fund = fund,
    Nobs = length(y),
    R2   = round(R2, 4),
    RMSE = round(RMSE, 4),
    !!!lapply(as.list(w), function(x) round(x, 4))
  )
}

# 3) Run analysis for all Small_Growth funds
results_small_growth <- map_dfr(Small_Growth, rbsa_one, data = sub, x_cols = x_cols)

# 4) Save to Excel
write.xlsx(results_small_growth, file = "RBSA_SmallGrowth_2016_2018V2.xlsx")

## Warning in file.create(to[okay]): cannot create file
## 'RBSA_SmallGrowth_2016_2018V2.xlsx', reason 'Permission denied'

# Quick preview
print(results_small_growth)

## # A tibble: 20 × 10
##    Fund   Nobs    R2   RMSE Russell_1000_Growth Russell_1000_Value
##    <chr> <int> <dbl>  <dbl>               <dbl>              <dbl>
##  1 VSGAX    36 0.984 0.0054              0.218              0     
##  2 NBGIX    36 0.930 0.0102              0.102              0.294 
##  3 OTIIX    36 0.973 0.0065              0.0452             0.0521
##  4 FCAGX    36 0.944 0.0105              0.282              0     
##  5 JSMGX    36 0.974 0.0066              0.349              0.023 
##  6 ODIYX    36 0.925 0.0123              0.331              0     
##  7 BSFIX    36 0.944 0.0102              0.421              0.0155
##  8 FKAIX    36 0.856 0.0187              0.249              0     
##  9 JVTNX    36 0.972 0.0072              0.268              0     
## 10 QUAIX    36 0.926 0.0139              0.102              0     
## 11 WBSIX    36 0.917 0.0129              0                  0     
## 12 AGOZX    36 0.772 0.0264              0.115              0     
## 13 BAFSX    36 0.932 0.01                0.318              0     
## 14 VLEIX    36 0.931 0.0095              0.138              0.232 
## 15 PXQSX    36 0.855 0.0155              0.146              0.161 
## 16 MRRGX    36 0.930 0.0107              0.0233             0.212 
## 17 OISGX    36 0.947 0.0109              0.272              0     
## 18 PSGIX    36 0.976 0.0074              0.0014             0     
## 19 ALFYX    36 0.98  0.0057              0.197              0     
## 20 WSCGX    36 0.966 0.0092              0.0411             0     
## # ℹ 4 more variables: Russell_2000_Growth <dbl>, Russell_2000_Value <dbl>,
## #   Bond_US <dbl>, `T-Bill_US` <dbl>

# ===== Batch RBSA za VSE sklade in POVPREČJA po stilih =====
# Predpogoji v okolju: us_sample, x_cols, ter vektorji:
#   Large_Value, Large_Growth, Small_Value, Small_Growth
# Zahtevano okno: 2016-01-31 .. 2018-12-31
# Rezultate zaokrožimo na 4 decimale in shranimo v Excel.

library(quadprog)
library(dplyr)
library(purrr)
library(tibble)
library(openxlsx)

# --- 1) Časovno okno (36 mesecev) ---
sub <- us_sample %>%
  filter(Date >= as.Date("2016-01-31") & Date <= as.Date("2018-12-31"))
# (alternativa, če želiš striktno vrstice 2:37)
# sub <- us_sample[2:37, ]

# --- 2) Preslikava sklad -> stil ---
style_map <- tibble(
  Fund  = c(Large_Value, Large_Growth, Small_Value, Small_Growth),
  Style = c(rep("Large_Value",  length(Large_Value)),
            rep("Large_Growth", length(Large_Growth)),
            rep("Small_Value",  length(Small_Value)),
            rep("Small_Growth", length(Small_Growth)))
)

all_funds <- style_map$Fund

# --- 3) Funkcija RBSA za en sklad (QP; β >= 0, sum β = 1) ---
rbsa_one <- function(fund, data, x_cols) {
  y <- as.numeric(data[[fund]])
  X <- as.matrix(data[, x_cols, drop = FALSE])

  ok <- stats::complete.cases(cbind(y, X))
  y  <- y[ok]; X <- X[ok, , drop = FALSE]

  K    <- ncol(X)
  Dmat <- t(X) %*% X
  dvec <- t(X) %*% y

  # ENAKOST naj bo PRVA kolona (sum β = 1), nato neenakosti (β >= 0)
  Amat <- cbind(rep(1, K), diag(K))
  bvec <- c(1, rep(0, K))
  meq  <- 1

  Dmat <- as.matrix(Dmat + diag(1e-8, K))  # stabilizacija

  fit <- solve.QP(Dmat = Dmat, dvec = as.numeric(dvec),
                  Amat = Amat, bvec = bvec, meq = meq)

  w <- as.numeric(fit$solution)
  w[w < 0] <- 0
  w <- w / sum(w)
  names(w) <- x_cols

  y_hat <- as.vector(X %*% w)
  SSR   <- sum((y - y_hat)^2)
  SSTO  <- sum((y - mean(y))^2)
  R2    <- 1 - SSR/SSTO
  RMSE  <- sqrt(mean((y - y_hat)^2))

  tibble(
    Fund = fund,
    Nobs = length(y),
    R2   = round(R2, 4),
    RMSE = round(RMSE, 4),
    !!!lapply(as.list(w), function(v) round(v, 4))
  )
}

# --- 4) Zagon za vse sklade, pripni stile ---
fund_results <- map_dfr(all_funds, rbsa_one, data = sub, x_cols = x_cols) %>%
  left_join(style_map, by = "Fund") %>%
  relocate(Style, .after = Fund)

# --- 5) Povprečja po stilih (R² in uteži) ---
weight_cols <- x_cols  # 6 indeksov

style_avg <- fund_results %>%
  group_by(Style) %>%
  summarise(
    N_funds = dplyr::n(),
    Avg_R2  = round(mean(R2, na.rm = TRUE), 4),
    across(
      all_of(weight_cols),
      ~ round(mean(.x, na.rm = TRUE), 4),
      .names = "Avg_{.col}"
    ),
    .groups = "drop"
  )
 

# --- 6) Shranjevanje v Excel (več listov) ---
wb <- openxlsx::createWorkbook()
openxlsx::addWorksheet(wb, "Fund_Results")
openxlsx::addWorksheet(wb, "Style_Averages")
openxlsx::writeData(wb, "Fund_Results", fund_results)
openxlsx::writeData(wb, "Style_Averages", style_avg)

for (st in unique(fund_results$Style)) {
  openxlsx::addWorksheet(wb, st)
  openxlsx::writeData(wb, st, dplyr::filter(fund_results, Style == st))
}

openxlsx::saveWorkbook(wb, file = "RBSA_AllFunds_2016_2018.xlsx", overwrite = TRUE)

## Warning in file.create(to[okay]): cannot create file
## 'RBSA_AllFunds_2016_2018.xlsx', reason 'Permission denied'

# Hiter pregled
print(style_avg)

## # A tibble: 4 × 9
##   Style        N_funds Avg_R2 Avg_Russell_1000_Growth Avg_Russell_1000_Value
##   <chr>          <int>  <dbl>                   <dbl>                  <dbl>
## 1 Large_Growth      20  0.909                   0.912                 0.0403
## 2 Large_Value       20  0.951                   0.162                 0.736 
## 3 Small_Growth      20  0.932                   0.181                 0.0495
## 4 Small_Value       20  0.911                   0.036                 0.138 
## # ℹ 4 more variables: Avg_Russell_2000_Growth <dbl>,
## #   Avg_Russell_2000_Value <dbl>, Avg_Bond_US <dbl>, `Avg_T-Bill_US` <dbl>

Magistrska naloga final

Anej Levpuscek

2025-09-06

R Markdown