# Read the pre-processed AMZN RSI data set (AMZN_rsi_cc1_preprocess.csv).
vol_data <- read.csv("AMZN_rsi_cc1_preprocess.csv", header = TRUE)

# Factor analysis needs numeric input with no missing values, so drop every
# row that contains an NA.
# NOTE(review): this runs before the column selection below, so a row is also
# dropped when its NA sits in a column that is discarded afterwards -- confirm
# that this is intended.
vol_data <- vol_data[complete.cases(vol_data), ]


# Keep only columns 8 to 29. This removes the date variable and the dependent
# variable y_cc1 (factor analysis does not need the dependent variable).
vol_data <- subset(vol_data, select = 8:29)


# The null hypothesis of the factor analysis test is:
# - the model described by the factors we have found is good enough
# - i.e. the model predicts the data well

# low p-value  -> reject the null: the model doesn't predict well -> need more factors
# high p-value -> cannot reject the null, but that alone doesn't mean you can comfortably stop
# try one or two factors first and look at the gain before deciding on the number of factors

# Steps below
# first try with one factor.
# if the p-value is significant (low p-value), try more factors
# keep adding factors until the p-value is insignificant


# Fit a maximum-likelihood factor model with 10 factors to the selected
# columns, then print the uniquenesses, the loadings and the chi-square
# test of whether 10 factors are sufficient.
fact <- factanal(x = vol_data, factors = 10)
print(fact)
## 
## Call:
## factanal(x = vol_data, factors = 10)
## 
## Uniquenesses:
##   Rsi2   Rsi3   Rsi4   Rsi5   Rsi6   Rsi7   Rsi8   Rsi9  Rsi10  Rsi15 
##  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005 
##  Rsi20  RsiD2  RsiD3  RsiD4  RsiD5  RsiD6  RsiD7  RsiD8  RsiD9 RsiD10 
##  0.005  0.186  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005 
## RsiD15 RsiD20 
##  0.005  0.008 
## 
## Loadings:
##        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7 Factor8
## Rsi2    0.538   0.693   0.114   0.171           0.418                 
## Rsi3    0.680   0.632   0.202                   0.290                 
## Rsi4    0.771   0.536   0.252                   0.204                 
## Rsi5    0.835   0.445   0.268                   0.145                 
## Rsi6    0.881   0.367   0.262                   0.102                 
## Rsi7    0.916   0.302   0.244                                         
## Rsi8    0.941   0.249   0.217                                         
## Rsi9    0.959   0.204   0.187                                         
## Rsi10   0.972   0.167   0.155                                         
## Rsi15   0.993                                                         
## Rsi20   0.981          -0.103                                  -0.130 
## RsiD2                           0.894                                 
## RsiD3           0.470           0.738   0.467                         
## RsiD4           0.772           0.411   0.460                         
## RsiD5   0.123   0.921           0.216   0.232          -0.156         
## RsiD6   0.174   0.967           0.112                                 
## RsiD7   0.223   0.959   0.147                                         
## RsiD8   0.268   0.919   0.247                           0.108         
## RsiD9   0.311   0.863   0.341                           0.170         
## RsiD10  0.351   0.800   0.425          -0.107           0.191         
## RsiD15  0.522   0.504   0.673                                         
## RsiD20  0.641   0.299   0.692                                         
##        Factor9 Factor10
## Rsi2                   
## Rsi3                   
## Rsi4                   
## Rsi5                   
## Rsi6                   
## Rsi7                   
## Rsi8                   
## Rsi9                   
## Rsi10                  
## Rsi15                  
## Rsi20                  
## RsiD2                  
## RsiD3                  
## RsiD4                  
## RsiD5                  
## RsiD6                  
## RsiD7                  
## RsiD8                  
## RsiD9                  
## RsiD10                 
## RsiD15                 
## RsiD20                 
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## SS loadings      9.439   7.818   1.757   1.621   0.533   0.353   0.134
## Proportion Var   0.429   0.355   0.080   0.074   0.024   0.016   0.006
## Cumulative Var   0.429   0.784   0.864   0.938   0.962   0.978   0.984
##                Factor8 Factor9 Factor10
## SS loadings      0.061   0.019    0.014
## Proportion Var   0.003   0.001    0.001
## Cumulative Var   0.987   0.988    0.989
## 
## Test of the hypothesis that 10 factors are sufficient.
## The chi square statistic is 74172.81 on 56 degrees of freedom.
## The p-value is 0
# The number of factors has to be raised by hand. 15 is used here; asking
# factanal() for more than that on these 22 variables raises an error.
# This refit overwrites `fact`, so everything below uses the 15-factor model.
fact <- factanal(x = vol_data, factors = 15)
print(fact)
## 
## Call:
## factanal(x = vol_data, factors = 15)
## 
## Uniquenesses:
##   Rsi2   Rsi3   Rsi4   Rsi5   Rsi6   Rsi7   Rsi8   Rsi9  Rsi10  Rsi15 
##  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005 
##  Rsi20  RsiD2  RsiD3  RsiD4  RsiD5  RsiD6  RsiD7  RsiD8  RsiD9 RsiD10 
##  0.005  0.211  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005 
## RsiD15 RsiD20 
##  0.005  0.005 
## 
## Loadings:
##        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7 Factor8
## Rsi2    0.538   0.693   0.114   0.176           0.418                 
## Rsi3    0.680   0.632   0.202                   0.289                 
## Rsi4    0.771   0.536   0.251                   0.204                 
## Rsi5    0.835   0.445   0.268                   0.145                 
## Rsi6    0.881   0.367   0.262                   0.102                 
## Rsi7    0.916   0.303   0.243                                         
## Rsi8    0.941   0.249   0.217                                         
## Rsi9    0.959   0.205   0.187                                         
## Rsi10   0.972   0.168   0.155                                         
## Rsi15   0.993                                                         
## Rsi20   0.981          -0.103                                  -0.131 
## RsiD2                           0.883                                 
## RsiD3           0.468           0.754   0.441                         
## RsiD4           0.771           0.427   0.448                         
## RsiD5   0.123   0.920           0.225   0.227          -0.153         
## RsiD6   0.174   0.966           0.117                                 
## RsiD7   0.223   0.959   0.147                                         
## RsiD8   0.268   0.919   0.247                           0.106         
## RsiD9   0.311   0.863   0.340          -0.100           0.169         
## RsiD10  0.351   0.801   0.424          -0.108           0.191         
## RsiD15  0.522   0.505   0.672                                         
## RsiD20  0.641   0.299   0.694                                         
##        Factor9 Factor10 Factor11 Factor12 Factor13 Factor14 Factor15
## Rsi2                                                                
## Rsi3                                                                
## Rsi4                                                                
## Rsi5                                                                
## Rsi6                                                                
## Rsi7                                                                
## Rsi8                                                                
## Rsi9                                                                
## Rsi10                                                               
## Rsi15                                                               
## Rsi20                                                               
## RsiD2                                                               
## RsiD3                                                               
## RsiD4                                                               
## RsiD5                                                               
## RsiD6                                                               
## RsiD7                                                               
## RsiD8                                                               
## RsiD9                                                               
## RsiD10                                                              
## RsiD15                                                              
## RsiD20                                                              
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## SS loadings      9.438   7.815   1.757   1.647   0.498   0.353   0.132
## Proportion Var   0.429   0.355   0.080   0.075   0.023   0.016   0.006
## Cumulative Var   0.429   0.784   0.864   0.939   0.962   0.978   0.984
##                Factor8 Factor9 Factor10 Factor11 Factor12 Factor13
## SS loadings      0.062   0.018    0.014    0.001    0.000    0.000
## Proportion Var   0.003   0.001    0.001    0.000    0.000    0.000
## Cumulative Var   0.986   0.987    0.988    0.988    0.988    0.988
##                Factor14 Factor15
## SS loadings       0.000    0.000
## Proportion Var    0.000    0.000
## Cumulative Var    0.988    0.988
## 
## Test of the hypothesis that 15 factors are sufficient.
## The chi square statistic is 72288.53 on 6 degrees of freedom.
## The p-value is 0
# Check the correlation of the variables with the factors (the loadings).
# Rule of thumb: a loading greater than 0.7 (in absolute value) means the
# variable is well explained by that factor.
# Take every fitted factor instead of a hard-coded 1:15, so this keeps working
# when the `factors =` argument of the factanal() call above is changed.
load <- fact$loadings[, seq_len(ncol(fact$loadings)), drop = FALSE]

# plot() and text() only use the first two columns of a matrix, so select
# Factor1 (x axis) and Factor2 (y axis) explicitly.
if (ncol(load) >= 2L) {
  plot(load[, 1:2], type = "n") # set up an empty plot
  # add variable names at their (Factor1, Factor2) loadings
  text(load[, 1:2], labels = rownames(load), cex = 0.7)
} else {
  message("Need at least two factors to draw the loadings plot.")
}