financial analysis

#beskr av filen:
#gaar igenom casestudy3 (ja jag skrev 4 men det 'a'r fel i namnet). 
#kod1.R 'a'r nyare version av kod.R saa d'a'rav 'a'r denna nyast hittills. 

#nu uppt'a'ckte jag att man bara intresserar sig foor 10 year yeild i analysen
#saa ja gplockar ut bara den. och dessutom finns den paa yahoo => beh ej importera
#och skit

#http://finance.yahoo.com/echarts?s=%5ETNX+Interactive#{"customRangeStart":946854000,"customRangeEnd":1331506800,"range":"custom","allowChartStacking":true}
#CBOE Interest Rate 10 Year T No (^TNX) 

################################################################################
#get data, structure of data, look at table, plot data#get data
### 

library("quantmod")  
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("tseries")  
library("vars")  
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library("fxregime")  

date.start<-20000101
date.end<-20130531
date.start1<-"2000-01-01"
date.end1<-"2013-05-31"


#GET DATA 
TNX <- getYahooData("^TNX", start=date.start, end=date.end)

#STRUCTURE OF DATA
dim(TNX)
## [1] 3370    5
str(TNX)
## An 'xts' object on 2000-01-03/2013-05-31 containing:
##   Data: num [1:3370, 1:5] 6.5 6.53 6.52 6.56 6.54 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:5] "Open" "High" "Low" "Close" ...
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL
is.matrix(TNX)
## [1] TRUE
mode(TNX)  #numeric
## [1] "numeric"
class(TNX) # object-oriented classes are xts and zoo
## [1] "xts" "zoo"
#LOOK AT TABLE
head(TNX)
##             Open  High   Low Close Volume
## 2000-01-03 6.498 6.603 6.498 6.548      0
## 2000-01-04 6.530 6.548 6.485 6.485      0
## 2000-01-05 6.521 6.599 6.508 6.599      0
## 2000-01-06 6.558 6.585 6.540 6.549      0
## 2000-01-07 6.545 6.595 6.504 6.504      0
## 2000-01-10 6.540 6.567 6.536 6.558      0
tail(TNX)
##             Open  High   Low Close Volume
## 2013-05-23 2.012 2.051 1.982 2.023      0
## 2013-05-24 2.011 2.030 1.981 2.011      0
## 2013-05-28 2.042 2.137 2.041 2.135      0
## 2013-05-29 2.147 2.167 2.121 2.124      0
## 2013-05-30 2.147 2.158 2.094 2.124      0
## 2013-05-31 2.092 2.211 2.084 2.164      0
#PLOT DATA
chartSeries(TNX[,1:5])

################################################################################
#2.0
#take out 10 year yeild. 
#only look at close
#make daily weekly monthly
### 

y.DGS10.daily <- TNX[,"Close"]  #take close price only 
str(y.DGS10.daily) #check 
## An 'xts' object on 2000-01-03/2013-05-31 containing:
##   Data: num [1:3370, 1] 6.55 6.49 6.6 6.55 6.5 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "Close"
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL
head(y.DGS10.daily) #check 
##            Close
## 2000-01-03 6.548
## 2000-01-04 6.485
## 2000-01-05 6.599
## 2000-01-06 6.549
## 2000-01-07 6.504
## 2000-01-10 6.558
sum(is.na(y.DGS10.daily)) #check 
## [1] 0
head(to.weekly(y.DGS10.daily)) #check that this is consistent with line above
##            y.DGS10.daily.Open y.DGS10.daily.High y.DGS10.daily.Low
## 2000-01-07              6.548              6.599             6.485
## 2000-01-14              6.558              6.696             6.558
## 2000-01-21              6.748              6.781             6.725
## 2000-01-28              6.684              6.685             6.648
## 2000-02-04              6.667              6.667             6.475
## 2000-02-11              6.631              6.669             6.599
##            y.DGS10.daily.Close
## 2000-01-07               6.504
## 2000-01-14               6.674
## 2000-01-21               6.781
## 2000-01-28               6.648
## 2000-02-04               6.517
## 2000-02-11               6.621
# below, convert data. 
#we will analyze weekly and monthly data. 
y.DGS10.weekly <-  to.weekly(y.DGS10.daily)[,4] #4th column is close 
y.DGS10.monthly <-  to.monthly(y.DGS10.daily)[,4]
dim(y.DGS10.weekly); dim(y.DGS10.monthly)
## [1] 700   1
## [1] 161   1
################################################################################
#acf pacf plots 
###

#par( mfcol=c(2,3) )
acf(y.DGS10.daily)

acf(y.DGS10.daily,type="partial")

acf(y.DGS10.weekly)

acf(y.DGS10.weekly,type="partial")

acf(y.DGS10.monthly)

acf(y.DGS10.monthly,type="partial")

################################################################################
#testing for unit root 
### 

#H0: series has unit root 
adf.test(y.DGS10.daily)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y.DGS10.daily
## Dickey-Fuller = -3.365, Lag order = 14, p-value = 0.05944
## alternative hypothesis: stationary
adf.test(y.DGS10.weekly) #qq faar annorlunda, sid11
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y.DGS10.weekly
## Dickey-Fuller = -3.0415, Lag order = 8, p-value = 0.1375
## alternative hypothesis: stationary
adf.test(y.DGS10.monthly)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y.DGS10.monthly
## Dickey-Fuller = -2.7197, Lag order = 5, p-value = 0.2761
## alternative hypothesis: stationary
#lower p value when lower periodicity of data (p daily < p monthly) 
#so stronger time series structure at higher freq. 
#hence do diff()
sum(is.na(y.DGS10.daily))
## [1] 0
adf.test( na.omit(diff(y.DGS10.daily)) )
## Warning in adf.test(na.omit(diff(y.DGS10.daily))): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(diff(y.DGS10.daily))
## Dickey-Fuller = -14.347, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
adf.test( na.omit(diff(y.DGS10.weekly)) )
## Warning in adf.test(na.omit(diff(y.DGS10.weekly))): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(diff(y.DGS10.weekly))
## Dickey-Fuller = -9.6135, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test( na.omit(diff(y.DGS10.monthly)) )
## Warning in adf.test(na.omit(diff(y.DGS10.monthly))): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(diff(y.DGS10.monthly))
## Dickey-Fuller = -6.6264, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
#all have low pvalues 
#observation: a high first order autocorr => conclusion: data has unit root
#for daily weekly monthly (all three)

################################################################################
#acf pacf plots diffed data 
###
#par( mfcol=c(2,3) ) #par(mfcol=c(1,1)) is standard
acf(na.omit( diff(y.DGS10.daily )))                  

acf(na.omit( diff(y.DGS10.daily )) , type="partial")

acf(na.omit( diff(y.DGS10.weekly )) )

acf(na.omit( diff(y.DGS10.weekly )) , type="partial") 

acf(na.omit( diff(y.DGS10.monthly )) )

acf(na.omit( diff(y.DGS10.monthly )) , type="partial")

# observations from the graphs:
# daily: acf 2, pacf 2
# weekly: probably not. perhaps acf 15, pacf 18. 
# monthly: acf 2, pacf 2.

################################################################################

plot(y.DGS10.monthly) #need to diff() this 

plot(diff(y.DGS10.monthly)); abline(0,0, lty=2)

#looks like cov-stationary time series strucutre. 
#but is it white noise or something else? we fit an AR(p) model.

################################################################################

# 1.6 skippas
# men kod beh foor vidare analys saa koor igenom 

sum(is.na(diff(y.DGS10.monthly)))
## [1] 1
y0 <- na.omit( diff(y.DGS10.monthly) )
y0.acf  <- acf(y0, type = "correlation")

y0.pacf <- acf(y0, type = "partial")

y0.lag1 <- lag(y0     , k=1)
y0.lag2 <- lag(y0.lag1, k=1)
y0.lag3 <- lag(y0.lag2, k=1)
y0.lag4 <- lag(y0.lag3, k=1)
y0.lag5 <- lag(y0.lag4, k=1)
y0.lag6 <- lag(y0.lag5, k=1)
y0.lag7 <- lag(y0.lag6, k=1)


################################################################################
#we saw above that 
#monthly: acf 2, pacf 2.
#created by the code
#acf(na.omit( diff(y.DGS10.monthly )) , type="partial")
#so we fit an AR(2) model, then look at roots of char polynomial (qq infoga l'a'nk)
# se oxaa oldcode jl_151212_142242


#best AR? 
y0.ar <- ar(y0) #fits best model according to aic
names(y0.ar)
##  [1] "order"        "ar"           "var.pred"     "x.mean"      
##  [5] "aic"          "n.used"       "order.max"    "partialacf"  
##  [9] "resid"        "method"       "series"       "frequency"   
## [13] "call"         "asy.var.coef"
y0.ar$order.max
## [1] 22
y0.ar$order #7 order fit is best 
## [1] 7
#create dataset with 7lags
df.y0to.l7 <- cbind(y0, y0.lag1, y0.lag2, y0.lag3, y0.lag4, 
                    y0.lag5, y0.lag6, y0.lag7)
names(df.y0to.l7) <- c('y0', 'y0.lag1', 'y0.lag2', 'y0.lag3', 'y0.lag4', 
                       'y0.lag5', 'y0.lag6', 'y0.lag7')
head(df.y0to.l7)
##              y0 y0.lag1 y0.lag2 y0.lag3 y0.lag4 y0.lag5 y0.lag6 y0.lag7
## Feb 2000 -0.258      NA      NA      NA      NA      NA      NA      NA
## Mar 2000 -0.386  -0.258      NA      NA      NA      NA      NA      NA
## Apr 2000  0.191  -0.386  -0.258      NA      NA      NA      NA      NA
## May 2000  0.071   0.191  -0.386  -0.258      NA      NA      NA      NA
## Jun 2000 -0.267   0.071   0.191  -0.386  -0.258      NA      NA      NA
## Jul 2000  0.011  -0.267   0.071   0.191  -0.386  -0.258      NA      NA
#fit AR(2)
lmfit.lag2 <- lm(y0 ~ y0.lag1 + y0.lag2, data = df.y0to.l7)
summary(lmfit.lag2)
## 
## Call:
## lm(formula = y0 ~ y0.lag1 + y0.lag2, data = df.y0to.l7)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98599 -0.18138  0.02206  0.16465  0.85711 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.02987    0.02200  -1.358  0.17644   
## y0.lag1      0.04152    0.07887   0.526  0.59932   
## y0.lag2     -0.21968    0.07877  -2.789  0.00595 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2734 on 155 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.04873,    Adjusted R-squared:  0.03646 
## F-statistic:  3.97 on 2 and 155 DF,  p-value: 0.02082
#lag 2 is sign as seen in plots before.
#note that size and sign of sign, compared to the plot, does match up. 

#fit AR(4)
lmfit.lag4 <- lm(y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4, data = df.y0to.l7)
summary(lmfit.lag4)
## 
## Call:
## lm(formula = y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4, data = df.y0to.l7)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97375 -0.18024  0.02183  0.14938  0.84687 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.02923    0.02253  -1.297  0.19646   
## y0.lag1      0.07499    0.08226   0.912  0.36344   
## y0.lag2     -0.23454    0.08202  -2.860  0.00484 **
## y0.lag3      0.10518    0.08149   1.291  0.19878   
## y0.lag4     -0.05891    0.08165  -0.721  0.47173   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2747 on 151 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.05987,    Adjusted R-squared:  0.03497 
## F-statistic: 2.404 on 4 and 151 DF,  p-value: 0.05219
#fit AR(7)
lmfit.lag7 <- lm(y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4 
                 + y0.lag5 + y0.lag6 + y0.lag7, data = df.y0to.l7)
summary(lmfit.lag7)
## 
## Call:
## lm(formula = y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4 + y0.lag5 + 
##     y0.lag6 + y0.lag7, data = df.y0to.l7)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86962 -0.18191  0.00932  0.14497  0.78273 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.04367    0.02292  -1.905  0.05871 . 
## y0.lag1      0.02279    0.08258   0.276  0.78292   
## y0.lag2     -0.25201    0.08178  -3.081  0.00247 **
## y0.lag3      0.07206    0.08340   0.864  0.38897   
## y0.lag4     -0.06716    0.08338  -0.806  0.42183   
## y0.lag5     -0.15105    0.08357  -1.807  0.07276 . 
## y0.lag6     -0.15653    0.08138  -1.923  0.05639 . 
## y0.lag7     -0.16107    0.08220  -1.959  0.05198 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2692 on 145 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.1234, Adjusted R-squared:  0.08112 
## F-statistic: 2.917 on 7 and 145 DF,  p-value: 0.006949
#qq lite diff s24


#loos qq ovan och sen gaa vidare 
#kanske se paa https://cran.r-project.org/web/packages/FitAR/FitAR.pdf

################################################################################
# Ar(2) more mathematical info (s22
###

lmfit.lag2$coef
## (Intercept)     y0.lag1     y0.lag2 
## -0.02987474  0.04151980 -0.21968084
lmfit.phi1 <- lmfit.lag2$coef[2]
lmfit.phi2 <- lmfit.lag2$coef[3]
# polyroot() returns complex roots of polynomial
char.roots <- polyroot( c(1,-1.*lmfit.phi1, -1.*lmfit.phi2) )
char.roots
## [1] 0.0945+2.131461i 0.0945-2.131461i
(Conj(char.roots)*char.roots) #jfr s22 4.6
## [1] 4.552058+0i 4.552058+0i
#they are greater than 1 i.e. outside unit circle, hence AR(2) is STATIONARY.

# With complex roots, there is evidence of cyclicality in the series
# The following computation computes the period as it is determined by the
# coefficients of the characteristic polynomial.
a.phi1.over.root.phi2 <- abs(lmfit.phi1) / ( 2*sqrt(-lmfit.phi2) )
two.pif0 <- acos(a.phi1.over.root.phi2)
f0 <- two.pif0/(8*atan(1))
periodis <- 1/f0
(as.numeric(periodis))
## [1] 4.116101
# The data are consistent with cycle of period just over 4 months.
# qq mattebevis 

################################################################################
#more info about the best AR
###
y0.ar <- ar(y0)
names(y0.ar)
##  [1] "order"        "ar"           "var.pred"     "x.mean"      
##  [5] "aic"          "n.used"       "order.max"    "partialacf"  
##  [9] "resid"        "method"       "series"       "frequency"   
## [13] "call"         "asy.var.coef"
y0.ar$order.max
## [1] 22
y0.ar$order
## [1] 7
y0.ar$ar
## [1]  0.02527869 -0.24250903  0.07242012 -0.06768100 -0.13677650 -0.14360652
## [7] -0.15172954
y0.ar$var.pred
## [1] 0.07215729
y0.ar$x.mean
## [1] -0.02814375
y0.ar$aic #we see aic is defn here so that best model get aic=0 higher is worse
##         0         1         2         3         4         5         6 
##  5.166594  6.942271  1.404311  1.929277  3.438440  3.412821  1.726559 
##         7         8         9        10        11        12        13 
##  0.000000  1.927865  3.075245  5.065248  5.494767  7.196374  7.688528 
##        14        15        16        17        18        19        20 
##  9.677226 11.557274 12.252682 14.232046 16.147314 16.893877 18.887133 
##        21        22 
## 19.385577 21.254363
y0.ar$n.used
## [1] 160
y0.ar$method
## [1] "Yule-Walker"
y0.ar$series
## [1] "y0"
y0.ar$frequency
## [1] 12
y0.ar$call
## ar(x = y0)
y0.ar$asy.var.coef #The asymptotic-theory variance matrix of the coef estimates
##               [,1]          [,2]          [,3]          [,4]          [,5]
## [1,]  0.0064274878 -0.0003096582  0.0014589210 -0.0005440087  0.0005175610
## [2,] -0.0003096582  0.0062960153 -0.0004792128  0.0014070214 -0.0004868435
## [3,]  0.0014589210 -0.0004792128  0.0065598497 -0.0006556583  0.0015801702
## [4,] -0.0005440087  0.0014070214 -0.0006556583  0.0065642178 -0.0006556583
## [5,]  0.0005175610 -0.0004868435  0.0015801702 -0.0006556583  0.0065598497
## [6,]  0.0006577679  0.0002656965 -0.0004868435  0.0014070214 -0.0004792128
## [7,]  0.0009700134  0.0006577679  0.0005175610 -0.0005440087  0.0014589210
##               [,6]          [,7]
## [1,]  0.0006577679  0.0009700134
## [2,]  0.0002656965  0.0006577679
## [3,] -0.0004868435  0.0005175610
## [4,]  0.0014070214 -0.0005440087
## [5,] -0.0004792128  0.0014589210
## [6,]  0.0062960153 -0.0003096582
## [7,] -0.0003096582  0.0064274878
#long output y0.ar$partialacf   y0.ar$resid

#qq skapa plot sid23

#qq n'a'r han goor sid 24 hoogst upp faar han andra estimat 'a'n min y0.ar$ar varfoor??

#qq forst'a'tt med sid24 och framaat, n'a'r du har loost hur man goor med lm() trtos att data ej 'a'r specad. 

################################################################################
#evaluate stationarity of this best AR
#jfr s25
#if the smallest root modolus is outside the complex unit circle then stationary
###

#same evaluation as with the AR(2)
charac.roots.y10 <- polyroot(c(1,-1*y0.ar$ar))
charac.roots.y10.modsq <- Conj(charac.roots.y10)*charac.roots.y10
real.part <- (Re(charac.roots.y10.modsq))
imaginary.part <- (Im(charac.roots.y10.modsq))
charac.roots.y10.modsq0 <-  sqrt(real.part^2 + imaginary.part^2)
(charac.roots.y10)
## [1]  1.008544+0.620868i -0.794175+1.115372i -0.794175-1.115372i
## [4]  1.008544-0.620868i  0.074456+1.280196i -1.524114-0.000000i
## [7]  0.074456-1.280196i
(charac.roots.y10.modsq0)
## [1] 1.402638 1.874769 1.874769 1.402638 1.644445 2.322922 1.644445
(min(charac.roots.y10.modsq0))
## [1] 1.402638
#the smallest root modulus lies outside the (complex) unit circle. 
#Hence STATIONARITY

################################################################################
#influence measure and case-deletion for this best AR 
### 

#summary(lmfit.lag7)
names(lmfit.lag7)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "na.action"     "xlevels"       "call"          "terms"        
## [13] "model"
length(lmfit.lag7$terms)
## [1] 3

lmfit.lag7$fitted.values[1:10,]

length(lmfit.lag7\(residuals) length(time(lmfit.lag7\)residuals)) length(coredata(lmfit.lag7\(residuals)) lmfit.lag7\)residuals <- zoo(as.numeric(lmfit.lag7\(residuals), order.by = time(lmfit.lag7\)residuals)[-c(1:7),] ) plot(lmfit.lag7$residuals)

jfr jl_151212_150609 spec we.

qq error detta verkar ej funka likadant.

att skapa evaluation plots verkar heller ej funka likadant.