financial analysis
#beskr av filen:
#gaar igenom casestudy3 (ja jag skrev 4 men det 'a'r fel i namnet).
#kod1.R 'a'r nyare version av kod.R saa d'a'rav 'a'r denna nyast hittills.
#nu uppt'a'ckte jag att man bara intresserar sig foor 10 year yeild i analysen
#saa ja gplockar ut bara den. och dessutom finns den paa yahoo => beh ej importera
#och skit
#http://finance.yahoo.com/echarts?s=%5ETNX+Interactive#{"customRangeStart":946854000,"customRangeEnd":1331506800,"range":"custom","allowChartStacking":true}
#CBOE Interest Rate 10 Year T No (^TNX)
################################################################################
#get data, structure of data, look at table, plot data#get data
###
library("quantmod")
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("tseries")
library("vars")
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library("fxregime")
date.start<-20000101
date.end<-20130531
date.start1<-"2000-01-01"
date.end1<-"2013-05-31"
#GET DATA
TNX <- getYahooData("^TNX", start=date.start, end=date.end)
#STRUCTURE OF DATA
dim(TNX)
## [1] 3370 5
str(TNX)
## An 'xts' object on 2000-01-03/2013-05-31 containing:
## Data: num [1:3370, 1:5] 6.5 6.53 6.52 6.56 6.54 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:5] "Open" "High" "Low" "Close" ...
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
is.matrix(TNX)
## [1] TRUE
mode(TNX) #numeric
## [1] "numeric"
class(TNX) # object-oriented classes are xts and zoo
## [1] "xts" "zoo"
#LOOK AT TABLE
head(TNX)
## Open High Low Close Volume
## 2000-01-03 6.498 6.603 6.498 6.548 0
## 2000-01-04 6.530 6.548 6.485 6.485 0
## 2000-01-05 6.521 6.599 6.508 6.599 0
## 2000-01-06 6.558 6.585 6.540 6.549 0
## 2000-01-07 6.545 6.595 6.504 6.504 0
## 2000-01-10 6.540 6.567 6.536 6.558 0
tail(TNX)
## Open High Low Close Volume
## 2013-05-23 2.012 2.051 1.982 2.023 0
## 2013-05-24 2.011 2.030 1.981 2.011 0
## 2013-05-28 2.042 2.137 2.041 2.135 0
## 2013-05-29 2.147 2.167 2.121 2.124 0
## 2013-05-30 2.147 2.158 2.094 2.124 0
## 2013-05-31 2.092 2.211 2.084 2.164 0
#PLOT DATA
chartSeries(TNX[,1:5])
################################################################################
#2.0
#take out 10 year yeild.
#only look at close
#make daily weekly monthly
###
y.DGS10.daily <- TNX[,"Close"] #take close price only
str(y.DGS10.daily) #check
## An 'xts' object on 2000-01-03/2013-05-31 containing:
## Data: num [1:3370, 1] 6.55 6.49 6.6 6.55 6.5 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "Close"
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
head(y.DGS10.daily) #check
## Close
## 2000-01-03 6.548
## 2000-01-04 6.485
## 2000-01-05 6.599
## 2000-01-06 6.549
## 2000-01-07 6.504
## 2000-01-10 6.558
sum(is.na(y.DGS10.daily)) #check
## [1] 0
head(to.weekly(y.DGS10.daily)) #check that this is consistent with line above
## y.DGS10.daily.Open y.DGS10.daily.High y.DGS10.daily.Low
## 2000-01-07 6.548 6.599 6.485
## 2000-01-14 6.558 6.696 6.558
## 2000-01-21 6.748 6.781 6.725
## 2000-01-28 6.684 6.685 6.648
## 2000-02-04 6.667 6.667 6.475
## 2000-02-11 6.631 6.669 6.599
## y.DGS10.daily.Close
## 2000-01-07 6.504
## 2000-01-14 6.674
## 2000-01-21 6.781
## 2000-01-28 6.648
## 2000-02-04 6.517
## 2000-02-11 6.621
# below, convert data.
#we will analyze weekly and monthly data.
y.DGS10.weekly <- to.weekly(y.DGS10.daily)[,4] #4th column is close
y.DGS10.monthly <- to.monthly(y.DGS10.daily)[,4]
dim(y.DGS10.weekly); dim(y.DGS10.monthly)
## [1] 700 1
## [1] 161 1
################################################################################
#acf pacf plots
###
#par( mfcol=c(2,3) )
acf(y.DGS10.daily)
acf(y.DGS10.daily,type="partial")
acf(y.DGS10.weekly)
acf(y.DGS10.weekly,type="partial")
acf(y.DGS10.monthly)
acf(y.DGS10.monthly,type="partial")
################################################################################
#testing for unit root
###
#H0: series has unit root
adf.test(y.DGS10.daily)
##
## Augmented Dickey-Fuller Test
##
## data: y.DGS10.daily
## Dickey-Fuller = -3.365, Lag order = 14, p-value = 0.05944
## alternative hypothesis: stationary
adf.test(y.DGS10.weekly) #qq faar annorlunda, sid11
##
## Augmented Dickey-Fuller Test
##
## data: y.DGS10.weekly
## Dickey-Fuller = -3.0415, Lag order = 8, p-value = 0.1375
## alternative hypothesis: stationary
adf.test(y.DGS10.monthly)
##
## Augmented Dickey-Fuller Test
##
## data: y.DGS10.monthly
## Dickey-Fuller = -2.7197, Lag order = 5, p-value = 0.2761
## alternative hypothesis: stationary
#lower p value when lower periodicity of data (p daily < p monthly)
#so stronger time series structure at higher freq.
#hence do diff()
sum(is.na(y.DGS10.daily))
## [1] 0
adf.test( na.omit(diff(y.DGS10.daily)) )
## Warning in adf.test(na.omit(diff(y.DGS10.daily))): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(y.DGS10.daily))
## Dickey-Fuller = -14.347, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
adf.test( na.omit(diff(y.DGS10.weekly)) )
## Warning in adf.test(na.omit(diff(y.DGS10.weekly))): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(y.DGS10.weekly))
## Dickey-Fuller = -9.6135, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test( na.omit(diff(y.DGS10.monthly)) )
## Warning in adf.test(na.omit(diff(y.DGS10.monthly))): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(y.DGS10.monthly))
## Dickey-Fuller = -6.6264, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
#all have low pvalues
#observation: a high first order autocorr => conclusion: data has unit root
#for daily weekly monthly (all three)
################################################################################
#acf pacf plots diffed data
###
#par( mfcol=c(2,3) ) #par(mfcol=c(1,1)) is standard
acf(na.omit( diff(y.DGS10.daily )))
acf(na.omit( diff(y.DGS10.daily )) , type="partial")
acf(na.omit( diff(y.DGS10.weekly )) )
acf(na.omit( diff(y.DGS10.weekly )) , type="partial")
acf(na.omit( diff(y.DGS10.monthly )) )
acf(na.omit( diff(y.DGS10.monthly )) , type="partial")
# observations from the graphs:
# daily: acf 2, pacf 2
# weekly: probably not. perhaps acf 15, pacf 18.
# monthly: acf 2, pacf 2.
################################################################################
plot(y.DGS10.monthly) #need to diff() this
plot(diff(y.DGS10.monthly)); abline(0,0, lty=2)
#looks like cov-stationary time series strucutre.
#but is it white noise or something else? we fit an AR(p) model.
################################################################################
# 1.6 skippas
# men kod beh foor vidare analys saa koor igenom
sum(is.na(diff(y.DGS10.monthly)))
## [1] 1
y0 <- na.omit( diff(y.DGS10.monthly) )
y0.acf <- acf(y0, type = "correlation")
y0.pacf <- acf(y0, type = "partial")
y0.lag1 <- lag(y0 , k=1)
y0.lag2 <- lag(y0.lag1, k=1)
y0.lag3 <- lag(y0.lag2, k=1)
y0.lag4 <- lag(y0.lag3, k=1)
y0.lag5 <- lag(y0.lag4, k=1)
y0.lag6 <- lag(y0.lag5, k=1)
y0.lag7 <- lag(y0.lag6, k=1)
################################################################################
#we saw above that
#monthly: acf 2, pacf 2.
#created by the code
#acf(na.omit( diff(y.DGS10.monthly )) , type="partial")
#so we fit an AR(2) model, then look at roots of char polynomial (qq infoga l'a'nk)
# se oxaa oldcode jl_151212_142242
#best AR?
y0.ar <- ar(y0) #fits best model according to aic
names(y0.ar)
## [1] "order" "ar" "var.pred" "x.mean"
## [5] "aic" "n.used" "order.max" "partialacf"
## [9] "resid" "method" "series" "frequency"
## [13] "call" "asy.var.coef"
y0.ar$order.max
## [1] 22
y0.ar$order #7 order fit is best
## [1] 7
#create dataset with 7lags
df.y0to.l7 <- cbind(y0, y0.lag1, y0.lag2, y0.lag3, y0.lag4,
y0.lag5, y0.lag6, y0.lag7)
names(df.y0to.l7) <- c('y0', 'y0.lag1', 'y0.lag2', 'y0.lag3', 'y0.lag4',
'y0.lag5', 'y0.lag6', 'y0.lag7')
head(df.y0to.l7)
## y0 y0.lag1 y0.lag2 y0.lag3 y0.lag4 y0.lag5 y0.lag6 y0.lag7
## Feb 2000 -0.258 NA NA NA NA NA NA NA
## Mar 2000 -0.386 -0.258 NA NA NA NA NA NA
## Apr 2000 0.191 -0.386 -0.258 NA NA NA NA NA
## May 2000 0.071 0.191 -0.386 -0.258 NA NA NA NA
## Jun 2000 -0.267 0.071 0.191 -0.386 -0.258 NA NA NA
## Jul 2000 0.011 -0.267 0.071 0.191 -0.386 -0.258 NA NA
#fit AR(2)
lmfit.lag2 <- lm(y0 ~ y0.lag1 + y0.lag2, data = df.y0to.l7)
summary(lmfit.lag2)
##
## Call:
## lm(formula = y0 ~ y0.lag1 + y0.lag2, data = df.y0to.l7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98599 -0.18138 0.02206 0.16465 0.85711
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02987 0.02200 -1.358 0.17644
## y0.lag1 0.04152 0.07887 0.526 0.59932
## y0.lag2 -0.21968 0.07877 -2.789 0.00595 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2734 on 155 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.04873, Adjusted R-squared: 0.03646
## F-statistic: 3.97 on 2 and 155 DF, p-value: 0.02082
#lag 2 is sign as seen in plots before.
#note that size and sign of sign, compared to the plot, does match up.
#fit AR(4)
lmfit.lag4 <- lm(y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4, data = df.y0to.l7)
summary(lmfit.lag4)
##
## Call:
## lm(formula = y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4, data = df.y0to.l7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97375 -0.18024 0.02183 0.14938 0.84687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02923 0.02253 -1.297 0.19646
## y0.lag1 0.07499 0.08226 0.912 0.36344
## y0.lag2 -0.23454 0.08202 -2.860 0.00484 **
## y0.lag3 0.10518 0.08149 1.291 0.19878
## y0.lag4 -0.05891 0.08165 -0.721 0.47173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2747 on 151 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.05987, Adjusted R-squared: 0.03497
## F-statistic: 2.404 on 4 and 151 DF, p-value: 0.05219
#fit AR(7)
lmfit.lag7 <- lm(y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4
+ y0.lag5 + y0.lag6 + y0.lag7, data = df.y0to.l7)
summary(lmfit.lag7)
##
## Call:
## lm(formula = y0 ~ y0.lag1 + y0.lag2 + y0.lag3 + y0.lag4 + y0.lag5 +
## y0.lag6 + y0.lag7, data = df.y0to.l7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86962 -0.18191 0.00932 0.14497 0.78273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04367 0.02292 -1.905 0.05871 .
## y0.lag1 0.02279 0.08258 0.276 0.78292
## y0.lag2 -0.25201 0.08178 -3.081 0.00247 **
## y0.lag3 0.07206 0.08340 0.864 0.38897
## y0.lag4 -0.06716 0.08338 -0.806 0.42183
## y0.lag5 -0.15105 0.08357 -1.807 0.07276 .
## y0.lag6 -0.15653 0.08138 -1.923 0.05639 .
## y0.lag7 -0.16107 0.08220 -1.959 0.05198 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2692 on 145 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.1234, Adjusted R-squared: 0.08112
## F-statistic: 2.917 on 7 and 145 DF, p-value: 0.006949
#qq lite diff s24
#loos qq ovan och sen gaa vidare
#kanske se paa https://cran.r-project.org/web/packages/FitAR/FitAR.pdf
################################################################################
# Ar(2) more mathematical info (s22
###
lmfit.lag2$coef
## (Intercept) y0.lag1 y0.lag2
## -0.02987474 0.04151980 -0.21968084
lmfit.phi1 <- lmfit.lag2$coef[2]
lmfit.phi2 <- lmfit.lag2$coef[3]
# polyroot() returns complex roots of polynomial
char.roots <- polyroot( c(1,-1.*lmfit.phi1, -1.*lmfit.phi2) )
char.roots
## [1] 0.0945+2.131461i 0.0945-2.131461i
(Conj(char.roots)*char.roots) #jfr s22 4.6
## [1] 4.552058+0i 4.552058+0i
#they are greater than 1 i.e. outside unit circle, hence AR(2) is STATIONARY.
# With complex roots, there is evidence of cyclicality in the series
# The following computation computes the period as it is determined by the
# coefficients of the characteristic polynomial.
a.phi1.over.root.phi2 <- abs(lmfit.phi1) / ( 2*sqrt(-lmfit.phi2) )
two.pif0 <- acos(a.phi1.over.root.phi2)
f0 <- two.pif0/(8*atan(1))
periodis <- 1/f0
(as.numeric(periodis))
## [1] 4.116101
# The data are consistent with cycle of period just over 4 months.
# qq mattebevis
################################################################################
#more info about the best AR
###
y0.ar <- ar(y0)
names(y0.ar)
## [1] "order" "ar" "var.pred" "x.mean"
## [5] "aic" "n.used" "order.max" "partialacf"
## [9] "resid" "method" "series" "frequency"
## [13] "call" "asy.var.coef"
y0.ar$order.max
## [1] 22
y0.ar$order
## [1] 7
y0.ar$ar
## [1] 0.02527869 -0.24250903 0.07242012 -0.06768100 -0.13677650 -0.14360652
## [7] -0.15172954
y0.ar$var.pred
## [1] 0.07215729
y0.ar$x.mean
## [1] -0.02814375
y0.ar$aic #we see aic is defn here so that best model get aic=0 higher is worse
## 0 1 2 3 4 5 6
## 5.166594 6.942271 1.404311 1.929277 3.438440 3.412821 1.726559
## 7 8 9 10 11 12 13
## 0.000000 1.927865 3.075245 5.065248 5.494767 7.196374 7.688528
## 14 15 16 17 18 19 20
## 9.677226 11.557274 12.252682 14.232046 16.147314 16.893877 18.887133
## 21 22
## 19.385577 21.254363
y0.ar$n.used
## [1] 160
y0.ar$method
## [1] "Yule-Walker"
y0.ar$series
## [1] "y0"
y0.ar$frequency
## [1] 12
y0.ar$call
## ar(x = y0)
y0.ar$asy.var.coef #The asymptotic-theory variance matrix of the coef estimates
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.0064274878 -0.0003096582 0.0014589210 -0.0005440087 0.0005175610
## [2,] -0.0003096582 0.0062960153 -0.0004792128 0.0014070214 -0.0004868435
## [3,] 0.0014589210 -0.0004792128 0.0065598497 -0.0006556583 0.0015801702
## [4,] -0.0005440087 0.0014070214 -0.0006556583 0.0065642178 -0.0006556583
## [5,] 0.0005175610 -0.0004868435 0.0015801702 -0.0006556583 0.0065598497
## [6,] 0.0006577679 0.0002656965 -0.0004868435 0.0014070214 -0.0004792128
## [7,] 0.0009700134 0.0006577679 0.0005175610 -0.0005440087 0.0014589210
## [,6] [,7]
## [1,] 0.0006577679 0.0009700134
## [2,] 0.0002656965 0.0006577679
## [3,] -0.0004868435 0.0005175610
## [4,] 0.0014070214 -0.0005440087
## [5,] -0.0004792128 0.0014589210
## [6,] 0.0062960153 -0.0003096582
## [7,] -0.0003096582 0.0064274878
#long output y0.ar$partialacf y0.ar$resid
#qq skapa plot sid23
#qq n'a'r han goor sid 24 hoogst upp faar han andra estimat 'a'n min y0.ar$ar varfoor??
#qq forst'a'tt med sid24 och framaat, n'a'r du har loost hur man goor med lm() trtos att data ej 'a'r specad.
################################################################################
#evaluate stationarity of this best AR
#jfr s25
#if the smallest root modolus is outside the complex unit circle then stationary
###
#same evaluation as with the AR(2)
charac.roots.y10 <- polyroot(c(1,-1*y0.ar$ar))
charac.roots.y10.modsq <- Conj(charac.roots.y10)*charac.roots.y10
real.part <- (Re(charac.roots.y10.modsq))
imaginary.part <- (Im(charac.roots.y10.modsq))
charac.roots.y10.modsq0 <- sqrt(real.part^2 + imaginary.part^2)
(charac.roots.y10)
## [1] 1.008544+0.620868i -0.794175+1.115372i -0.794175-1.115372i
## [4] 1.008544-0.620868i 0.074456+1.280196i -1.524114-0.000000i
## [7] 0.074456-1.280196i
(charac.roots.y10.modsq0)
## [1] 1.402638 1.874769 1.874769 1.402638 1.644445 2.322922 1.644445
(min(charac.roots.y10.modsq0))
## [1] 1.402638
#the smallest root modulus lies outside the (complex) unit circle.
#Hence STATIONARITY
################################################################################
#influence measure and case-deletion for this best AR
###
#summary(lmfit.lag7)
names(lmfit.lag7)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "na.action" "xlevels" "call" "terms"
## [13] "model"
length(lmfit.lag7$terms)
## [1] 3
lmfit.lag7$fitted.values[1:10,]
length(lmfit.lag7\(residuals) length(time(lmfit.lag7\)residuals)) length(coredata(lmfit.lag7\(residuals)) lmfit.lag7\)residuals <- zoo(as.numeric(lmfit.lag7\(residuals), order.by = time(lmfit.lag7\)residuals)[-c(1:7),] ) plot(lmfit.lag7$residuals)