#Q1. This problem uses the data set ford.csv on the book’s web site.
#The data were taken from the ford.s data set in R’s fEcofin package.
#This package is no longer on CRAN.
#This data set contains 2000 daily Ford returns from January 2, 1984, to December 31, 1991.
rm(list = ls())
con = gzcon(url('https://github.com/systematicinvestor/SIT/raw/master/sit.gz', 'rb'))
source(con)
close(con)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
## x .GlobalEnv::count() masks dplyr::count()
## x .GlobalEnv::cross() masks purrr::cross()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x .GlobalEnv::lst() masks dplyr::lst(), tibble::lst()
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
##
## Attaching package: 'TTR'
## The following object is masked _by_ '.GlobalEnv':
##
## DVI
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked _by_ '.GlobalEnv':
##
## count, join
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(fBasics)
## Warning: package 'fBasics' was built under R version 3.6.3
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.6.2
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.6.3
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
##
## Attaching package: 'fBasics'
## The following objects are masked _by_ '.GlobalEnv':
##
## inv, vec
## The following object is masked from 'package:TTR':
##
## volatility
library(fEcofin)
##
## Attaching package: 'fEcofin'
## The following object is masked from 'package:fBasics':
##
## cars2
#install.packages(fEcofin)
data(ford.s , package = "fEcofin")
tail(ford.s)
## X.m..d..y FORD
## 1995 12/23/1991 0.103960400
## 1996 12/24/1991 0.008968610
## 1997 12/26/1991 0.008888889
## 1998 12/27/1991 -0.022026430
## 1999 12/30/1991 0.018018020
## 2000 12/31/1991 -0.004424779
#(a) Find the sample mean, sample median, and standard deviation of the Ford returns.
ford.return = ford.s$FORD
ford.matrix = as.matrix(ford.return)
mean = mean(ford.return)
mean
## [1] 0.0007600789
median = median(rnorm(ford.matrix))
median
## [1] 0.01707339
sd = sd(ford.matrix)
sd
## [1] 0.01831557
#(b) Create a normal plot of the Ford returns.
#Do the returns look normally distributed? If not, how do they differ from being normally distributed?
n = length(ford.return)
year_SP = 1984 + (1:n) * (1991.25 - 1984) / n
plot(year_SP, ford.return, main = "Ford daily returns",
xlab = "year", type = "l", ylab = "log return")

plot(as.data.frame(ford.return))

hist(ford.return, 80, freq= FALSE)

hist(ford.return)

plot(as.data.frame(ford.return))

hist(ford.return, 80, freq= FALSE)

hist(ford.return)

hist(ford.return)

#(c) Test for normality using the Shapiro–Wilk test? What is the p-value?
#Can you reject the null hypothes is of a normal distribution at 0.01?
sh <- shapiro.test(ford.return) # Shapiro--Wilk
sh
##
## Shapiro-Wilk normality test
##
## data: ford.return
## W = 0.96388, p-value < 2.2e-16
# p- value = 2.2e-16 = 0.00000000000000022 means less than 0.05. Therefore it can not reject null hypothes, even at 0.01.
# In other word it is not normally distributed.
#(d) Create several t-plots of the Ford returns using a number of choices of the degrees of freedom parameter
n=dim(ford.matrix)[1]
q_grid = (1:n) / (n + 1)
df_grid = c(1, 4, 6, 10, 20, 30)
index.names = dimnames(ford.matrix)[[2]]
for(i in 1:1){
#dev.new()
par(mfrow = c(3, 2))
for(df in df_grid){
qqplot(ford.matrix[,i], qt(q_grid,df),
main = paste(index.names[,i], ", df = ", df) )
abline(lm(qt(c(0.25, 0.75), df = df) ~
quantile(ford.matrix[,i], c(0.25, 0.75))))} }

#(f) What value of df gives a plot that is as linear as possible?
#The returns include the return on Black Monday, October 19, 1987.
#Discuss whether or not to ignore that return when looking for the best choices of df.
# If df=6 it is linear as possible