#Q1. This problem uses the data set ford.csv on the book’s web site. 
#The data were taken from the ford.s data set in R’s fEcofin package. 
#This package is no longer on CRAN. 
#This data set contains 2000 daily Ford returns from January 2, 1984, to December 31, 1991.
rm(list = ls())
con = gzcon(url('https://github.com/systematicinvestor/SIT/raw/master/sit.gz', 'rb'))
source(con)
close(con)

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
## x .GlobalEnv::count() masks dplyr::count()
## x .GlobalEnv::cross() masks purrr::cross()
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks stats::lag()
## x .GlobalEnv::lst()   masks dplyr::lst(), tibble::lst()
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## 
## Attaching package: 'TTR'
## The following object is masked _by_ '.GlobalEnv':
## 
##     DVI
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked _by_ '.GlobalEnv':
## 
##     count, join
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library(fBasics)
## Warning: package 'fBasics' was built under R version 3.6.3
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.6.2
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.6.3
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
## 
## Attaching package: 'fBasics'
## The following objects are masked _by_ '.GlobalEnv':
## 
##     inv, vec
## The following object is masked from 'package:TTR':
## 
##     volatility
library(fEcofin)
## 
## Attaching package: 'fEcofin'
## The following object is masked from 'package:fBasics':
## 
##     cars2
#install.packages(fEcofin)
data(ford.s , package = "fEcofin")
tail(ford.s)
##       X.m..d..y         FORD
## 1995 12/23/1991  0.103960400
## 1996 12/24/1991  0.008968610
## 1997 12/26/1991  0.008888889
## 1998 12/27/1991 -0.022026430
## 1999 12/30/1991  0.018018020
## 2000 12/31/1991 -0.004424779
#(a) Find the sample mean, sample median, and standard deviation of the Ford returns.
ford.return = ford.s$FORD
ford.matrix = as.matrix(ford.return)
mean = mean(ford.return)
mean
## [1] 0.0007600789
median = median(rnorm(ford.matrix))
median
## [1] 0.01707339
sd = sd(ford.matrix)
sd
## [1] 0.01831557
#(b) Create a normal plot of the Ford returns. 
#Do the returns look normally distributed? If not, how do they differ from being normally distributed?
n = length(ford.return)
year_SP = 1984 + (1:n) * (1991.25 - 1984) / n
plot(year_SP, ford.return, main = "Ford daily returns",
xlab = "year", type = "l", ylab = "log return")

plot(as.data.frame(ford.return))

hist(ford.return, 80, freq= FALSE)

hist(ford.return)

plot(as.data.frame(ford.return))

hist(ford.return, 80, freq= FALSE)

hist(ford.return)

hist(ford.return)

#(c) Test for normality using the Shapiro–Wilk test? What is the p-value?
#Can you reject the null hypothes is of a normal distribution at 0.01?
sh <- shapiro.test(ford.return) # Shapiro--Wilk
sh
## 
##  Shapiro-Wilk normality test
## 
## data:  ford.return
## W = 0.96388, p-value < 2.2e-16
# p- value = 2.2e-16 =  0.00000000000000022 means less than 0.05. Therefore it can not reject null hypothes, even at 0.01. 
# In other word it is not normally distributed.
#(d) Create several t-plots of the Ford returns using a number of choices of the degrees of freedom parameter
n=dim(ford.matrix)[1]
q_grid = (1:n) / (n + 1)
df_grid = c(1, 4, 6, 10, 20, 30)
index.names = dimnames(ford.matrix)[[2]]

for(i in 1:1){
   #dev.new()
 par(mfrow = c(3, 2))
  for(df in df_grid){
    qqplot(ford.matrix[,i], qt(q_grid,df),
           main = paste(index.names[,i], ", df = ", df) )
    abline(lm(qt(c(0.25, 0.75), df = df) ~
                quantile(ford.matrix[,i], c(0.25, 0.75))))} }

#(f) What value of df gives a plot that is as linear as possible? 
#The returns include the return on Black Monday, October 19, 1987. 
#Discuss whether or not to ignore that return when looking for the best choices of df.

# If df=6 it is linear as possible