Calling libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(e1071)
## Warning: package 'e1071' was built under R version 3.3.1
library(MASS)
## Warning: package 'MASS' was built under R version 3.3.2
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.3.2
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.3.2
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.2
library(car)
## Warning: package 'car' was built under R version 3.3.2
Exploring Smarket data set
names(Smarket)
## [1] "Year" "Lag1" "Lag2" "Lag3" "Lag4" "Lag5"
## [7] "Volume" "Today" "Direction"
# attach() puts Smarket's columns on the search path so they can be used by bare name.
# NOTE(review): prefer Smarket$col or a data= argument; attached names can be masked silently.
attach(Smarket)
str(Smarket)
## 'data.frame': 1250 obs. of 9 variables:
## $ Year : num 2001 2001 2001 2001 2001 ...
## $ Lag1 : num 0.381 0.959 1.032 -0.623 0.614 ...
## $ Lag2 : num -0.192 0.381 0.959 1.032 -0.623 ...
## $ Lag3 : num -2.624 -0.192 0.381 0.959 1.032 ...
## $ Lag4 : num -1.055 -2.624 -0.192 0.381 0.959 ...
## $ Lag5 : num 5.01 -1.055 -2.624 -0.192 0.381 ...
## $ Volume : num 1.19 1.3 1.41 1.28 1.21 ...
## $ Today : num 0.959 1.032 -0.623 0.614 0.213 ...
## $ Direction: Factor w/ 2 levels "Down","Up": 2 2 1 2 2 2 1 2 2 2 ...
dim(Smarket)
## [1] 1250 9
# cor(Smarket) fails because Direction is a factor, so correlate only the
# numeric columns. Selecting by type avoids depending on Direction being
# column 9.
C <- cor(Smarket[, vapply(Smarket, is.numeric, logical(1))])
# Correlogram of the resulting correlation matrix.
corrgram(C)

# Correlation plot with corrplot's default settings.
corrplot(C)

# Same plot with the variables reordered by hierarchical clustering.
corrplot(C, order = "hclust")

# Clustered order again, drawn as squares with smaller text.
corrplot(
  C,
  order = "hclust",
  method = "square",
  tl.cex = 0.6, # tl = text labels
  cl.cex = 0.6  # cl = color legend
)
Fitting a logistic regression model
# Logistic regression of Direction on Lag1-Lag5 and Volume (binomial family).
glm.fit <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = Smarket,
  family = binomial()
)
# Coefficient table, deviances and AIC for the fitted model.
summary(glm.fit)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial(), data = Smarket)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.446 -1.203 1.065 1.145 1.326
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000 0.240736 -0.523 0.601
## Lag1 -0.073074 0.050167 -1.457 0.145
## Lag2 -0.042301 0.050086 -0.845 0.398
## Lag3 0.011085 0.049939 0.222 0.824
## Lag4 0.009359 0.049974 0.187 0.851
## Lag5 0.010313 0.049511 0.208 0.835
## Volume 0.135441 0.158360 0.855 0.392
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1731.2 on 1249 degrees of freedom
## Residual deviance: 1727.6 on 1243 degrees of freedom
## AIC: 1741.6
##
## Number of Fisher Scoring iterations: 3
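# A large drop from null deviance to residual deviance means the predictors improve on the
# intercept-only model. Here the drop is only 1731.2 - 1727.6 = 3.6 on 6 degrees of freedom,
# so the predictors add very little.
# AIC is only meaningful when compared with another model fitted to the same data; lower is better.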
# Show how the response factor is dummy-coded. The column is referenced
# explicitly so this line does not depend on attach(Smarket).
contrasts(Smarket$Direction)
## Up
## Down 0
## Up 1
# "Up" is coded as 1 and "Down" as 0, so the model estimates P(Direction = "Up").
# Class balance of the response. The column is referenced explicitly rather than
# via attach(); the argument name keeps the "Direction" header in the printed table.
table(Direction = Smarket$Direction)
## Direction
## Down Up
## 602 648
# Extract the estimated coefficients (intercept plus one per predictor)
coef(glm.fit)
## (Intercept) Lag1 Lag2 Lag3 Lag4
## -0.126000257 -0.073073746 -0.042301344 0.011085108 0.009358938
## Lag5 Volume
## 0.010313068 0.135440659
# Inspect the working residual stored on the fit for observation 1250
residuals(glm.fit, type = "working")[1250]
## 1250
## -2.07433
# Fitted probability for the same observation
fitted(glm.fit)[1250]
## 1250
## 0.5179166
# Fitted probability that the market goes up, for every observation used in the fit
glm.prob <- predict(glm.fit, type = "response")
# Look at the first ten probabilities
head(glm.prob, 10)
## 1 2 3 4 5 6 7
## 0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 0.5069565 0.4926509
## 8 9 10
## 0.5092292 0.5176135 0.4888378
# Start with every observation labelled "Down"; the length comes from the data
# instead of a hard-coded 1250.
glm.pred <- rep("Down", nrow(Smarket))
# All-"Down" copy; not used in the visible code, kept in case later code refers to it.
glm.pred1 <- glm.pred
# Relabel as "Up" where the fitted probability is strictly greater than 0.5.
glm.pred[glm.prob > 0.5] <- "Up"
glm.pred
## [1] "Up" "Down" "Down" "Up" "Up" "Up" "Down" "Up" "Up"
## [10] "Down" "Down" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [19] "Up" "Down" "Up" "Up" "Up" "Down" "Up" "Up" "Down"
## [28] "Up" "Up" "Up" "Up" "Up" "Down" "Down" "Up" "Up"
## [37] "Up" "Down" "Down" "Down" "Down" "Up" "Up" "Up" "Up"
## [46] "Up" "Up" "Down" "Up" "Up" "Up" "Down" "Down" "Down"
## [55] "Up" "Up" "Down" "Up" "Up" "Up" "Down" "Down" "Up"
## [64] "Down" "Down" "Down" "Down" "Up" "Down" "Down" "Up" "Up"
## [73] "Up" "Down" "Down" "Down" "Up" "Down" "Up" "Up" "Down"
## [82] "Down" "Up" "Up" "Up" "Up" "Down" "Down" "Down" "Down"
## [91] "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [100] "Down" "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [109] "Up" "Up" "Up" "Down" "Down" "Down" "Up" "Up" "Up"
## [118] "Up" "Down" "Up" "Down" "Down" "Up" "Up" "Up" "Up"
## [127] "Up" "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [136] "Up" "Down" "Down" "Down" "Up" "Up" "Up" "Up" "Up"
## [145] "Up" "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [154] "Up" "Down" "Up" "Up" "Down" "Down" "Down" "Up" "Up"
## [163] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [172] "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Down" "Down"
## [181] "Down" "Down" "Up" "Up" "Up" "Up" "Down" "Down" "Up"
## [190] "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Down"
## [199] "Down" "Up" "Up" "Up" "Down" "Down" "Down" "Down" "Up"
## [208] "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Up" "Up"
## [217] "Up" "Down" "Down" "Up" "Up" "Up" "Down" "Up" "Down"
## [226] "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down"
## [235] "Down" "Down" "Up" "Up" "Down" "Down" "Down" "Down" "Up"
## [244] "Up" "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [253] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [262] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Down"
## [271] "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [280] "Down" "Down" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [289] "Up" "Down" "Up" "Up" "Up" "Down" "Down" "Up" "Up"
## [298] "Up" "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [307] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down"
## [316] "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [325] "Up" "Down" "Up" "Up" "Up" "Up" "Down" "Down" "Up"
## [334] "Down" "Down" "Up" "Up" "Down" "Up" "Up" "Up" "Down"
## [343] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up"
## [352] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Down" "Up"
## [361] "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Up"
## [370] "Up" "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [379] "Up" "Up" "Up" "Up" "Up" "Down" "Down" "Up" "Down"
## [388] "Down" "Up" "Up" "Up" "Up" "Down" "Down" "Down" "Down"
## [397] "Up" "Up" "Down" "Down" "Up" "Down" "Up" "Up" "Down"
## [406] "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Up"
## [415] "Down" "Down" "Down" "Down" "Up" "Up" "Down" "Up" "Up"
## [424] "Up" "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Down"
## [433] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Down" "Down"
## [442] "Down" "Up" "Up" "Down" "Down" "Up" "Up" "Up" "Up"
## [451] "Up" "Up" "Up" "Up" "Down" "Down" "Down" "Up" "Up"
## [460] "Up" "Up" "Up" "Down" "Down" "Down" "Up" "Up" "Down"
## [469] "Down" "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [478] "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Down" "Down"
## [487] "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Up" "Down"
## [496] "Down" "Down" "Down" "Up" "Up" "Up" "Down" "Up" "Up"
## [505] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [514] "Down" "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up"
## [523] "Up" "Up" "Up" "Down" "Down" "Down" "Up" "Up" "Up"
## [532] "Up" "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up"
## [541] "Up" "Up" "Up" "Down" "Down" "Down" "Down" "Up" "Up"
## [550] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down"
## [559] "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Down"
## [568] "Up" "Up" "Down" "Down" "Down" "Up" "Up" "Down" "Down"
## [577] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Down" "Down"
## [586] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Down"
## [595] "Down" "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up"
## [604] "Up" "Up" "Down" "Up" "Up" "Down" "Down" "Up" "Up"
## [613] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down"
## [622] "Down" "Down" "Down" "Up" "Up" "Up" "Down" "Up" "Up"
## [631] "Up" "Up" "Up" "Up" "Down" "Up" "Down" "Down" "Up"
## [640] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Down" "Down"
## [649] "Down" "Up" "Up" "Down" "Down" "Down" "Up" "Up" "Up"
## [658] "Up" "Down" "Down" "Down" "Down" "Down" "Up" "Up" "Up"
## [667] "Up" "Up" "Up" "Up" "Down" "Up" "Down" "Up" "Up"
## [676] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Down"
## [685] "Down" "Up" "Down" "Up" "Up" "Up" "Up" "Down" "Down"
## [694] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [703] "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [712] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [721] "Up" "Down" "Down" "Down" "Down" "Up" "Up" "Up" "Up"
## [730] "Up" "Up" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [739] "Up" "Up" "Up" "Up" "Down" "Down" "Down" "Down" "Up"
## [748] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [757] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up"
## [766] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up"
## [775] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [784] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [793] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up"
## [802] "Up" "Up" "Up" "Down" "Down" "Down" "Down" "Up" "Up"
## [811] "Up" "Down" "Up" "Up" "Up" "Down" "Up" "Up" "Up"
## [820] "Up" "Down" "Up" "Up" "Down" "Down" "Up" "Up" "Up"
## [829] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [838] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [847] "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Down"
## [856] "Down" "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Up"
## [865] "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [874] "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Up" "Up"
## [883] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [892] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up"
## [901] "Up" "Up" "Down" "Down" "Down" "Up" "Up" "Up" "Up"
## [910] "Down" "Down" "Down" "Up" "Up" "Down" "Down" "Down" "Up"
## [919] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [928] "Up" "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Down"
## [937] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [946] "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Down" "Down"
## [955] "Up" "Up" "Up" "Up" "Up" "Down" "Up" "Up" "Up"
## [964] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [973] "Up" "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [982] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [991] "Up" "Down" "Down" "Down" "Up" "Down" "Down" "Down" "Down"
## [1000] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1009] "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1018] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1027] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1036] "Down" "Down" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1045] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1054] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1063] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1072] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1081] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1090] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1099] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1108] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1117] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1126] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1135] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1144] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1153] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1162] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1171] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1180] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1189] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1198] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1207] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1216] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1225] "Up" "Up" "Down" "Up" "Up" "Up" "Up" "Up" "Up"
## [1234] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
## [1243] "Up" "Up" "Up" "Up" "Up" "Up" "Up" "Up"
# Confusion matrix: rows are the predicted labels (glm.pred), columns the actual Direction.
table(glm.pred,Smarket$Direction)
##
## glm.pred Down Up
## Down 145 141
## Up 457 507
# Accuracy = (true positives + true negatives) / all observations, i.e. the share of
# days where the predicted direction matches the actual one. This is not the true
# positive rate, which would be true positives / actual positives.
# The model is scored on the same data it was fitted to, so this is training accuracy.
# Computed from the vectors rather than hand-copying 507, 145 and 1250 from the table.
mean(glm.pred == Smarket$Direction)
## [1] 0.5216
Multinomial Logistic Regression
Loading required packages
# More than two outcome categories in the dependent variable --> multinomial regression
# library() stops with an error when a package is missing; require() only warns and
# returns FALSE. The plotting package is ggplot2 (no package called 'ggplot' exists).
library(ggplot2)
library(foreign)
## Warning: package 'foreign' was built under R version 3.3.2
library(nnet)
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.3.1
library(ggplot2)
library(e1071)
library(MASS)
library(ISLR)
library(corrgram)
library(corrplot)
library(car)
library(mlr)
## Warning: package 'mlr' was built under R version 3.3.1
## Loading required package: BBmisc
## Warning: package 'BBmisc' was built under R version 3.3.1
## Loading required package: ParamHelpers
## Warning: package 'ParamHelpers' was built under R version 3.3.1
## Loading required package: stringi
##
## Attaching package: 'mlr'
## The following object is masked from 'package:e1071':
##
## impute
Exploratory Data Analysis
# Read the Stata-format demo dataset with foreign::read.dta().
# The original ats.ucla.edu host has been retired; the file is served from stats.idre.ucla.edu.
ml <- read.dta("https://stats.idre.ucla.edu/stat/data/hsbdemo.dta")
# View() opens a data viewer, so only call it in an interactive session
# (it is not useful when the script is knitted or run in batch mode).
if (interactive()) {
  View(ml)
}
str(ml)
## 'data.frame': 200 obs. of 13 variables:
## $ id : num 45 108 15 67 153 51 164 133 2 53 ...
## $ female : Factor w/ 2 levels "male","female": 2 1 1 1 1 2 1 1 2 1 ...
## $ ses : Factor w/ 3 levels "low","middle",..: 1 2 3 1 2 3 2 2 2 2 ...
## $ schtyp : Factor w/ 2 levels "public","private": 1 1 1 1 1 1 1 1 1 1 ...
## $ prog : Factor w/ 3 levels "general","academic",..: 3 1 3 3 3 1 3 3 3 3 ...
## $ read : num 34 34 39 37 39 42 31 50 39 34 ...
## $ write : num 35 33 39 37 31 36 36 31 41 37 ...
## $ math : num 41 41 44 42 40 42 46 40 33 46 ...
## $ science: num 29 36 26 33 39 31 39 34 42 39 ...
## $ socst : num 26 36 42 32 51 39 46 31 41 31 ...
## $ honors : Factor w/ 2 levels "not enrolled",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ awards : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cid : int 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "datalabel")= chr "highschool and beyond (200 cases)"
## - attr(*, "time.stamp")= chr "30 Oct 2009 14:13"
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int 254 254 254 254 254 254 254 254 254 254 ...
## - attr(*, "val.labels")= chr "" "fl" "sl" "scl" ...
## - attr(*, "var.labels")= chr "" "" "" "type of school" ...
## - attr(*, "version")= int 8
## - attr(*, "label.table")=List of 5
## ..$ sl : Named int 1 2 3
## .. ..- attr(*, "names")= chr "low" "middle" "high"
## ..$ scl : Named int 1 2
## .. ..- attr(*, "names")= chr "public" "private"
## ..$ sel : Named int 1 2 3
## .. ..- attr(*, "names")= chr "general" "academic" "vocation"
## ..$ fl : Named int 0 1
## .. ..- attr(*, "names")= chr "male" "female"
## ..$ honlab: Named int 0 1
## .. ..- attr(*, "names")= chr "not enrolled" "enrolled"
summarizeColumns(ml)
## name type na mean disp median mad min max nlevs
## 1 id numeric 0 100.500 57.879185 100.5 74.1300 1 200 0
## 2 female factor 0 NA 0.455000 NA NA 91 109 2
## 3 ses factor 0 NA 0.525000 NA NA 47 95 3
## 4 schtyp factor 0 NA 0.160000 NA NA 32 168 2
## 5 prog factor 0 NA 0.475000 NA NA 45 105 3
## 6 read numeric 0 52.230 10.252937 50.0 10.3782 28 76 0
## 7 write numeric 0 52.775 9.478586 54.0 11.8608 31 67 0
## 8 math numeric 0 52.645 9.368448 52.0 10.3782 33 75 0
## 9 science numeric 0 51.850 9.900891 53.0 11.8608 26 74 0
## 10 socst numeric 0 52.405 10.735793 52.0 13.3434 26 71 0
## 11 honors factor 0 NA 0.265000 NA NA 53 147 2
## 12 awards numeric 0 1.670 1.818691 1.0 1.4826 0 7 0
## 13 cid integer 0 10.430 5.801152 10.5 8.1543 1 20 0
unique(ml$prog)
## [1] vocation general academic
## Levels: general academic vocation
names(ml)
## [1] "id" "female" "ses" "schtyp" "prog" "read" "write"
## [8] "math" "science" "socst" "honors" "awards" "cid"
head(ml)
## id female ses schtyp prog read write math science socst
## 1 45 female low public vocation 34 35 41 29 26
## 2 108 male middle public general 34 33 41 36 36
## 3 15 male high public vocation 39 39 44 26 42
## 4 67 male low public vocation 37 37 42 33 32
## 5 153 male middle public vocation 39 31 40 39 51
## 6 51 female high public general 42 36 42 31 39
## honors awards cid
## 1 not enrolled 0 1
## 2 not enrolled 0 1
## 3 not enrolled 0 1
## 4 not enrolled 0 1
## 5 not enrolled 0 1
## 6 not enrolled 0 1
# Cross-tabulate ses (rows) against prog (columns) to see how program choice varies by ses level.
table(ml$ses,ml$prog)
##
## general academic vocation
## low 16 19 12
## middle 20 44 31
## high 9 42 7