1 Calling libraries

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(e1071)
## Warning: package 'e1071' was built under R version 3.3.1
library(MASS)
## Warning: package 'MASS' was built under R version 3.3.2
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.3.2
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.3.2
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.2
library(car)
## Warning: package 'car' was built under R version 3.3.2

2 Exploring Smarket data set

# Column names of the Smarket data frame (presumably the one shipped with the
# ISLR package loaded at the top of the script).
names(Smarket)
## [1] "Year"      "Lag1"      "Lag2"      "Lag3"      "Lag4"      "Lag5"     
## [7] "Volume"    "Today"     "Direction"
# NOTE(review): attach() places the columns of Smarket on the search path, which
# makes bare column names such as Direction resolvable without the Smarket$
# prefix. Attached columns can mask, or be masked by, other objects of the same
# name; prefer explicit Smarket$col references or the data = argument.
attach(Smarket)
# Structure of the data: eight numeric columns plus the two-level factor
# Direction ("Down", "Up").
str(Smarket)
## 'data.frame':    1250 obs. of  9 variables:
##  $ Year     : num  2001 2001 2001 2001 2001 ...
##  $ Lag1     : num  0.381 0.959 1.032 -0.623 0.614 ...
##  $ Lag2     : num  -0.192 0.381 0.959 1.032 -0.623 ...
##  $ Lag3     : num  -2.624 -0.192 0.381 0.959 1.032 ...
##  $ Lag4     : num  -1.055 -2.624 -0.192 0.381 0.959 ...
##  $ Lag5     : num  5.01 -1.055 -2.624 -0.192 0.381 ...
##  $ Volume   : num  1.19 1.3 1.41 1.28 1.21 ...
##  $ Today    : num  0.959 1.032 -0.623 0.614 0.213 ...
##  $ Direction: Factor w/ 2 levels "Down","Up": 2 2 1 2 2 2 1 2 2 2 ...
# Number of rows and columns
dim(Smarket)
## [1] 1250    9
# cor() only accepts numeric input, so cor(Smarket) fails on the factor column
# Direction. The numeric columns are selected by type instead of dropping the
# hard-coded position 9, so the code keeps working if columns are reordered.
numeric_cols <- vapply(Smarket, is.numeric, logical(1))
C <- cor(Smarket[, numeric_cols])
corrgram(C)

corrplot(C)

# Reorder the variables by hierarchical clustering of the correlations
corrplot(C, order = "hclust")

# tl.cex scales the text labels, cl.cex scales the colour legend
corrplot(C, order = "hclust", method = "square", tl.cex = 0.6, cl.cex = 0.6)


3 Running Logistic Regression equation

# Logistic regression of Direction on Lag1-Lag5 and Volume, fitted on the full
# Smarket data frame. The formula is kept inline so the "Call:" section printed
# by summary() is unchanged.
glm.fit <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = Smarket,
  family = binomial()
)
# Coefficient table, null/residual deviance and AIC of the fit
summary(glm.fit)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial(), data = Smarket)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.446  -1.203   1.065   1.145   1.326  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000   0.240736  -0.523    0.601
## Lag1        -0.073074   0.050167  -1.457    0.145
## Lag2        -0.042301   0.050086  -0.845    0.398
## Lag3         0.011085   0.049939   0.222    0.824
## Lag4         0.009359   0.049974   0.187    0.851
## Lag5         0.010313   0.049511   0.208    0.835
## Volume       0.135441   0.158360   0.855    0.392
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1731.2  on 1249  degrees of freedom
## Residual deviance: 1727.6  on 1243  degrees of freedom
## AIC: 1741.6
## 
## Number of Fisher Scoring iterations: 3
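# A useful model shows a large drop from the null deviance to the residual
# deviance. Here the drop is only 1731.2 - 1727.6 = 3.6 on 6 degrees of freedom,
# so the predictors explain very little.
# AIC has no absolute meaning: compare it between models fitted to the same
# data and prefer the model with the lower AIC.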
# Dummy coding of the response: "Down" is coded 0 and "Up" is coded 1, so the
# fitted glm models the probability of "Up".
# The column is referenced through the data frame so that this call does not
# depend on attach(Smarket).
contrasts(Smarket$Direction)
##      Up
## Down  0
## Up    1
# Class balance of the response. Naming the argument keeps the "Direction"
# header in the printed table.
table(Direction = Smarket$Direction)
## Direction
## Down   Up 
##  602  648
# Estimated coefficients of the fitted model (log-odds scale for the default
# logit link of binomial()).
coef(glm.fit)
##  (Intercept)         Lag1         Lag2         Lag3         Lag4 
## -0.126000257 -0.073073746 -0.042301344  0.011085108  0.009358938 
##         Lag5       Volume 
##  0.010313068  0.135440659
# Residual and fitted probability of the last observation. The row index is
# derived from the data instead of the hard-coded 1250, and the accessor
# functions are used instead of reaching into the model object.
# glm.fit$residuals holds the working residuals, hence type = "working".
last_obs <- nrow(Smarket)
residuals(glm.fit, type = "working")[last_obs]
##     1250 
## -2.07433
fitted(glm.fit)[last_obs]
##      1250 
## 0.5179166
# Fitted probability that the market goes up, for every row used in the fit
# (no newdata is supplied, so these are in-sample predictions).
glm.prob <- predict(glm.fit, type = "response")
glm.prob[1:10]
##         1         2         3         4         5         6         7 
## 0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 0.5069565 0.4926509 
##         8         9        10 
## 0.5092292 0.5176135 0.4888378
# Turn the fitted probabilities into class labels: start with "Down" for every
# row, then switch to "Up" where the probability is strictly greater than 0.5.
# The vector length comes from the data instead of the hard-coded 1250.
glm.pred <- rep("Down", nrow(Smarket))
# NOTE(review): glm.pred1 is not used in the visible part of the script; kept in
# case later code refers to it.
glm.pred1 <- rep("Down", nrow(Smarket))
glm.pred[glm.prob > 0.5] <- "Up"
glm.pred
##    [1] "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##   [10] "Down" "Down" "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##   [19] "Up"   "Down" "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Down"
##   [28] "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"  
##   [37] "Up"   "Down" "Down" "Down" "Down" "Up"   "Up"   "Up"   "Up"  
##   [46] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Down" "Down" "Down"
##   [55] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Down" "Down" "Up"  
##   [64] "Down" "Down" "Down" "Down" "Up"   "Down" "Down" "Up"   "Up"  
##   [73] "Up"   "Down" "Down" "Down" "Up"   "Down" "Up"   "Up"   "Down"
##   [82] "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Down" "Down"
##   [91] "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [100] "Down" "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [109] "Up"   "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"   "Up"  
##  [118] "Up"   "Down" "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"  
##  [127] "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [136] "Up"   "Down" "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"  
##  [145] "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##  [154] "Up"   "Down" "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"  
##  [163] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [172] "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Down" "Down"
##  [181] "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"  
##  [190] "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Down"
##  [199] "Down" "Up"   "Up"   "Up"   "Down" "Down" "Down" "Down" "Up"  
##  [208] "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"  
##  [217] "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Down" "Up"   "Down"
##  [226] "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down"
##  [235] "Down" "Down" "Up"   "Up"   "Down" "Down" "Down" "Down" "Up"  
##  [244] "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [253] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [262] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Down"
##  [271] "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [280] "Down" "Down" "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##  [289] "Up"   "Down" "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"  
##  [298] "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##  [307] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down"
##  [316] "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [325] "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"  
##  [334] "Down" "Down" "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Down"
##  [343] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"  
##  [352] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"  
##  [361] "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"  
##  [370] "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [379] "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Down"
##  [388] "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Down" "Down"
##  [397] "Up"   "Up"   "Down" "Down" "Up"   "Down" "Up"   "Up"   "Down"
##  [406] "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"  
##  [415] "Down" "Down" "Down" "Down" "Up"   "Up"   "Down" "Up"   "Up"  
##  [424] "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Down"
##  [433] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Down"
##  [442] "Down" "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"  
##  [451] "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"  
##  [460] "Up"   "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"   "Down"
##  [469] "Down" "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##  [478] "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Down" "Down"
##  [487] "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Down"
##  [496] "Down" "Down" "Down" "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##  [505] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [514] "Down" "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"  
##  [523] "Up"   "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"   "Up"  
##  [532] "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"  
##  [541] "Up"   "Up"   "Up"   "Down" "Down" "Down" "Down" "Up"   "Up"  
##  [550] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down"
##  [559] "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down"
##  [568] "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"   "Down" "Down"
##  [577] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Down"
##  [586] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down"
##  [595] "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##  [604] "Up"   "Up"   "Down" "Up"   "Up"   "Down" "Down" "Up"   "Up"  
##  [613] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down"
##  [622] "Down" "Down" "Down" "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##  [631] "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Down" "Down" "Up"  
##  [640] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Down"
##  [649] "Down" "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"   "Up"  
##  [658] "Up"   "Down" "Down" "Down" "Down" "Down" "Up"   "Up"   "Up"  
##  [667] "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Down" "Up"   "Up"  
##  [676] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Down"
##  [685] "Down" "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Down"
##  [694] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [703] "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [712] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [721] "Up"   "Down" "Down" "Down" "Down" "Up"   "Up"   "Up"   "Up"  
##  [730] "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##  [739] "Up"   "Up"   "Up"   "Up"   "Down" "Down" "Down" "Down" "Up"  
##  [748] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [757] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##  [766] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##  [775] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [784] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [793] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"  
##  [802] "Up"   "Up"   "Up"   "Down" "Down" "Down" "Down" "Up"   "Up"  
##  [811] "Up"   "Down" "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"  
##  [820] "Up"   "Down" "Up"   "Up"   "Down" "Down" "Up"   "Up"   "Up"  
##  [829] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [838] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [847] "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down"
##  [856] "Down" "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"  
##  [865] "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [874] "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"  
##  [883] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [892] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"  
##  [901] "Up"   "Up"   "Down" "Down" "Down" "Up"   "Up"   "Up"   "Up"  
##  [910] "Down" "Down" "Down" "Up"   "Up"   "Down" "Down" "Down" "Up"  
##  [919] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [928] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Down"
##  [937] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [946] "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Down"
##  [955] "Up"   "Up"   "Up"   "Up"   "Up"   "Down" "Up"   "Up"   "Up"  
##  [964] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [973] "Up"   "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [982] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
##  [991] "Up"   "Down" "Down" "Down" "Up"   "Down" "Down" "Down" "Down"
## [1000] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1009] "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1018] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1027] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1036] "Down" "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1045] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1054] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1063] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1072] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1081] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1090] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1099] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1108] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1117] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1126] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1135] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1144] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1153] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1162] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1171] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1180] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1189] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1198] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1207] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1216] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1225] "Up"   "Up"   "Down" "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1234] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"  
## [1243] "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"   "Up"
# Confusion matrix: rows are the predicted class, columns the observed class.
table(glm.pred, Smarket$Direction)
##         
## glm.pred Down  Up
##     Down  145 141
##     Up    457 507
# Overall accuracy = (true positives + true negatives) / all observations,
# i.e. (507 + 145) / 1250. This is not the true positive rate, which would be
# 507 / (507 + 141). The value is computed from the predictions rather than from
# hand-copied counts, and it is an in-sample figure because the model was fitted
# on the same rows.
mean(glm.pred == Smarket$Direction)
## [1] 0.5216

4 Multinomial Logistic Regression

4.1 Loading required package

# More than two outcome categories --> multinomial regression
# library() replaces require(): library() stops with an error when a package is
# missing, whereas require() only warns and returns FALSE, letting the script
# fail later in a less obvious place.
# The original require(ggplot) always failed ("there is no package called
# 'ggplot'"); presumably ggplot2 was meant, which is loaded below.
library(foreign)
## Warning: package 'foreign' was built under R version 3.3.2
library(nnet)
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.3.1
library(ggplot2)
library(e1071)
library(MASS)
library(ISLR)
library(corrgram)
library(corrplot)
library(car)
library(mlr)
## Warning: package 'mlr' was built under R version 3.3.1
## Loading required package: BBmisc
## Warning: package 'BBmisc' was built under R version 3.3.1
## Loading required package: ParamHelpers
## Warning: package 'ParamHelpers' was built under R version 3.3.1
## Loading required package: stringi
## 
## Attaching package: 'mlr'
## The following object is masked from 'package:e1071':
## 
##     impute

4.2 Exploratory Data Analysis

# Load the "highschool and beyond (200 cases)" demo data (Stata .dta format).
# NOTE(review): the former host www.ats.ucla.edu has been retired; the UCLA
# statistics site now serves the file from stats.idre.ucla.edu. Confirm the URL
# still resolves before relying on it.
ml <- read.dta("https://stats.idre.ucla.edu/stat/data/hsbdemo.dta")
# View() opens a spreadsheet-style viewer, so it is only called when a user is
# present; this keeps non-interactive runs (Rscript, knitting) from stalling or
# failing on it.
if (interactive()) {
  View(ml)
}
str(ml)
## 'data.frame':    200 obs. of  13 variables:
##  $ id     : num  45 108 15 67 153 51 164 133 2 53 ...
##  $ female : Factor w/ 2 levels "male","female": 2 1 1 1 1 2 1 1 2 1 ...
##  $ ses    : Factor w/ 3 levels "low","middle",..: 1 2 3 1 2 3 2 2 2 2 ...
##  $ schtyp : Factor w/ 2 levels "public","private": 1 1 1 1 1 1 1 1 1 1 ...
##  $ prog   : Factor w/ 3 levels "general","academic",..: 3 1 3 3 3 1 3 3 3 3 ...
##  $ read   : num  34 34 39 37 39 42 31 50 39 34 ...
##  $ write  : num  35 33 39 37 31 36 36 31 41 37 ...
##  $ math   : num  41 41 44 42 40 42 46 40 33 46 ...
##  $ science: num  29 36 26 33 39 31 39 34 42 39 ...
##  $ socst  : num  26 36 42 32 51 39 46 31 41 31 ...
##  $ honors : Factor w/ 2 levels "not enrolled",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ awards : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cid    : int  1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "datalabel")= chr "highschool and beyond (200 cases)"
##  - attr(*, "time.stamp")= chr "30 Oct 2009 14:13"
##  - attr(*, "formats")= chr  "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
##  - attr(*, "types")= int  254 254 254 254 254 254 254 254 254 254 ...
##  - attr(*, "val.labels")= chr  "" "fl" "sl" "scl" ...
##  - attr(*, "var.labels")= chr  "" "" "" "type of school" ...
##  - attr(*, "version")= int 8
##  - attr(*, "label.table")=List of 5
##   ..$ sl    : Named int  1 2 3
##   .. ..- attr(*, "names")= chr  "low" "middle" "high"
##   ..$ scl   : Named int  1 2
##   .. ..- attr(*, "names")= chr  "public" "private"
##   ..$ sel   : Named int  1 2 3
##   .. ..- attr(*, "names")= chr  "general" "academic" "vocation"
##   ..$ fl    : Named int  0 1
##   .. ..- attr(*, "names")= chr  "male" "female"
##   ..$ honlab: Named int  0 1
##   .. ..- attr(*, "names")= chr  "not enrolled" "enrolled"
# Per-column summary of ml: type, NA count, mean, dispersion, median, MAD,
# min/max and number of factor levels (summarizeColumns presumably comes from
# the mlr package loaded earlier).
summarizeColumns(ml)
##       name    type na    mean      disp median     mad min max nlevs
## 1       id numeric  0 100.500 57.879185  100.5 74.1300   1 200     0
## 2   female  factor  0      NA  0.455000     NA      NA  91 109     2
## 3      ses  factor  0      NA  0.525000     NA      NA  47  95     3
## 4   schtyp  factor  0      NA  0.160000     NA      NA  32 168     2
## 5     prog  factor  0      NA  0.475000     NA      NA  45 105     3
## 6     read numeric  0  52.230 10.252937   50.0 10.3782  28  76     0
## 7    write numeric  0  52.775  9.478586   54.0 11.8608  31  67     0
## 8     math numeric  0  52.645  9.368448   52.0 10.3782  33  75     0
## 9  science numeric  0  51.850  9.900891   53.0 11.8608  26  74     0
## 10   socst numeric  0  52.405 10.735793   52.0 13.3434  26  71     0
## 11  honors  factor  0      NA  0.265000     NA      NA  53 147     2
## 12  awards numeric  0   1.670  1.818691    1.0  1.4826   0   7     0
## 13     cid integer  0  10.430  5.801152   10.5  8.1543   1  20     0
# Distinct values of the outcome prog: a three-level factor
# (general, academic, vocation).
unique(ml$prog)
## [1] vocation general  academic
## Levels: general academic vocation
# Column names of ml
names(ml)
##  [1] "id"      "female"  "ses"     "schtyp"  "prog"    "read"    "write"  
##  [8] "math"    "science" "socst"   "honors"  "awards"  "cid"
# First six rows of ml
head(ml)
##    id female    ses schtyp     prog read write math science socst
## 1  45 female    low public vocation   34    35   41      29    26
## 2 108   male middle public  general   34    33   41      36    36
## 3  15   male   high public vocation   39    39   44      26    42
## 4  67   male    low public vocation   37    37   42      33    32
## 5 153   male middle public vocation   39    31   40      39    51
## 6  51 female   high public  general   42    36   42      31    39
##         honors awards cid
## 1 not enrolled      0   1
## 2 not enrolled      0   1
## 3 not enrolled      0   1
## 4 not enrolled      0   1
## 5 not enrolled      0   1
## 6 not enrolled      0   1
# Cross-tabulation of ses (rows) against prog (columns)
table(ml$ses,ml$prog)
##         
##          general academic vocation
##   low         16       19       12
##   middle      20       44       31
##   high         9       42        7