# Introduction to Statistical Learning
# Russ Conte
# Saturday, April 15, 2017
# Chapter 2: Statistical Learning

# Read the Advertising data (comma-separated, first line holds the column
# names) and show its first six rows.
Advertising <- read.csv(
  "/Users/russconte/Advertising.csv",
  sep = ",",
  header = TRUE
)
head(Advertising)
##   X    TV Radio Newspaper Sales
## 1 1 230.1  37.8      69.2  22.1
## 2 2  44.5  39.3      45.1  10.4
## 3 3  17.2  45.9      69.3   9.3
## 4 4 151.5  41.3      58.5  18.5
## 5 5 180.8  10.8      58.4  12.9
## 6 6   8.7  48.9      75.0   7.2

# Scatter plots of Sales against TV, Radio and Newspaper. The columns are
# resolved inside the data frame with with() instead of attach(), so the
# search path is left untouched and later objects cannot mask (or be masked
# by) these column names. The axis labels are still the bare column names.
with(Advertising, plot(TV, Sales, col = "red"))

with(Advertising, plot(Radio, Sales, col = "red"))

with(Advertising, plot(Newspaper, Sales, col = "red"))

# Build two numeric vectors with c() and print each one.
x <- c(1, 3, 2, 5)
x
## [1] 1 3 2 5
y <- c(1, 6, 2)
y
## [1] 1 6 2
# The two vectors currently differ in length.
length(x)
## [1] 4
length(y)
## [1] 3
# Redefine x with three elements so it lines up with y, then add the two
# vectors element by element.
x <- c(1, 6, 2)
x + y
## [1]  2 12  4
# ls() lists the objects in the workspace; rm() deletes the named ones.
ls()
## [1] "Advertising" "x"           "y"
rm(x, y)
ls()
## [1] "Advertising"
# Remove every remaining object in a single call.
rm(list = ls())
ls()
## character(0)
# Create a 2 x 2 matrix; as the printout shows, the values are laid out
# column by column.
x <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
x
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# With byrow = TRUE the same values are laid out row by row instead.
matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
# sqrt() and ^ operate on every element of the matrix.
sqrt(x)
##          [,1]     [,2]
## [1,] 1.000000 1.732051
## [2,] 1.414214 2.000000
x^2
##      [,1] [,2]
## [1,]    1    9
## [2,]    4   16
# Draw 50 standard-normal values, form a second vector equal to the first
# plus noise with mean 50 and sd 0.1, and compute their correlation. No seed
# is set before this point in the script, so the printed value will differ
# from run to run.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = 0.1)
cor(x, y)
## [1] 0.9963487
# Seeding the generator makes the following draw repeatable.
set.seed(1303)
rnorm(50)
##  [1] -1.1439763145  1.3421293656  2.1853904757  0.5363925179  0.0631929665
##  [6]  0.5022344825 -0.0004167247  0.5658198405 -0.5725226890 -1.1102250073
## [11] -0.0486871234 -0.6956562176  0.8289174803  0.2066528551 -0.2356745091
## [16] -0.5563104914 -0.3647543571  0.8623550343 -0.6307715354  0.3136021252
## [21] -0.9314953177  0.8238676185  0.5233707021  0.7069214120  0.4202043256
## [26] -0.2690521547 -1.5103172999 -0.6902124766 -0.1434719524 -1.0135274099
## [31]  1.5732737361  0.0127465055  0.8726470499  0.4220661905 -0.0188157917
## [36]  2.6157489689 -0.6931401748 -0.2663217810 -0.7206364412  1.3677342065
## [41]  0.2640073322  0.6321868074 -1.3306509858  0.0268888182  1.0406363208
## [46]  1.3120237985 -0.0300020767 -0.2500257125  0.0234144857  1.6598706557
# Reseed, draw 100 values and summarise them. The last two results agree:
# sd() gives the same number as the square root of var().
set.seed(3)
y <- rnorm(100)
mean(y)
## [1] 0.01103557
var(y)
## [1] 0.7328675
sqrt(var(y))
## [1] 0.8560768
sd(y)
## [1] 0.8560768
# Scatter plots of two fresh random samples: default appearance, then with
# custom axis labels and a title, then with green points.
x <- rnorm(100)
y <- rnorm(100)
plot(x, y)

plot(
  x, y,
  xlab = "this is the x-axis",
  ylab = "this is the y-axis",
  main = "Plot of x vs y"
)

plot(x, y, col = "green")

# seq() builds regular sequences: first the integers 1 to 10, then 50
# equally spaced points from -pi to pi.
x <- seq(1, 10)
x
##  [1]  1  2  3  4  5  6  7  8  9 10
x <- seq(-pi, pi, length.out = 50)
x
##  [1] -3.14159265 -3.01336438 -2.88513611 -2.75690784 -2.62867957
##  [6] -2.50045130 -2.37222302 -2.24399475 -2.11576648 -1.98753821
## [11] -1.85930994 -1.73108167 -1.60285339 -1.47462512 -1.34639685
## [16] -1.21816858 -1.08994031 -0.96171204 -0.83348377 -0.70525549
## [21] -0.57702722 -0.44879895 -0.32057068 -0.19234241 -0.06411414
## [26]  0.06411414  0.19234241  0.32057068  0.44879895  0.57702722
## [31]  0.70525549  0.83348377  0.96171204  1.08994031  1.21816858
## [36]  1.34639685  1.47462512  1.60285339  1.73108167  1.85930994
## [41]  1.98753821  2.11576648  2.24399475  2.37222302  2.50045130
## [46]  2.62867957  2.75690784  2.88513611  3.01336438  3.14159265
# Evaluate cos(v) / (1 + u^2) on the full x-by-y grid and draw its contours;
# the second call overlays a denser set of levels on the same plot.
y <- x
f <- outer(x, y, function(u, v) cos(v) / (1 + u^2))
contour(x, y, f)
contour(x, y, f, nlevels = 45, add = TRUE)

# fa is half the difference between f and its transpose; show it as a
# contour plot, a colour image and perspective plots from several angles.
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)

image(x, y, fa)

persp(x, y, fa)

persp(x, y, fa, theta = 30)

persp(x, y, fa, theta = 30, phi = 20)

persp(x, y, fa, theta = 30, phi = 70)

persp(x, y, fa, theta = 30, phi = 40)

# 2.3.3 Indexing Data

# A 4 x 4 matrix holding 1..16, used to demonstrate subsetting with [ , ].
A <- matrix(1:16, nrow = 4, ncol = 4)
A
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
# Single element: row 2, column 3.
A[2, 3]
## [1] 10
# Several rows and columns at once, given as index vectors or ranges.
A[c(1, 3), c(2, 4)]
##      [,1] [,2]
## [1,]    5   13
## [2,]    7   15
A[1:3, 2:4]
##      [,1] [,2] [,3]
## [1,]    5    9   13
## [2,]    6   10   14
## [3,]    7   11   15
# Leaving an index empty selects everything along that dimension.
A[1:2, ]
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
A[, 1:2]
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8
# A single row comes back as a plain vector.
A[1, ]
## [1]  1  5  9 13
# Negative indices exclude the listed rows and columns.
A[-c(1, 3), -c(1, 3, 4)]
## [1] 6 8
# dim() reports the number of rows and columns.
dim(A)
## [1] 4 4
# First attempt at loading the Auto data (whitespace-separated, with a header
# row). No missing-value marker is declared here, so any "?" entries are read
# as ordinary text; the file is re-read below with na.strings = "?".
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 text columns are
# otherwise read as character, which makes pairs(Auto) fail and changes the
# summary() of `name` shown further down.
Auto <- read.table(
  "/Users/russconte/Auto.data",
  header = TRUE,
  stringsAsFactors = TRUE
)
head(Auto)
##   mpg cylinders displacement horsepower weight acceleration year origin
## 1  18         8          307      130.0   3504         12.0   70      1
## 2  15         8          350      165.0   3693         11.5   70      1
## 3  18         8          318      150.0   3436         11.0   70      1
## 4  16         8          304      150.0   3433         12.0   70      1
## 5  17         8          302      140.0   3449         10.5   70      1
## 6  15         8          429      198.0   4341         10.0   70      1
##                        name
## 1 chevrolet chevelle malibu
## 2         buick skylark 320
## 3        plymouth satellite
## 4             amc rebel sst
## 5               ford torino
## 6          ford galaxie 500
# Reload, this time treating "?" as a missing value (NA).
Auto <- read.table(
  "/Users/russconte/Auto.data",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
dim(Auto)
## [1] 397   9
Auto[1:4, ]
##   mpg cylinders displacement horsepower weight acceleration year origin
## 1  18         8          307        130   3504         12.0   70      1
## 2  15         8          350        165   3693         11.5   70      1
## 3  18         8          318        150   3436         11.0   70      1
## 4  16         8          304        150   3433         12.0   70      1
##                        name
## 1 chevrolet chevelle malibu
## 2         buick skylark 320
## 3        plymouth satellite
## 4             amc rebel sst
names(Auto)
## [1] "mpg"          "cylinders"    "displacement" "horsepower"  
## [5] "weight"       "acceleration" "year"         "origin"      
## [9] "name"
# Columns are looked up with with(Auto, ...) rather than attach(Auto), so the
# search path is not modified and workspace objects cannot shadow the
# columns. Axis labels and histogram titles still use the bare column names.
with(Auto, plot(cylinders, mpg))

with(Auto, plot(cylinders, mpg, col = "red"))

with(
  Auto,
  plot(cylinders, mpg, col = "red", xlab = "cylinders", ylab = "MPG")
)

# Histograms of mpg: default, coloured, and with about 15 bins requested.
with(Auto, hist(mpg))

with(Auto, hist(mpg, col = 2))

with(Auto, hist(mpg, col = 2, breaks = 15))

# Scatter-plot matrix of every column, then of a chosen subset.
pairs(Auto)

pairs(~ mpg + displacement + horsepower + weight + acceleration, data = Auto)

# identify() labels points that are clicked on the plot with the car name;
# when nothing is clicked it returns integer(0).
with(Auto, plot(horsepower, mpg))
with(Auto, identify(horsepower, mpg, name))

## integer(0)
summary(Auto)
##       mpg          cylinders      displacement     horsepower   
##  Min.   : 9.00   Min.   :3.000   Min.   : 68.0   Min.   : 46.0  
##  1st Qu.:17.50   1st Qu.:4.000   1st Qu.:104.0   1st Qu.: 75.0  
##  Median :23.00   Median :4.000   Median :146.0   Median : 93.5  
##  Mean   :23.52   Mean   :5.458   Mean   :193.5   Mean   :104.5  
##  3rd Qu.:29.00   3rd Qu.:8.000   3rd Qu.:262.0   3rd Qu.:126.0  
##  Max.   :46.60   Max.   :8.000   Max.   :455.0   Max.   :230.0  
##                                                  NA's   :5      
##      weight      acceleration        year           origin     
##  Min.   :1613   Min.   : 8.00   Min.   :70.00   Min.   :1.000  
##  1st Qu.:2223   1st Qu.:13.80   1st Qu.:73.00   1st Qu.:1.000  
##  Median :2800   Median :15.50   Median :76.00   Median :1.000  
##  Mean   :2970   Mean   :15.56   Mean   :75.99   Mean   :1.574  
##  3rd Qu.:3609   3rd Qu.:17.10   3rd Qu.:79.00   3rd Qu.:2.000  
##  Max.   :5140   Max.   :24.80   Max.   :82.00   Max.   :3.000  
##                                                                
##              name    
##  ford pinto    :  6  
##  amc matador   :  5  
##  ford maverick :  5  
##  toyota corolla:  5  
##  amc gremlin   :  4  
##  amc hornet    :  4  
##  (Other)       :368
summary(Auto$mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   17.50   23.00   23.52   29.00   46.60
# End the R session. A bare `q` only prints the function's source, so the
# function is actually called here. The call is restricted to interactive
# sessions so that sourcing or rendering this script is not cut short.
if (interactive()) {
  q()
}