# Load the Boston Housing CSV into a data frame.
# NOTE(review): the path is absolute and specific to the author's machine;
# adjust it before running this script anywhere else.
housing <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearningR/data/t1//BostonHousing.csv"
)
# Compact overview: number of observations, column types and first values.
str(object = housing)
## 'data.frame':    506 obs. of  14 variables:
##  $ CRIM   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ ZN     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ INDUS  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ CHAS   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NOX    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ RM     : num  6.58 6.42 7.18 7 7.15 ...
##  $ AGE    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ DIS    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ RAD    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ TAX    : int  296 242 242 222 222 222 311 311 311 311 ...
##  $ PTRATIO: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ B      : num  397 397 393 395 397 ...
##  $ LSTAT  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ MEDV   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Preview the first six rows of the raw data.
head(housing, n = 6L)
##      CRIM ZN INDUS CHAS   NOX    RM  AGE    DIS RAD TAX PTRATIO      B LSTAT
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
##   MEDV
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7

Función scale

`scale()` es una función genérica cuyo método por defecto centra y/o escala las columnas de una matriz numérica.

# Standardize every column: subtract the column mean, then divide by the
# column standard deviation.
housing.z <- scale(
  housing,
  center = TRUE,
  scale = TRUE
)
head(housing.z)
##            CRIM         ZN      INDUS       CHAS        NOX        RM
## [1,] -0.4193669  0.2845483 -1.2866362 -0.2723291 -0.1440749 0.4132629
## [2,] -0.4169267 -0.4872402 -0.5927944 -0.2723291 -0.7395304 0.1940824
## [3,] -0.4169290 -0.4872402 -0.5927944 -0.2723291 -0.7395304 1.2814456
## [4,] -0.4163384 -0.4872402 -1.3055857 -0.2723291 -0.8344581 1.0152978
## [5,] -0.4120741 -0.4872402 -1.3055857 -0.2723291 -0.8344581 1.2273620
## [6,] -0.4166314 -0.4872402 -1.3055857 -0.2723291 -0.8344581 0.2068916
##             AGE      DIS        RAD        TAX    PTRATIO         B      LSTAT
## [1,] -0.1198948 0.140075 -0.9818712 -0.6659492 -1.4575580 0.4406159 -1.0744990
## [2,]  0.3668034 0.556609 -0.8670245 -0.9863534 -0.3027945 0.4406159 -0.4919525
## [3,] -0.2655490 0.556609 -0.8670245 -0.9863534 -0.3027945 0.3960351 -1.2075324
## [4,] -0.8090878 1.076671 -0.7521778 -1.1050216  0.1129203 0.4157514 -1.3601708
## [5,] -0.5106743 1.076671 -0.7521778 -1.1050216  0.1129203 0.4406159 -1.0254866
## [6,] -0.3508100 1.076671 -0.7521778 -1.1050216  0.1129203 0.4101651 -1.0422909
##            MEDV
## [1,]  0.1595278
## [2,] -0.1014239
## [3,]  1.3229375
## [4,]  1.1815886
## [5,]  1.4860323
## [6,]  0.6705582
# Center only: subtract each column's mean and leave the spread untouched.
housing.mean <- scale(
  housing,
  center = TRUE,
  scale = FALSE
)
head(housing.mean)
##           CRIM         ZN     INDUS        CHAS         NOX        RM
## [1,] -3.607204   6.636364 -8.826779 -0.06916996 -0.01669506 0.2903656
## [2,] -3.586214 -11.363636 -4.066779 -0.06916996 -0.08569506 0.1363656
## [3,] -3.586234 -11.363636 -4.066779 -0.06916996 -0.08569506 0.9003656
## [4,] -3.581154 -11.363636 -8.956779 -0.06916996 -0.09669506 0.7133656
## [5,] -3.544474 -11.363636 -8.956779 -0.06916996 -0.09669506 0.8623656
## [6,] -3.583674 -11.363636 -8.956779 -0.06916996 -0.09669506 0.1453656
##             AGE       DIS       RAD       TAX    PTRATIO        B     LSTAT
## [1,]  -3.374901 0.2949573 -8.549407 -112.2372 -3.1555336 40.22597 -7.673063
## [2,]  10.325099 1.1720573 -7.549407 -166.2372 -0.6555336 40.22597 -3.513063
## [3,]  -7.474901 1.1720573 -7.549407 -166.2372 -0.6555336 36.15597 -8.623063
## [4,] -22.774901 2.2671573 -6.549407 -186.2372  0.2444664 37.95597 -9.713063
## [5,] -14.374901 2.2671573 -6.549407 -186.2372  0.2444664 40.22597 -7.323063
## [6,]  -9.874901 2.2671573 -6.549407 -186.2372  0.2444664 37.44597 -7.443063
##            MEDV
## [1,]  1.4671937
## [2,] -0.9328063
## [3,] 12.1671937
## [4,] 10.8671937
## [5,] 13.6671937
## [6,]  6.1671937
# Scale only, without centering. In this mode each column is divided by its
# root mean square, sqrt(sum(x^2) / (n - 1)), which equals the standard
# deviation only when the column already has mean zero.
housing.sd <- scale(
  housing,
  center = FALSE,
  scale = TRUE
)
head(housing.sd)
##              CRIM        ZN     INDUS CHAS       NOX       RM       AGE
## [1,] 0.0006773027 0.6936817 0.1764759    0 0.9485077 1.038710 0.8788221
## [2,] 0.0029267624 0.0000000 0.5401231    0 0.8268590 1.014381 1.0634826
## [3,] 0.0029246190 0.0000000 0.5401231    0 0.8268590 1.135077 0.8235588
## [4,] 0.0034690332 0.0000000 0.1665443    0 0.8074657 1.105535 0.6173321
## [5,] 0.0073999612 0.0000000 0.1665443    0 0.8074657 1.129073 0.7305546
## [6,] 0.0031989695 0.0000000 0.1665443    0 0.8074657 1.015803 0.7912095
##            DIS        RAD       TAX   PTRATIO        B     LSTAT      MEDV
## [1,] 0.9416646 0.07733878 0.6696347 0.8225709 1.077026 0.3425033 0.9852965
## [2,] 1.1436045 0.15467755 0.5474716 0.9569779 1.077026 0.6286104 0.8867668
## [3,] 1.1436045 0.15467755 0.5474716 0.9569779 1.065982 0.2771663 1.4245745
## [4,] 1.3957357 0.23201633 0.5022260 1.0053645 1.070867 0.2022007 1.3712043
## [5,] 1.3957357 0.23201633 0.5022260 1.0053645 1.077026 0.3665748 1.4861555
## [6,] 1.3957357 0.23201633 0.5022260 1.0053645 1.069483 0.3583217 1.1782504
# Neither centering nor scaling: the values are left unchanged, but the
# result is returned as a matrix rather than a data frame.
housing.none <- scale(
  housing,
  center = FALSE,
  scale = FALSE
)
head(housing.none)
##         CRIM ZN INDUS CHAS   NOX    RM  AGE    DIS RAD TAX PTRATIO      B LSTAT
## [1,] 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## [2,] 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## [3,] 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## [4,] 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## [5,] 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## [6,] 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
##      MEDV
## [1,] 24.0
## [2,] 21.6
## [3,] 34.7
## [4,] 33.4
## [5,] 36.2
## [6,] 28.7
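# Note: with center = FALSE and scale = TRUE, scale() divides each column by
# sqrt(sum(x^2) / (n - 1)) (the root mean square), not by sd(x).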

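La siguiente función, muy similar al ejemplo de rescale, estandariza (z-score) varias columnas de un data frame y añade cada resultado como una nueva columna con el sufijo `.z`.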

# Append z-score standardized copies of selected columns to a data frame.
#
# For each entry of `cols`, a new column named "<original name>.z" is added
# holding the column centered on its mean and divided by its standard
# deviation. The original columns are left untouched.
#
# Args:
#   dataframe: data frame containing the columns to standardize.
#   cols:      columns to standardize, given either as positions or as
#              column names.
#
# Returns:
#   `dataframe` with one extra ".z" column per entry of `cols`. A short
#   status line is written to the console as a side effect.
scale.many <- function(dataframe, cols) {
  col_names <- names(dataframe)

  # Resolve column names to positions so that both the lookup of the new
  # column's name and the extraction below work for character input.
  if (is.character(cols)) {
    positions <- match(cols, col_names)
    if (anyNA(positions)) {
      stop(
        "Unknown column(s): ",
        paste(cols[is.na(positions)], collapse = ", "),
        call. = FALSE
      )
    }
    cols <- positions
  }

  for (col in cols) {
    z_name <- paste(col_names[col], "z", sep = ".")
    # scale() returns a one-column matrix carrying "scaled:center" and
    # "scaled:scale" attributes; as.vector() strips them so the new column
    # is a plain numeric vector like every other column.
    dataframe[[z_name]] <- as.vector(scale(dataframe[[col]]))
  }

  cat(paste("Hemos normalizado ", length(cols), " variable(s)"))
  dataframe
}
# Standardize columns 1, 3 and 5 through 8, store the widened data frame back
# in `housing`, then preview its first rows.
housing <- scale.many(housing, c(1, 3, 5:8))
head(housing)
## Hemos normalizado  6  variable(s)
##      CRIM ZN INDUS CHAS   NOX    RM  AGE    DIS RAD TAX PTRATIO      B LSTAT
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
##   MEDV     CRIM.z    INDUS.z      NOX.z      RM.z      AGE.z    DIS.z
## 1 24.0 -0.4193669 -1.2866362 -0.1440749 0.4132629 -0.1198948 0.140075
## 2 21.6 -0.4169267 -0.5927944 -0.7395304 0.1940824  0.3668034 0.556609
## 3 34.7 -0.4169290 -0.5927944 -0.7395304 1.2814456 -0.2655490 0.556609
## 4 33.4 -0.4163384 -1.3055857 -0.8344581 1.0152978 -0.8090878 1.076671
## 5 36.2 -0.4120741 -1.3055857 -0.8344581 1.2273620 -0.5106743 1.076671
## 6 28.7 -0.4166314 -1.3055857 -0.8344581 0.2068916 -0.3508100 1.076671