# Read the Boston housing data from a local CSV file, then show its structure.
housing <- read.csv(file = "C:/Users/LUIS 1/Desktop/MachineLearningR/data/t1//BostonHousing.csv")
str(object = housing)
## 'data.frame': 506 obs. of 14 variables:
## $ CRIM : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ ZN : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ INDUS : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ CHAS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NOX : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ RM : num 6.58 6.42 7.18 7 7.15 ...
## $ AGE : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ DIS : num 4.09 4.97 4.97 6.06 6.06 ...
## $ RAD : int 1 2 2 3 3 3 5 5 5 5 ...
## $ TAX : int 296 242 242 222 222 222 311 311 311 311 ...
## $ PTRATIO: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ B : num 397 397 393 395 397 ...
## $ LSTAT : num 4.98 9.14 4.03 2.94 5.33 ...
## $ MEDV : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Preview the first six rows of the raw data.
head(housing, n = 6L)
## CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## MEDV
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
`scale()` es una función genérica cuyo método por defecto centra y/o escala las columnas de una matriz numérica.
# Standardise every column: subtract the column mean, then divide by the
# column standard deviation.
housing.z <- scale(x = housing, center = TRUE, scale = TRUE)
head(x = housing.z)
## CRIM ZN INDUS CHAS NOX RM
## [1,] -0.4193669 0.2845483 -1.2866362 -0.2723291 -0.1440749 0.4132629
## [2,] -0.4169267 -0.4872402 -0.5927944 -0.2723291 -0.7395304 0.1940824
## [3,] -0.4169290 -0.4872402 -0.5927944 -0.2723291 -0.7395304 1.2814456
## [4,] -0.4163384 -0.4872402 -1.3055857 -0.2723291 -0.8344581 1.0152978
## [5,] -0.4120741 -0.4872402 -1.3055857 -0.2723291 -0.8344581 1.2273620
## [6,] -0.4166314 -0.4872402 -1.3055857 -0.2723291 -0.8344581 0.2068916
## AGE DIS RAD TAX PTRATIO B LSTAT
## [1,] -0.1198948 0.140075 -0.9818712 -0.6659492 -1.4575580 0.4406159 -1.0744990
## [2,] 0.3668034 0.556609 -0.8670245 -0.9863534 -0.3027945 0.4406159 -0.4919525
## [3,] -0.2655490 0.556609 -0.8670245 -0.9863534 -0.3027945 0.3960351 -1.2075324
## [4,] -0.8090878 1.076671 -0.7521778 -1.1050216 0.1129203 0.4157514 -1.3601708
## [5,] -0.5106743 1.076671 -0.7521778 -1.1050216 0.1129203 0.4406159 -1.0254866
## [6,] -0.3508100 1.076671 -0.7521778 -1.1050216 0.1129203 0.4101651 -1.0422909
## MEDV
## [1,] 0.1595278
## [2,] -0.1014239
## [3,] 1.3229375
## [4,] 1.1815886
## [5,] 1.4860323
## [6,] 0.6705582
# Centre only: subtract each column's mean and leave the spread untouched.
housing.mean <- scale(x = housing, center = TRUE, scale = FALSE)
head(x = housing.mean)
## CRIM ZN INDUS CHAS NOX RM
## [1,] -3.607204 6.636364 -8.826779 -0.06916996 -0.01669506 0.2903656
## [2,] -3.586214 -11.363636 -4.066779 -0.06916996 -0.08569506 0.1363656
## [3,] -3.586234 -11.363636 -4.066779 -0.06916996 -0.08569506 0.9003656
## [4,] -3.581154 -11.363636 -8.956779 -0.06916996 -0.09669506 0.7133656
## [5,] -3.544474 -11.363636 -8.956779 -0.06916996 -0.09669506 0.8623656
## [6,] -3.583674 -11.363636 -8.956779 -0.06916996 -0.09669506 0.1453656
## AGE DIS RAD TAX PTRATIO B LSTAT
## [1,] -3.374901 0.2949573 -8.549407 -112.2372 -3.1555336 40.22597 -7.673063
## [2,] 10.325099 1.1720573 -7.549407 -166.2372 -0.6555336 40.22597 -3.513063
## [3,] -7.474901 1.1720573 -7.549407 -166.2372 -0.6555336 36.15597 -8.623063
## [4,] -22.774901 2.2671573 -6.549407 -186.2372 0.2444664 37.95597 -9.713063
## [5,] -14.374901 2.2671573 -6.549407 -186.2372 0.2444664 40.22597 -7.323063
## [6,] -9.874901 2.2671573 -6.549407 -186.2372 0.2444664 37.44597 -7.443063
## MEDV
## [1,] 1.4671937
## [2,] -0.9328063
## [3,] 12.1671937
## [4,] 10.8671937
## [5,] 13.6671937
## [6,] 6.1671937
# Scale only, without centring. Because center = FALSE, scale() divides each
# column by its root mean square, sqrt(sum(x^2) / (n - 1)), which is not the
# standard deviation.
housing.sd <- scale(x = housing, center = FALSE, scale = TRUE)
head(x = housing.sd)
## CRIM ZN INDUS CHAS NOX RM AGE
## [1,] 0.0006773027 0.6936817 0.1764759 0 0.9485077 1.038710 0.8788221
## [2,] 0.0029267624 0.0000000 0.5401231 0 0.8268590 1.014381 1.0634826
## [3,] 0.0029246190 0.0000000 0.5401231 0 0.8268590 1.135077 0.8235588
## [4,] 0.0034690332 0.0000000 0.1665443 0 0.8074657 1.105535 0.6173321
## [5,] 0.0073999612 0.0000000 0.1665443 0 0.8074657 1.129073 0.7305546
## [6,] 0.0031989695 0.0000000 0.1665443 0 0.8074657 1.015803 0.7912095
## DIS RAD TAX PTRATIO B LSTAT MEDV
## [1,] 0.9416646 0.07733878 0.6696347 0.8225709 1.077026 0.3425033 0.9852965
## [2,] 1.1436045 0.15467755 0.5474716 0.9569779 1.077026 0.6286104 0.8867668
## [3,] 1.1436045 0.15467755 0.5474716 0.9569779 1.065982 0.2771663 1.4245745
## [4,] 1.3957357 0.23201633 0.5022260 1.0053645 1.070867 0.2022007 1.3712043
## [5,] 1.3957357 0.23201633 0.5022260 1.0053645 1.077026 0.3665748 1.4861555
## [6,] 1.3957357 0.23201633 0.5022260 1.0053645 1.069483 0.3583217 1.1782504
# Neither centre nor scale: the values stay as they are, but scale() still
# returns a numeric matrix instead of the original data frame.
housing.none <- scale(x = housing, center = FALSE, scale = FALSE)
head(x = housing.none)
## CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
## [1,] 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## [2,] 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## [3,] 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## [4,] 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## [5,] 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## [6,] 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## MEDV
## [1,] 24.0
## [2,] 21.6
## [3,] 34.7
## [4,] 33.4
## [5,] 36.2
## [6,] 28.7
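# Nota: con center = FALSE y scale = TRUE, scale() divide cada columna por su raíz cuadrática media, sqrt(sum(x^2)/(n-1)), y no por la desviación estándar.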
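A continuación se define `scale.many()`, una función muy similar a la del ejemplo de `rescale`: añade al data frame una versión estandarizada (con sufijo `.z`) de cada columna indicada.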
#' Append standardised copies of selected data-frame columns
#'
#' For every selected column, a new column named `<original>.z` is added that
#' holds the result of `scale()` applied to that column with the `scale()`
#' defaults.
#'
#' @param dataframe A data frame containing the columns to standardise.
#' @param cols Columns to standardise, given either as positions or as
#'   column names.
#' @return `dataframe` with one extra `.z` column per selected column. A short
#'   status message is written to standard output as a side effect.
scale.many <- function(dataframe, cols) {
  col_labels <- names(dataframe)

  # Resolve every selector to a column position up front. Indexing the name
  # vector with a character selector would yield NA and create a bogus
  # "NA.z" column, so names are matched explicitly.
  positions <- if (is.character(cols)) {
    match(cols, col_labels)
  } else {
    seq_along(dataframe)[cols]
  }
  if (anyNA(positions)) {
    stop("`cols` refers to columns that do not exist in `dataframe`",
         call. = FALSE)
  }

  for (pos in positions) {
    z_label <- paste(col_labels[pos], "z", sep = ".")
    # Single-bracket assignment stores the one-column matrix returned by
    # scale() as an ordinary column.
    dataframe[z_label] <- scale(dataframe[[pos]])
  }

  cat(paste("Hemos normalizado ", length(positions), " variable(s)"))
  dataframe
}
# Add standardised copies of columns 1, 3 and 5 to 8, then preview the result.
housing <- scale.many(housing, c(1, 3, 5:8))
head(housing)
## Hemos normalizado 6 variable(s)
## CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## MEDV CRIM.z INDUS.z NOX.z RM.z AGE.z DIS.z
## 1 24.0 -0.4193669 -1.2866362 -0.1440749 0.4132629 -0.1198948 0.140075
## 2 21.6 -0.4169267 -0.5927944 -0.7395304 0.1940824 0.3668034 0.556609
## 3 34.7 -0.4169290 -0.5927944 -0.7395304 1.2814456 -0.2655490 0.556609
## 4 33.4 -0.4163384 -1.3055857 -0.8344581 1.0152978 -0.8090878 1.076671
## 5 36.2 -0.4120741 -1.3055857 -0.8344581 1.2273620 -0.5106743 1.076671
## 6 28.7 -0.4166314 -1.3055857 -0.8344581 0.2068916 -0.3508100 1.076671