Carga y vistazo rápido del dataset

library(scales)
## Warning: package 'scales' was built under R version 4.1.3
# Location of the input CSV.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
csv.path <- "C:/Users/LUIS 1/Desktop/MachineLearningR/data/t1/data-conversion.csv"
students <- read.csv(file = csv.path)
# Compact overview of the columns and their types.
str(object = students)
## 'data.frame':    10 obs. of  5 variables:
##  $ Age   : int  23 13 36 31 58 29 39 50 23 36
##  $ State : chr  "NJ" "NY" "NJ" "VA" ...
##  $ Gender: chr  "F" "M" "M" "F" ...
##  $ Height: int  61 55 66 64 70 63 67 70 61 66
##  $ Income: int  5000 1000 3000 4000 30000 10000 50000 55000 2000 20000
head(students, 10)
##    Age State Gender Height Income
## 1   23    NJ      F     61   5000
## 2   13    NY      M     55   1000
## 3   36    NJ      M     66   3000
## 4   31    VA      F     64   4000
## 5   58    NY      F     70  30000
## 6   29    TX      F     63  10000
## 7   39    NJ      M     67  50000
## 8   50    VA      M     70  55000
## 9   23    TX      F     61   2000
## 10  36    VA      M     66  20000

Función rescale()

La función rescale() reescala un vector de valores continuos a un rango definido por un valor mínimo y un valor máximo (por defecto, de 0 a 1).

# Store the rescaled incomes as a new column, then display that column.
students$Income.rescaled <- rescale(students$Income)
print(students$Income.rescaled)
##  [1] 0.07407407 0.00000000 0.03703704 0.05555556 0.53703704 0.16666667
##  [7] 0.90740741 1.00000000 0.01851852 0.35185185

Esto es lo que básicamente haría la función.

(students$Income - min(students$Income))/
  (max(students$Income) - min(students$Income))
##  [1] 0.07407407 0.00000000 0.03703704 0.05555556 0.53703704 0.16666667
##  [7] 0.90740741 1.00000000 0.01851852 0.35185185
# Element-wise check that the manual min-max formula reproduces rescale().
# Both sides are doubles, so they are compared within a tolerance instead of
# with `==`, and nothing is assigned as a side effect of the comparison.
income.manual <- (students$Income - min(students$Income)) /
  (max(students$Income) - min(students$Income))
abs(income.manual - rescale(students$Income)) < sqrt(.Machine$double.eps)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

También se le puede especificar el rango de salida (mínimo y máximo) mediante el argumento to.

rescale(students$Income, to = c(0, 100))
##  [1]   7.407407   0.000000   3.703704   5.555556  53.703704  16.666667
##  [7]  90.740741 100.000000   1.851852  35.185185

Una función que, dado un dataset y un vector con las posiciones de las columnas, reescala dichas columnas y añade cada resultado como una nueva columna con el sufijo ".rescaled".

# Rescale several columns of a data frame and append the results.
#
# Each selected column is passed to rescale() and the result is stored in a
# column named "<original name>.rescaled" (an existing column with that name
# is overwritten).
#
# Args:
#   dataframe: data frame holding the columns to rescale.
#   cols: columns to rescale, given as positions or as column names.
#
# Returns:
#   `dataframe` with one "<name>.rescaled" column per entry of `cols`.
#   A short summary message is also written with cat().
rescale.many <- function(dataframe, cols){
  col.names <- names(dataframe)
  # Translate names into positions so both kinds of input are labelled from
  # the real column name (indexing the name vector by a name yields NA).
  positions <- if (is.character(cols)) match(cols, col.names) else cols
  if (anyNA(positions)) {
    stop("Columna(s) no encontrada(s): ",
         paste(cols[is.na(positions)], collapse = ", "), call. = FALSE)
  }
  for(pos in positions){
    new.name <- paste(col.names[pos], "rescaled", sep = ".")
    # `[[` always extracts the column as a plain vector.
    dataframe[[new.name]] <- rescale(dataframe[[pos]])
  }
  cat(paste("Hemos reescalado ", length(cols), " variable(s)"))
  dataframe
}


students <- rescale.many(students, c(1,4,5))
## Hemos reescalado  3  variable(s)
print(students)
##    Age State Gender Height Income Income.rescaled Age.rescaled Height.rescaled
## 1   23    NJ      F     61   5000      0.07407407    0.2222222       0.4000000
## 2   13    NY      M     55   1000      0.00000000    0.0000000       0.0000000
## 3   36    NJ      M     66   3000      0.03703704    0.5111111       0.7333333
## 4   31    VA      F     64   4000      0.05555556    0.4000000       0.6000000
## 5   58    NY      F     70  30000      0.53703704    1.0000000       1.0000000
## 6   29    TX      F     63  10000      0.16666667    0.3555556       0.5333333
## 7   39    NJ      M     67  50000      0.90740741    0.5777778       0.8000000
## 8   50    VA      M     70  55000      1.00000000    0.8222222       1.0000000
## 9   23    TX      F     61   2000      0.01851852    0.2222222       0.4000000
## 10  36    VA      M     66  20000      0.35185185    0.5111111       0.7333333