library(scales)
## Warning: package 'scales' was built under R version 4.1.3
# Read the sample CSV into `students` and show the structure of the result.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the file exists at exactly this location.
students <- read.csv("C:/Users/LUIS 1/Desktop/MachineLearningR/data/t1/data-conversion.csv")
str(students)
## 'data.frame': 10 obs. of 5 variables:
## $ Age : int 23 13 36 31 58 29 39 50 23 36
## $ State : chr "NJ" "NY" "NJ" "VA" ...
## $ Gender: chr "F" "M" "M" "F" ...
## $ Height: int 61 55 66 64 70 63 67 70 61 66
## $ Income: int 5000 1000 3000 4000 30000 10000 50000 55000 2000 20000
# Show up to the first 10 rows of the data frame.
head(students, 10)
## Age State Gender Height Income
## 1 23 NJ F 61 5000
## 2 13 NY M 55 1000
## 3 36 NJ M 66 3000
## 4 31 VA F 64 4000
## 5 58 NY F 70 30000
## 6 29 TX F 63 10000
## 7 39 NJ M 67 50000
## 8 50 VA M 70 55000
## 9 23 TX F 61 2000
## 10 36 VA M 66 20000
rescale(): la función rescale() devuelve un vector de valores continuos reescalado de acuerdo a un valor mínimo y un valor máximo.
# Rescale Income, store the result as a new column, then display that column.
students$Income.rescaled <- rescale(students$Income)
print(students$Income.rescaled)
## [1] 0.07407407 0.00000000 0.03703704 0.05555556 0.53703704 0.16666667
## [7] 0.90740741 1.00000000 0.01851852 0.35185185
Esto es lo que básicamente haría la función.
# Manual min-max normalisation of Income: subtract the minimum and divide by
# the range (max - min).
with(students, (Income - min(Income)) / (max(Income) - min(Income)))
## [1] 0.07407407 0.00000000 0.03703704 0.05555556 0.53703704 0.16666667
## [7] 0.90740741 1.00000000 0.01851852 0.35185185
# Check, element by element, that the manual min-max formula gives the same
# values as rescale(). Doubles are compared within a small tolerance instead of
# with `==`, which can report FALSE for values that differ only by rounding
# error. The comparison no longer re-assigns students$Income.rescaled as a
# hidden side effect.
abs(
  (students$Income - min(students$Income)) /
    (max(students$Income) - min(students$Income)) -
    rescale(students$Income)
) < sqrt(.Machine$double.eps)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
También se le puede especificar el rango de salida (mínimo y máximo) mediante el argumento to.
# Rescale Income to a custom target range (0 to 100) through the `to` argument.
rescale(students$Income, to = c(0, 100))
## [1] 7.407407 0.000000 3.703704 5.555556 53.703704 16.666667
## [7] 90.740741 100.000000 1.851852 35.185185
A continuación, una función que, dado un dataset y un vector que identifica columnas, reescala las columnas indicadas en dicho vector.
# Rescale several columns of a data frame, appending each result as a new
# column named "<original name>.rescaled".
#
# Arguments:
#   dataframe  Data frame that holds the columns to rescale.
#   cols       Columns to rescale, given either as positions or as names.
#
# Prints how many columns were processed and returns the data frame with the
# new columns added (an existing column with the same name is replaced).
rescale.many <- function(dataframe, cols) {
  col_names <- names(dataframe)
  for (col in cols) {
    # A name is used as given; a position is translated into its name.
    # Indexing the names vector with a character value would yield NA and
    # every new column would end up being called "NA.rescaled".
    base_name <- if (is.character(col)) col else col_names[col]
    new_name <- paste(base_name, "rescaled", sep = ".")
    # `[[` extracts the column itself, whatever kind of data frame is passed.
    dataframe[[new_name]] <- rescale(dataframe[[col]])
  }
  cat(paste("Hemos reescalado ", length(cols), " variable(s)"))
  dataframe
}
# Append rescaled versions of columns 1, 4 and 5 to the data frame.
students <- rescale.many(dataframe = students, cols = c(1, 4, 5))
## Hemos reescalado 3 variable(s)
# Display the full data frame, including the added *.rescaled columns.
print(students)
## Age State Gender Height Income Income.rescaled Age.rescaled Height.rescaled
## 1 23 NJ F 61 5000 0.07407407 0.2222222 0.4000000
## 2 13 NY M 55 1000 0.00000000 0.0000000 0.0000000
## 3 36 NJ M 66 3000 0.03703704 0.5111111 0.7333333
## 4 31 VA F 64 4000 0.05555556 0.4000000 0.6000000
## 5 58 NY F 70 30000 0.53703704 1.0000000 1.0000000
## 6 29 TX F 63 10000 0.16666667 0.3555556 0.5333333
## 7 39 NJ M 67 50000 0.90740741 0.5777778 0.8000000
## 8 50 VA M 70 55000 1.00000000 0.8222222 1.0000000
## 9 23 TX F 61 2000 0.01851852 0.2222222 0.4000000
## 10 36 VA M 66 20000 0.35185185 0.5111111 0.7333333