library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Load the built-in iris data set into the workspace and print per-column
# summary statistics (quartiles for numeric columns, counts for Species).
data(iris)
summary(object = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Show the first ten observations to get a feel for the raw values.
head(iris, n = 10L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
# Use the factor() function to recode the values of the Species variable.
# Recode Species in place: factor() maps each existing level to the label in
# the same position ("setosa" -> "1", "versicolor" -> "2", "virginica" -> "3").
# Columns are referenced explicitly as iris$<col>; attach() is avoided because
# it places a copy of the data frame on the search path that does not see
# later modifications to `iris` and can mask other objects.
iris$Species <- factor(
  iris$Species,
  levels = c("setosa", "versicolor", "virginica"),
  labels = c(1, 2, 3)
)
levels(iris$Species)
## [1] "1" "2" "3"
# Threshold used to split observations into "low" / "high" sepal width.
mean_val <- mean(iris$Sepal.Width)
mean_val
## [1] 3.057333
# Widths strictly below the mean are "low"; widths at or above it are "high".
iris$category <- ifelse(iris$Sepal.Width < mean_val, "low", "high")
# Convert the column to a factor
iris$category <- factor(iris$category)
levels(iris$category)
## [1] "high" "low"
# Turn category into an ordered factor with low < high, relabelled 1 and 2.
# `levels` must match the existing values exactly (matching is
# case-sensitive): the column holds "low"/"high", so capitalised names would
# match nothing and silently turn every row into NA.
iris$category <- ordered(
  iris$category,
  levels = c("low", "high"),
  labels = c(1, 2)
)
levels(iris$category)
## [1] "1" "2"
# Recode using match().
# Lookup-table recode: the i-th entry of `oldvalues` is replaced by the i-th
# entry of `newvalues`.
oldvalues <- c(1, 2)
newvalues <- factor(c("v1", "v2")) # stored as a factor so the result is one too
# match() returns, for every row, the position of its current code in
# `oldvalues`; indexing `newvalues` by that position yields the new label.
iris[["category"]] <- newvalues[match(iris[["category"]], oldvalues)]
levels(iris[["category"]])
## [1] "v1" "v2"
# The easiest way is to use revalue() or mapvalues() from the plyr package.
library(plyr)
# Species was already recoded to "1"/"2"/"3" earlier in this script, so
# revalue()/mapvalues() would find none of the original names and do nothing.
# Restore the original labels from the pristine data set before each
# demonstration so that both calls actually perform the recoding.

# Option 1: revalue() takes a named vector of the form c(old = new).
iris$Species <- datasets::iris$Species
iris$Species <- revalue(
  iris$Species,
  c("setosa" = "1", "versicolor" = "2", "virginica" = "3")
)
levels(iris$Species)
## [1] "1" "2" "3"

# Option 2: mapvalues() takes parallel `from` / `to` vectors.
iris$Species <- datasets::iris$Species
iris$Species <- mapvalues(
  iris$Species,
  from = c("setosa", "versicolor", "virginica"),
  to = c("1", "2", "3")
)
levels(iris$Species)
## [1] "1" "2" "3"