Cargando datos y librerías
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.1.3
# Load the arrests table from disk; text columns are kept as plain character
# vectors rather than being converted to factors.
crime.data <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearningR/data/t1/USArrests.csv",
  stringsAsFactors = FALSE
)
# Check the column types. Note that the state names live in column `X`.
str(object = crime.data)
## 'data.frame': 50 obs. of 5 variables:
## $ X : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ Murder : num 13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
## $ Assault : int 236 263 294 190 276 204 110 238 335 211 ...
## $ UrbanPop: int 58 48 80 50 91 78 77 72 80 60 ...
## $ Rape : num 21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
# Preview the first six rows.
head(x = crime.data, n = 6L)
## X Murder Assault UrbanPop Rape
## 1 Alabama 13.2 236 58 21.2
## 2 Alaska 10.0 263 48 44.5
## 3 Arizona 8.1 294 80 31.0
## 4 Arkansas 8.8 190 50 19.5
## 5 California 9.0 276 91 40.6
## 6 Colorado 7.9 204 78 38.7
Función cbind
# Prepend a `state` identifier column with cbind().
# The state names are stored in column `X`; the data frame only has the
# default numeric row names, so rownames(crime.data) would yield "1", "2", ...
# instead of names. `X` is dropped afterwards so the identifier is not
# duplicated (and is not swept into the value column by `-state` below).
crime.data <- cbind(state = crime.data$X,
                    crime.data[names(crime.data) != "X"])
head(crime.data)
## state Murder Assault UrbanPop Rape
## 1 Alabama 13.2 236 58 21.2
## 2 Alaska 10.0 263 48 44.5
## 3 Arizona 8.1 294 80 31.0
## 4 Arkansas 8.8 190 50 19.5
## 5 California 9.0 276 91 40.6
## 6 Colorado 7.9 204 78 38.7

# NOTE: gather()/spread() are superseded in tidyr by pivot_longer()/
# pivot_wider(); they are kept here because this walkthrough demonstrates them.

# Wide -> long over a contiguous range of columns (Murder through UrbanPop).
# Columns outside the range (state, Rape) are repeated on every row.
crime.data.1 <- gather(crime.data,
                       key = "crime_type",
                       value = "arrest_estimate",
                       Murder:UrbanPop)
head(crime.data.1)
## state Rape crime_type arrest_estimate
## 1 Alabama 21.2 Murder 13.2
## 2 Alaska 44.5 Murder 10.0
## 3 Arizona 31.0 Murder 8.1
## 4 Arkansas 19.5 Murder 8.8
## 5 California 40.6 Murder 9.0
## 6 Colorado 38.7 Murder 7.9

# Wide -> long over every column except `state`. All gathered columns are
# numeric, so `arrest_estimate` stays numeric.
crime.data.2 <- gather(crime.data,
                       key = "crime_type",
                       value = "arrest_estimate",
                       -state)
head(crime.data.2)
## state crime_type arrest_estimate
## 1 Alabama Murder 13.2
## 2 Alaska Murder 10.0
## 3 Arizona Murder 8.1
## 4 Arkansas Murder 8.8
## 5 California Murder 9.0
## 6 Colorado Murder 7.9

# Wide -> long over an explicit list of columns (Murder and Assault only).
crime.data.3 <- gather(crime.data,
                       key = "crime_type",
                       value = "arrest_estimate",
                       Murder, Assault)
head(crime.data.3)
## state UrbanPop Rape crime_type arrest_estimate
## 1 Alabama 58 21.2 Murder 13.2
## 2 Alaska 48 44.5 Murder 10.0
## 3 Arizona 80 31.0 Murder 8.1
## 4 Arkansas 50 19.5 Murder 8.8
## 5 California 91 40.6 Murder 9.0
## 6 Colorado 78 38.7 Murder 7.9

# Long -> wide: the inverse of the `-state` gather above. One column is
# created per distinct `crime_type`, in alphabetical order.
crime.data.4 <- tidyr::spread(crime.data.2,
                              key = "crime_type",
                              value = "arrest_estimate")
head(crime.data.4)
## state Assault Murder Rape UrbanPop
## 1 Alabama 236 13.2 21.2 58
## 2 Alaska 263 10.0 44.5 48
## 3 Arizona 294 8.1 31.0 80
## 4 Arkansas 190 8.8 19.5 50
## 5 California 276 9.0 40.6 91
## 6 Colorado 204 7.9 38.7 78

# Paste two columns into a single character column, separated by "_".
crime.data.5 <- unite(crime.data,
                      col = "Murder_Assault",
                      Murder, Assault,
                      sep = "_")
head(crime.data.5)
## state Murder_Assault UrbanPop Rape
## 1 Alabama 13.2_236 58 21.2
## 2 Alaska 10_263 48 44.5
## 3 Arizona 8.1_294 80 31.0
## 4 Arkansas 8.8_190 50 19.5
## 5 California 9_276 91 40.6
## 6 Colorado 7.9_204 78 38.7

# Split the united column back into its two parts. convert = TRUE re-types
# the pieces; without it both new columns would remain character strings.
crime.data.6 <- separate(crime.data.5,
                         col = "Murder_Assault",
                         into = c("Murder", "Assault"),
                         sep = "_",
                         convert = TRUE)
head(crime.data.6)
## state Murder Assault UrbanPop Rape
## 1 Alabama 13.2 236 58 21.2
## 2 Alaska 10.0 263 48 44.5
## 3 Arizona 8.1 294 80 31.0
## 4 Arkansas 8.8 190 50 19.5
## 5 California 9.0 276 91 40.6
## 6 Colorado 7.9 204 78 38.7