Introducción a RStudio: matriz de datos

Estas son las operaciones realizadas en el video correspondiente al tema.

Creación de un data frame ficticio

# Group labels: 20 "experimental" cases followed by 20 "control" cases.
grupo <- rep(c("experimental", "control"), each = 20)
grupo

##  [1] "experimental" "experimental" "experimental" "experimental" "experimental"
##  [6] "experimental" "experimental" "experimental" "experimental" "experimental"
## [11] "experimental" "experimental" "experimental" "experimental" "experimental"
## [16] "experimental" "experimental" "experimental" "experimental" "experimental"
## [21] "control"      "control"      "control"      "control"      "control"     
## [26] "control"      "control"      "control"      "control"      "control"     
## [31] "control"      "control"      "control"      "control"      "control"     
## [36] "control"      "control"      "control"      "control"      "control"

# Simulated scores: the first 20 values are drawn from a normal distribution
# with mean 5 and sd 5, the last 20 from one with mean 35 and sd 4.
# No seed is set, so the values differ on every run.
puntaje <- c(
  rnorm(n = 20, mean = 5, sd = 5),
  rnorm(n = 20, mean = 35, sd = 4)
)
puntaje

##  [1]  7.0448065 -1.7635494 -0.9661607  3.3180538 -1.3788068  3.8476207
##  [7] 11.6251510 10.8810384  5.0245474  9.8877433  4.4876607  3.9862112
## [13]  2.2145108  7.5186145  4.5699625  6.9373003  4.5859927  8.8651768
## [19]  3.6252316  4.1437260 34.8415443 32.9772615 37.7736161 36.4083677
## [25] 31.3998769 28.3572260 35.3174458 31.5359052 37.5778389 39.8846166
## [31] 36.7964508 30.3064774 37.7954841 35.6373317 33.3200137 37.6133593
## [37] 41.2649011 30.6844942 32.0374443 33.1825020

# Round the scores to two decimals (in the video this step is done later).
puntaje <- round(puntaje, digits = 2)

# Bind both vectors column-wise. A matrix holds a single type, so combining
# a character vector with a numeric one yields a character matrix.
puntajes.por.grupo <- cbind(grupo = grupo, puntaje = puntaje)
class(puntajes.por.grupo)

## [1] "matrix" "array"

# Convert the character matrix built with cbind() into a data frame.
puntajes.por.grupo <- as.data.frame(puntajes.por.grupo)

# The matrix could only hold one type, so the scores arrived as text.
# Turn them back into numbers so they can be summarised and are written to
# disk as numeric values. Going through as.character() keeps the conversion
# correct even if the column was created as a factor.
puntajes.por.grupo$puntaje <- as.numeric(
  as.character(puntajes.por.grupo$puntaje)
)

# Inspect the names, class and row names of the data frame.
attributes(puntajes.por.grupo)

## $names
## [1] "grupo"   "puntaje"
## 
## $class
## [1] "data.frame"
## 
## $row.names
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

# Check how the group column is stored.
class(puntajes.por.grupo[["grupo"]])

## [1] "character"

# In this case it was read as character,
# so we force it to be a factor and list its levels.
puntajes.por.grupo[["grupo"]] <- factor(puntajes.por.grupo[["grupo"]])
levels(puntajes.por.grupo[["grupo"]])

## [1] "control"      "experimental"

# First attempt with the write.table() defaults: fields separated by a
# space and row names included.
write.table(puntajes.por.grupo, "basenueva.csv")
# Second attempt overwrites the same file, using ";" as the field separator
# and leaving the row names out. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
write.table(puntajes.por.grupo, "basenueva.csv", sep = ";", row.names = FALSE)

Importación de la base individual de la Encuesta Permanente de Hogares cuarto trimestre de 2022

# The video imports the 3rd quarter of 2018; here the 4th quarter of 2022
# is used. The doubled ".txt" is the name the file has when it is
# downloaded from the INDEC site.

# First attempt: read.table() defaults (whitespace separator, no header).
eph.4.22 <- read.table("usu_individual_T422.txt.txt")

# Second attempt: fields split on ";", first row still read as data.
eph.4.22 <- read.table("usu_individual_T422.txt.txt", sep = ";")

# Final version: ";" separator and the first row used as column names.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
eph.4.22 <- read.table(
  "usu_individual_T422.txt.txt",
  sep = ";",
  header = TRUE
)

# Confirm the class of the imported object.
class(eph.4.22)

## [1] "data.frame"

# List the column names of the imported file.
names(eph.4.22)

##   [1] "CODUSU"     "ANO4"       "TRIMESTRE"  "NRO_HOGAR"  "COMPONENTE"
##   [6] "H15"        "REGION"     "MAS_500"    "AGLOMERADO" "PONDERA"   
##  [11] "CH03"       "CH04"       "CH05"       "CH06"       "CH07"      
##  [16] "CH08"       "CH09"       "CH10"       "CH11"       "CH12"      
##  [21] "CH13"       "CH14"       "CH15"       "CH15_COD"   "CH16"      
##  [26] "CH16_COD"   "NIVEL_ED"   "ESTADO"     "CAT_OCUP"   "CAT_INAC"  
##  [31] "IMPUTA"     "PP02C1"     "PP02C2"     "PP02C3"     "PP02C4"    
##  [36] "PP02C5"     "PP02C6"     "PP02C7"     "PP02C8"     "PP02E"     
##  [41] "PP02H"      "PP02I"      "PP03C"      "PP03D"      "PP3E_TOT"  
##  [46] "PP3F_TOT"   "PP03G"      "PP03H"      "PP03I"      "PP03J"     
##  [51] "INTENSI"    "PP04A"      "PP04B_COD"  "PP04B1"     "PP04B2"    
##  [56] "PP04B3_MES" "PP04B3_ANO" "PP04B3_DIA" "PP04C"      "PP04C99"   
##  [61] "PP04D_COD"  "PP04G"      "PP05B2_MES" "PP05B2_ANO" "PP05B2_DIA"
##  [66] "PP05C_1"    "PP05C_2"    "PP05C_3"    "PP05E"      "PP05F"     
##  [71] "PP05H"      "PP06A"      "PP06C"      "PP06D"      "PP06E"     
##  [76] "PP06H"      "PP07A"      "PP07C"      "PP07D"      "PP07E"     
##  [81] "PP07F1"     "PP07F2"     "PP07F3"     "PP07F4"     "PP07F5"    
##  [86] "PP07G1"     "PP07G2"     "PP07G3"     "PP07G4"     "PP07G_59"  
##  [91] "PP07H"      "PP07I"      "PP07J"      "PP07K"      "PP08D1"    
##  [96] "PP08D4"     "PP08F1"     "PP08F2"     "PP08J1"     "PP08J2"    
## [101] "PP08J3"     "PP09A"      "PP09A_ESP"  "PP09B"      "PP09C"     
## [106] "PP09C_ESP"  "PP10A"      "PP10C"      "PP10D"      "PP10E"     
## [111] "PP11A"      "PP11B_COD"  "PP11B1"     "PP11B2_MES" "PP11B2_ANO"
## [116] "PP11B2_DIA" "PP11C"      "PP11C99"    "PP11D_COD"  "PP11G_ANO" 
## [121] "PP11G_MES"  "PP11G_DIA"  "PP11L"      "PP11L1"     "PP11M"     
## [126] "PP11N"      "PP11O"      "PP11P"      "PP11Q"      "PP11R"     
## [131] "PP11S"      "PP11T"      "P21"        "DECOCUR"    "IDECOCUR"  
## [136] "RDECOCUR"   "GDECOCUR"   "PDECOCUR"   "ADECOCUR"   "PONDIIO"   
## [141] "TOT_P12"    "P47T"       "DECINDR"    "IDECINDR"   "RDECINDR"  
## [146] "GDECINDR"   "PDECINDR"   "ADECINDR"   "PONDII"     "V2_M"      
## [151] "V3_M"       "V4_M"       "V5_M"       "V8_M"       "V9_M"      
## [156] "V10_M"      "V11_M"      "V12_M"      "V18_M"      "V19_AM"    
## [161] "V21_M"      "T_VI"       "ITF"        "DECIFR"     "IDECIFR"   
## [166] "RDECIFR"    "GDECIFR"    "PDECIFR"    "ADECIFR"    "IPCF"      
## [171] "DECCFR"     "IDECCFR"    "RDECCFR"    "GDECCFR"    "PDECCFR"   
## [176] "ADECCFR"    "PONDIH"

Designación como factores y asignación de categorías textuales

# Copy the CH04 codes into a new factor column named sexo.
eph.4.22$sexo <- as.factor(eph.4.22$CH04)
levels(eph.4.22$sexo)

## [1] "1" "2"

# Relabel by code rather than by position: with a named list each label
# stays attached to its code even if a code is absent from the data,
# whereas a plain vector of labels would silently shift.
levels(eph.4.22$sexo) <- list(varon = "1", mujer = "2")
eph.4.22$ESTADO <- as.factor(eph.4.22$ESTADO)
levels(eph.4.22$ESTADO)

## [1] "0" "1" "2" "3" "4"

# Codes 1 to 3 receive a text label; codes that are not listed here
# (0 and 4) become NA.
levels(eph.4.22$ESTADO) <- list(
  ocupade = "1",
  desocupade = "2",
  inactive = "3"
)
levels(eph.4.22$ESTADO)

## [1] "ocupade"    "desocupade" "inactive"

Introducción a RStudio: matriz de datos

Eduardo León Bologna

20 de junio de 2023

Creación de un data frame ficticio

Importación de la base individual de la Encuesta Permanente de Hogares cuarto trimestre de 2022

Designación como factores y asignación de categorías textuales