# Tarea-variables-nominales.utf8

# URL of the published Google Sheets CSV export (a quoted string).

link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQYft7Yp8QC6jpsvnGCOfo77MESXOZAFkJUwFB_fRM5sLoPUxmGy4alHAJH5QLWk90C6OrGs_J_0SrV/pub?gid=1361852141&single=true&output=csv"

# read.csv() downloads the CSV and stores it in the data frame 'sere19'.
# Text columns stay as character vectors and empty cells are read as NA.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
sere19 <- read.csv(link, stringsAsFactors = FALSE, na.strings = "")

# Column names of the loaded data frame.
colnames(sere19)

##  [1] "ID"     "SEX"    "RACE"   "SES"    "SCTYP"  "LOCUS"  "CONCPT" "MOT"   
##  [9] "RDG"    "WRTG"   "MATH"   "SCI"    "CIV"

# Compact overview: number of rows, and the type and first values of each column.
utils::str(object = sere19)

## 'data.frame':    600 obs. of  13 variables:
##  $ ID    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ SEX   : chr  "HOMBRE" "MUJER" "HOMBRE" "HOMBRE" ...
##  $ RACE  : chr  "ASIATICO" "ASIATICO" "ASIATICO" "ASIATICO" ...
##  $ SES   : chr  "ALTO" "ALTO" "ALTO" "MEDIO" ...
##  $ SCTYP : chr  "PUBLICA" "PUBLICA" "PUBLICA" "PUBLICA" ...
##  $ LOCUS : num  0.29 -0.42 0.71 0.06 0.22 0.46 0.44 0.68 0.06 0.05 ...
##  $ CONCPT: num  0.88 0.03 0.03 0.03 -0.28 0.03 -0.47 0.25 0.56 0.15 ...
##  $ MOT   : num  0.67 0.33 0.67 0 0 0 0.33 1 0.33 1 ...
##  $ RDG   : num  33.6 46.9 41.6 38.9 36.3 49.5 62.7 44.2 46.9 44.2 ...
##  $ WRTG  : num  43.7 35.9 59.3 41.1 48.9 46.3 64.5 51.5 41.1 49.5 ...
##  $ MATH  : num  40.2 41.9 41.9 32.7 39.5 46.2 48 36.9 45.3 40.5 ...
##  $ SCI   : num  39 36.3 44.4 41.7 41.7 41.7 63.4 49.8 47.1 39 ...
##  $ CIV   : num  40.6 45.6 45.6 40.6 45.6 35.6 55.6 55.6 55.6 50.6 ...

# Absolute frequency of every category, one nominal column at a time.
table(sere19[["SEX"]])

## 
## HOMBRE  MUJER 
##    327    273

table(sere19[["RACE"]])

## 
## ASIATICO   BLANCO  HISPANO    NEGRO 
##       71       34      437       58

table(sere19[["SES"]])

## 
##  ALTO  BAJO MEDIO 
##   139   162   299

# NOTE(review): the recorded output below shows a category literally named
# "2" next to "PUBLICA" -- possibly a miscoded label; confirm against the
# source spreadsheet before interpreting it.
table(sere19[["SCTYP"]])

## 
##       2 PUBLICA 
##      94     506

library(questionr)

## Warning: package 'questionr' was built under R version 4.0.2

library(magrittr)


# Build a frequency table for one nominal variable.
#
# The same three-step recipe used to be copy-pasted once per variable; it
# now lives in a single helper so every table is built the same way.
#
# Arguments:
#   x    Vector of categories (for example, one column of sere19).
#   ...  Extra arguments forwarded to questionr::freq(), for example
#        sort = "dec" to order the rows by decreasing frequency.
#
# Returns a data frame with one row per category and the columns
# "Modalidad" (category label), "Conteo" (count) and "Porcentaje" (percent).
build_freq_table <- function(x, ...) {
  # No total row; NA is excluded from the tabulation.
  counts <- data.frame(
    questionr::freq(x, total = FALSE, exclude = NA, ...)
  )

  # freq() keeps the category labels as row names: promote them to a column.
  freq_table <- data.frame(
    variable = row.names(counts),
    counts,
    row.names = NULL
  )

  names(freq_table) <- c("Modalidad", "Conteo", "Porcentaje")
  freq_table
}

NomDf <- build_freq_table(sere19$SEX)
# Show NomDf
NomDf

##   Modalidad Conteo Porcentaje
## 1    HOMBRE    327       54.5
## 2     MUJER    273       45.5

NomDf <- build_freq_table(sere19$RACE)
# Show NomDf
NomDf

##   Modalidad Conteo Porcentaje
## 1  ASIATICO     71       11.8
## 2    BLANCO     34        5.7
## 3   HISPANO    437       72.8
## 4     NEGRO     58        9.7

NomDf <- build_freq_table(sere19$SES)
# Show NomDf
NomDf

##   Modalidad Conteo Porcentaje
## 1      ALTO    139       23.2
## 2      BAJO    162       27.0
## 3     MEDIO    299       49.8

# This is the last assignment to NomDf, so the code that follows sees the
# SCTYP table.
NomDf <- build_freq_table(sere19$SCTYP)
# Show NomDf
NomDf

##   Modalidad Conteo Porcentaje
## 1         2     94       15.7
## 2   PUBLICA    506       84.3

library(ggplot2) # instalar previamente

## Warning: package 'ggplot2' was built under R version 4.0.2

# Base layer: map the categories to the x axis and their counts to the y axis.
base <- ggplot(NomDf, mapping = aes(x = Modalidad, y = Conteo))

# Add the geometry; stat = "identity" draws the counts exactly as stored
# instead of re-counting rows.
bar1 <- base + geom_bar(stat = "identity")

# Display the plot.
bar1

# Same bar chart as before, with the bars ordered by increasing count.
base <- ggplot(
  data = NomDf,
  aes(
    x = reorder(Modalidad, Conteo),
    y = Conteo
  )
)
bar2 <- base + geom_bar(stat = "identity")

bar2

# Plot texts. NomDf was last built from sere19$SCTYP, so the labels describe
# that variable and the two plotted columns. The previous values ("SEX",
# "RACE", "SES") named variables that are not shown in this chart.
text1 <- "SCTYP"           # title: variable being plotted
text2 <- "Modalidad"       # x axis: category
text3 <- "Conteo"          # y axis: count
text4 <- "Fuente: sere19"  # caption: data object the counts come from

bar2 <- bar2 + labs(
  title = text1,
  x = text2,
  y = text3,
  caption = text4
)
bar2

bar2 <- bar2 + theme_classic()

bar2 <- bar2 + theme(
  plot.title = element_text(
    hjust = 0.5, # centred title
    size = 15
  ),
  plot.caption = element_text(hjust = 0), # left-aligned caption
  axis.text.x = element_text(
    angle = 45, # tilt the category labels
    hjust = 1
  )
)
bar2

library(qcc)

## Warning: package 'qcc' was built under R version 4.0.2

## Package 'qcc' version 2.7

## Type 'citation("qcc")' for citing this R package in publications.

library(DescTools)

## Warning: package 'DescTools' was built under R version 4.0.2

# Most frequent category (mode) of each nominal variable. The "freq"
# attribute of the result is the number of times that category occurs.
# DescTools is already attached above, so library() is not called again
# before every Mode() call.
Mode(sere19$SEX)

## [1] "HOMBRE"
## attr(,"freq")
## [1] 327

Mode(sere19$RACE)

## [1] "HISPANO"
## attr(,"freq")
## [1] 437

Mode(sere19$SES)

## [1] "MEDIO"
## attr(,"freq")
## [1] 299

Mode(sere19$SCTYP)

## [1] "PUBLICA"
## attr(,"freq")
## [1] 506

# Concentration of each nominal variable.
#
# The previous code tabulated sere19$MODALIDAD, which is not a column of
# sere19 ("Modalidad" is a column of NomDf). The table was therefore empty
# and the recorded results (Herfindahl = 0, Laakso-Taagepera = Inf) were
# artefacts, not measurements. The indices are now computed for the four
# nominal columns that exist in the data.
nominal_vars <- c("SEX", "RACE", "SES", "SCTYP")

# Fail early if a column is missing instead of silently tabulating nothing.
stopifnot(all(nominal_vars %in% names(sere19)))

dataTables <- lapply(sere19[nominal_vars], table)

# Kept under its original name: the table of the variable analysed last.
dataTable <- dataTables[["SCTYP"]]

# Reading guide for the Herfindahl index:
# * < 0.01 : the mode is not significant, the categories have similar weights.
# * < 0.15 : the mode is not significant, several categories have similar weights.
# * between 0.15 and 0.3 : there is a mode.
# * > 0.3 : the mode stands out from the other categories.

# Herfindahl index (sum of squared shares), one value per variable.
# From the counts recorded earlier in this file, expect approximately
# SEX 0.504, RACE 0.557, SES 0.375 and SCTYP 0.736.
vapply(dataTables, DescTools::Herfindahl, numeric(1))

# Laakso-Taagepera index: the inverse of the sum of squared shares,
# read as the effective number of categories.
vapply(
  dataTables,
  function(tab) 1 / sum(prop.table(tab)^2),
  numeric(1)
)