# Packages used in this chunk: questionr provides freq(), magrittr provides
# the %>% pipe, ggplot2 provides the plotting functions.
library(questionr)
library(magrittr)
library(ggplot2)

# Published Google Sheets CSV export holding the `escolares` data set.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTFp8tPWkUD3qMcuXsqySAHJUZBoIjiFb_pyIJfTiQAK070YNo8G__7wOD_nl_UPYdWnMbW7I5VbRxr/pub?gid=477181888&single=true&output=csv"

# Keep text columns as character and read empty cells as NA.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change this call.
escolares <- read.csv(link, stringsAsFactors = FALSE, na.strings = "")

# Frequency table of RACE: no total row, sorted by decreasing count, NA dropped.
NomDf <- freq(escolares$RACE, total = FALSE, sort = "dec", exclude = NA) %>%
  data.frame()

# Copy the row names (the category labels) into a regular `variable` column so
# they can be mapped to the x aesthetic, then discard the row names.
NomDf <- data.frame(variable = row.names(NomDf), NomDf, row.names = NULL)

# Bar chart of the precomputed counts: stat = "identity" plots `n` as given
# instead of counting rows.
base <- ggplot(data = NomDf, aes(x = variable, y = n))
bar1 <- base + geom_bar(stat = "identity")
bar1

# Plot annotations (title, x label, y label, caption).
# NOTE(review): the wording talks about an "acción" and cites a municipality as
# the source, while the chart shows RACE counts of students. Confirm these
# labels belong to this data set.
text1 <- "¿Qué acción es mas común?"
text2 <- "Acción"
text3 <- "Conteo"
text4 <- "Fuente: Municipalidad de San Isidro"

# Add the annotations to the bar chart. The result is only stored in `bar2`;
# nothing is printed here.
bar2 <- bar1 +
  labs(title = text1, x = text2, y = text3, caption = text4)

# Attach the packages used by the later analysis chunks.
library(qcc)
library(DescTools)

# Most frequent RACE category.
Mode(escolares$RACE)
[1] "HISPANO"
# Absolute counts per RACE category.
dataTable <- table(escolares$RACE)

# Plot from NomDf, the RACE frequency table. No object named `OrdDf` exists at
# this point, so referencing it stops the chunk with "object 'OrdDf' not found".
base <- ggplot(data = NomDf, aes(x = variable, y = n))
# Pin the x-axis order to the row order of NomDf rather than the default
# ordering of the discrete scale, then display the chart.
bar1 <- bar1 +
  scale_x_discrete(limits = NomDf$variable)
bar1

# Pareto chart of the RACE counts. `cumperc` is passed the cumulative
# percentages 0, 50, 80 and 100 (presumably the marks on the cumulative axis;
# confirm against the qcc documentation).
pareto.chart(table(escolares$RACE),cumperc = c(0,50,80,100))
          
Pareto chart analysis for table(escolares$RACE)
            Frequency  Cum.Freq. Percentage Cum.Percent.
  HISPANO  437.000000 437.000000  72.833333    72.833333
  ASIATICO  71.000000 508.000000  11.833333    84.666667
  NEGRO     58.000000 566.000000   9.666667    94.333333
  BLANCO    34.000000 600.000000   5.666667   100.000000

# Herfindahl concentration index computed from the RACE counts in dataTable.
Herfindahl(dataTable)
[1] 0.5570278
# Inverse of the sum of squared category shares (the reciprocal of the
# Herfindahl index), i.e. the effective number of categories.
1 / sum(prop.table(dataTable)^2)
[1] 1.795243

CON LA VARIABLE ORDINAL

# Absolute counts per SES category.
table(escolares$SES)

 ALTO  BAJO MEDIO 
  139   162   299 
# Build the SES frequency table the same way NomDf is built from RACE.
# Without this step `OrdDf` does not exist and the ggplot() call below stops
# with "object 'OrdDf' not found".
OrdDf <- freq(escolares$SES, total = FALSE, exclude = NA) %>%
  data.frame()
OrdDf <- data.frame(variable = row.names(OrdDf), OrdDf, row.names = NULL)

# NOTE(review): if SES is stored as plain text, the categories will be drawn in
# alphabetical order; set the ordinal order explicitly if that matters.
base <- ggplot(data = OrdDf, aes(x = variable, y = n))
# Print the raw SES values.
escolares$SES

Nombres de variables

# Column names of the data set.
names(escolares)
 [1] "ID"     "SEX"    "RACE"   "SES"    "SCTYP"  "LOCUS"  "CONCPT" "MOT"    "RDG"    "WRTG"  
[11] "MATH"   "SCI"    "CIV"   

Tipo de variable (número, texto o factor)

# Compact overview of the data set: dimensions plus the type and first values
# of every column.
str(escolares)
'data.frame':   600 obs. of  13 variables:
 $ ID    : Factor w/ 600 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ SEX   : Factor w/ 2 levels "HOMBRE","MUJER": 1 2 1 1 1 2 2 1 2 1 ...
 $ RACE  : Factor w/ 4 levels "ASIATICO","BLANCO",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ SES   : Ord.factor w/ 3 levels "1_bajo"<"2_medio"<..: 3 3 3 2 2 2 3 3 2 3 ...
 $ SCTYP : Factor w/ 2 levels "PRIVADA","PUBLICA": 2 2 2 2 2 2 2 2 2 2 ...
 $ LOCUS : num  0.29 -0.42 0.71 0.06 0.22 0.46 0.44 0.68 0.06 0.05 ...
 $ CONCPT: num  0.88 0.03 0.03 0.03 -0.28 0.03 -0.47 0.25 0.56 0.15 ...
 $ MOT   : num  0.67 0.33 0.67 0 0 0 0.33 1 0.33 1 ...
 $ RDG   : num  33.6 46.9 41.6 38.9 36.3 49.5 62.7 44.2 46.9 44.2 ...
 $ WRTG  : num  43.7 35.9 59.3 41.1 48.9 46.3 64.5 51.5 41.1 49.5 ...
 $ MATH  : num  40.2 41.9 41.9 32.7 39.5 46.2 48 36.9 45.3 40.5 ...
 $ SCI   : num  39 36.3 44.4 41.7 41.7 41.7 63.4 49.8 47.1 39 ...
 $ CIV   : num  40.6 45.6 45.6 40.6 45.6 35.6 55.6 55.6 55.6 50.6 ...

Explorar la variable categórica, sobre todo como variable dependiente, significa querer saber:

¿Las modalidades de la variable se distribuyen uniformemente? ¿Hay valores que resaltan? Los que resaltan, ¿acaparan los conteos u ocurrencias? ¿Qué problema vemos? ¿Qué nos podemos preguntar? Y si además la variable fuese ordinal, podríamos querer saber:

¿Hasta qué valor llega el 50% de la población? ¿Y el 25% de los valores más bajos? ¿Desde qué valor comienza el 25% superior de la población?

FUNCIÓN TABLE

# Absolute counts per RACE category.
table(escolares$RACE)

ASIATICO   BLANCO  HISPANO    NEGRO 
      71       34      437       58 
# Print the RACE frequency table.
NomDf

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Example chunk from the R Notebook template: plots the `cars` data set.
plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KbGluaz0iaHR0cHM6Ly9kb2NzLmdvb2dsZS5jb20vc3ByZWFkc2hlZXRzL2QvZS8yUEFDWC0xdlRGcDh0UFdrVUQzcU1jdVhzcXlTQUhKVVpCb0lqaUZiX3B5SUpmVGlRQUswNzBZTm84R19fN3dPRF9ubF9VUFlkV25NYlc3STVWYlJ4ci9wdWI/Z2lkPTQ3NzE4MTg4OCZzaW5nbGU9dHJ1ZSZvdXRwdXQ9Y3N2Igplc2NvbGFyZXM9cmVhZC5jc3YobGluaywgc3RyaW5nc0FzRmFjdG9ycyA9IEYsbmEuc3RyaW5ncyA9ICcnKQpsaWJyYXJ5KHF1ZXN0aW9ucikKbGlicmFyeShtYWdyaXR0cikKTm9tRGY9ZnJlcShlc2NvbGFyZXMkUkFDRSx0b3RhbCA9IEYsc29ydCA9ICdkZWMnLGV4Y2x1ZGUgPSBjKE5BKSkgJT4lIGRhdGEuZnJhbWUoKQpOb21EZj1kYXRhLmZyYW1lKHZhcmlhYmxlPXJvdy5uYW1lcyhOb21EZiksTm9tRGYscm93Lm5hbWVzID0gTlVMTCkKbGlicmFyeShnZ3Bsb3QyKQpiYXNlID0gZ2dwbG90KGRhdGE9Tm9tRGYsYWVzKHg9dmFyaWFibGUseT1uKSkgCmJhcjEgPSBiYXNlICsgZ2VvbV9iYXIoc3RhdD0naWRlbnRpdHknKSAKYmFyMQp0ZXh0MT0iwr9RdcOpIGFjY2nDs24gZXMgbWFzIGNvbcO6bj8iCnRleHQyPSJBY2Npw7NuIgp0ZXh0Mz0iQ29udGVvIgp0ZXh0ND0iRnVlbnRlOiBNdW5pY2lwYWxpZGFkIGRlIFNhbiBJc2lkcm8iCgpiYXIyPSBiYXIxICsgbGFicyh0aXRsZT10ZXh0MSwKICAgICAgICAgICAgICAgICAgICAgIHggPXRleHQyLCAKICAgICAgICAgICAgICAgICAgICAgIHkgPSB0ZXh0MywKICAgICAgICAgICAgICAgICAgICAgIGNhcHRpb24gPSB0ZXh0NCkgCmxpYnJhcnkocWNjKQpsaWJyYXJ5KERlc2NUb29scykKTW9kZShlc2NvbGFyZXMkUkFDRSkKZGF0YVRhYmxlPXRhYmxlKGVzY29sYXJlcyRSQUNFKQpiYXNlPWdncGxvdChkYXRhID0gT3JkRGYsIGFlcyh4PXZhcmlhYmxlLCB5PW4pKQoKYmFyPSBiYXNlICsgZ2VvbV9iYXIoc3RhdCA9ICdpZGVudGl0eScpCgpiYXIKCmBgYApgYGB7cn0KYmFyMSA9IGJhcjEgKyBzY2FsZV94X2Rpc2NyZXRlKGxpbWl0cyA9IE5vbURmJHZhcmlhYmxlKQpiYXIxCgpgYGAKCmBgYHtyfQpwYXJldG8uY2hhcnQodGFibGUoZXNjb2xhcmVzJFJBQ0UpLGN1bXBlcmMgPSBjKDAsNTAsODAsMTAwKSkKYGBgCgoKCgoKYGBge3J9CkhlcmZpbmRhaGwoZGF0YVRhYmxlKQpgYGAKYGBge3J9CjEvc3VtKHByb3AudGFibGUoZGF0YVRhYmxlKSoqMikKYGBgCgpDT04gTEEgT1JESU5BTApgYGB7cn0KdGFibGUoZXNjb2xhcmVzJFNFUykKCmBgYApgYGB7cn0KKGVzY29sYXJlcyRTRVMpCmBgYAoKCgoKTm9tYnJlcyBkZSB2YXJpYWJsZXMKYGBge3J9Cm5hbWVzKGVzY29sYXJlcykKYGBgClRpcG8gZGUgdmFyaWFibGUgKG51bWVybywgdGV4dG8gbyBmYWN0b3IpCmBgYHtyfQpzdHIoZXNjb2xhcmVzKQpgYGAKRXhwbG9yYXIgbGEgY2F0ZWdvcmljYSwgc29icmUgdG9kbyBjb21vIHZhcmlhYmxl
IGRlcGVuZGllbnRlLCBzaWduaWZpY2EgcXVlcmVyIHNhYmVyOgoKTGFzIG1vZGFsaWRhZGVzIGRlIGxhIHZhcmlhYmxlIHNlIGRpc3RyaWJ1eWVuIHVuaWZvcm1lbWVudGU/CkhheSB2YWxvcmVzIHF1ZSByZXNhbHRhbj8gTG9zIHF1ZSByZXNhbHRhbiwgYWNhcGFyYW4gbG9zIGNvbnRlb3MgdSBvY3VycmVuY2lhcz8KUXVlIHByb2JsZW1hIHZlbW9zPyBxdWUgbm9zIHBvZGVtb3MgcHJlZ3VudGFyPwpZIHNpIGFkZW3DoXMgbGEgdmFyaWFibGUgZnVlc2Ugb3JkaW5hbCwgcG9kcmlhbW9zIHF1ZXJlciBzYWJlcjoKCkhhc3RhIHF1ZSB2YWxvciBsbGVnYSBlbCA1MCUgZGUgbGEgcG9ibGFjaW9uPyB5IGVsIDI1JSBkZSBsb3MgdmFsb3JlcyBtYXMgYmFqbz8KRGVzZGUgcXVlIHZhbG9yIGNvbWllbnphIGVsIDI1JSBkZSBsYSBwb2JsYWNpb24/CgoKRlVOQ0lPTiBUQUJMRQpgYGB7cn0KdGFibGUoZXNjb2xhcmVzJFJBQ0UpCmBgYAoKYGBge3J9Ck5vbURmCmBgYAoKCmBgYHtyfQoKYGBgCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIAoKVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiAKCmBgYHtyfQpwbG90KGNhcnMpCmBgYAoKQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkN0cmwrQWx0K0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDdHJsK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCg==