tareAAAAA.utf8.md

library(rio)
# Location of the SPSS (.sav) data file on GitHub.
linkGIT <- "https://github.com/JoseManuelMagallanes/Estadistica_Para_AnalisisPolitico/raw/master/hsb.sav"
# Read the remote file into A with rio's import().
A <- import(linkGIT)

# Inspect the column types as imported.
str(A)

## 'data.frame':    600 obs. of  15 variables:
##  $ ID    : num  1 2 3 4 5 6 7 8 9 10 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ SEX   : num  2 1 2 2 2 1 1 2 1 2 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ RACE  : num  2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ SES   : num  1 1 1 2 2 2 1 1 2 1 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ SCTYP : num  1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ HSP   : num  3 2 2 3 3 2 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ LOCUS : num  0.29 -0.42 0.71 0.06 0.22 0.46 0.44 0.68 0.06 0.05 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ CONCPT: num  0.88 0.03 0.03 0.03 -0.28 0.03 -0.47 0.25 0.56 0.15 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ MOT   : num  0.67 0.33 0.67 0 0 0 0.33 1 0.33 1 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ CAR   : num  10 2 9 15 1 11 10 9 9 11 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ RDG   : num  33.6 46.9 41.6 38.9 36.3 49.5 62.7 44.2 46.9 44.2 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ WRTG  : num  43.7 35.9 59.3 41.1 48.9 46.3 64.5 51.5 41.1 49.5 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ MATH  : num  40.2 41.9 41.9 32.7 39.5 46.2 48 36.9 45.3 40.5 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ SCI   : num  39 36.3 44.4 41.7 41.7 41.7 63.4 49.8 47.1 39 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ CIV   : num  40.6 45.6 45.6 40.6 45.6 35.6 55.6 55.6 55.6 50.6 ...
##   ..- attr(*, "format.spss")= chr "F5.2"

reviso metadata y formateo:

# Recode the imported columns to the measurement level each one represents.
# ID is an identifier, not a quantity, so it is stored as text.
A$ID <- as.character(A$ID)

# Nominal variables become factors. Columns are selected by name rather than
# by position (originally 2, 3, 5, 6, 10) so the recoding still targets the
# intended variables if the column order of the file ever changes.
nominales <- c("SEX", "RACE", "SCTYP", "HSP", "CAR")
A[nominales] <- lapply(A[nominales], as.factor)

# SES is treated as ordinal, so it is stored as an ordered factor.
A$SES <- as.ordered(A$SES)

# Confirm the new column types.
str(A)

## 'data.frame':    600 obs. of  15 variables:
##  $ ID    : chr  "1" "2" "3" "4" ...
##  $ SEX   : Factor w/ 2 levels "1","2": 2 1 2 2 2 1 1 2 1 2 ...
##  $ RACE  : Factor w/ 4 levels "1","2","3","4": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SES   : Ord.factor w/ 3 levels "1"<"2"<"3": 1 1 1 2 2 2 1 1 2 1 ...
##  $ SCTYP : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HSP   : Factor w/ 3 levels "1","2","3": 3 2 2 3 3 2 1 1 1 1 ...
##  $ LOCUS : num  0.29 -0.42 0.71 0.06 0.22 0.46 0.44 0.68 0.06 0.05 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ CONCPT: num  0.88 0.03 0.03 0.03 -0.28 0.03 -0.47 0.25 0.56 0.15 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ MOT   : num  0.67 0.33 0.67 0 0 0 0.33 1 0.33 1 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ CAR   : Factor w/ 17 levels "1","2","3","4",..: 10 2 9 15 1 11 10 9 9 11 ...
##  $ RDG   : num  33.6 46.9 41.6 38.9 36.3 49.5 62.7 44.2 46.9 44.2 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ WRTG  : num  43.7 35.9 59.3 41.1 48.9 46.3 64.5 51.5 41.1 49.5 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ MATH  : num  40.2 41.9 41.9 32.7 39.5 46.2 48 36.9 45.3 40.5 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ SCI   : num  39 36.3 44.4 41.7 41.7 41.7 63.4 49.8 47.1 39 ...
##   ..- attr(*, "format.spss")= chr "F5.2"
##  $ CIV   : num  40.6 45.6 45.6 40.6 45.6 35.6 55.6 55.6 55.6 50.6 ...
##   ..- attr(*, "format.spss")= chr "F5.2"

PREGUNTAS: Analizar WRITING, por sexo y nivel socioeconómico. Escribir conclusiones

WRTG y SEX: variable numérica según variable categórica

# Model WRTG as a function of SEX; f1 is reused by the tests further down.
f1 <- WRTG ~ SEX

# Mean WRTG within each SEX level.
aggregate(f1, data = A, FUN = mean)

##   SEX     WRTG
## 1   1 49.78608
## 2   2 54.55443

Hipótesis del caso: las mujeres obtienen, en promedio, mejor puntuación en writing que los hombres. Hipótesis general: hay diferencia entre las medias de ambos grupos. Comprobación:

gráficos

library(ggplot2)
# Density histogram of WRTG for each SEX level with a normal reference curve.
# The reference curve is computed separately per SEX level (group mean and
# sd). A single curve built from the pooled mean/sd of A$WRTG would be drawn
# identically in every facet and could not show whether each group is normal.
rango_wrtg <- range(A$WRTG, na.rm = TRUE)
eje_wrtg <- seq(rango_wrtg[1], rango_wrtg[2], length.out = 200)
curva_sex <- do.call(
  rbind,
  # drop = TRUE skips factor levels that have no observations
  lapply(split(A, A$SEX, drop = TRUE), function(grupo) {
    data.frame(
      SEX = grupo$SEX[1],
      WRTG = eje_wrtg,
      densidad = dnorm(eje_wrtg,
                       mean = mean(grupo$WRTG, na.rm = TRUE),
                       sd = sd(grupo$WRTG, na.rm = TRUE))
    )
  })
)
ggplot(A, aes(x = WRTG)) +
  geom_histogram(aes(y = ..density..), bins = 20, fill = "green") +
  geom_line(data = curva_sex, aes(y = densidad), colour = "red") +
  facet_grid(~SEX) +
  coord_flip()

Se observa que los histogramas no siguen la curva normal en ninguno de los grupos; por tanto, no hay normalidad y corresponde un análisis no paramétrico para comprobar si hay diferencias entre los grupos.

library(ggpubr)

## Loading required package: magrittr

# Q-Q plot of WRTG against the normal distribution, one panel per SEX level.
ggqqplot(A, x = "WRTG") + facet_grid(cols = vars(SEX))

Los puntos se desvían de la diagonal; por tanto, no hay normalidad, y se comprueba de nuevo en N.C.

2. Probabilidad: prueba de Shapiro-Wilk

# Run the Shapiro-Wilk normality test on a numeric vector.
#
# x: numeric vector of observations.
# Returns a length-2 numeric vector: the W statistic followed by the p-value.
normalidadTest <- function(x) {
  prueba <- shapiro.test(x)
  c(prueba$statistic, prueba$p.value)
}

# Apply the Shapiro-Wilk test to WRTG within each SEX level. Because the
# function returns two values, the second column of the result is a
# two-column matrix (statistic, p-value).
resultado <- aggregate(f1, data = A, FUN = normalidadTest)


library(knitr)

# Unpack the matrix column into a data frame with readable column names,
# then show it next to the grouping column.
shapiroTest <- as.data.frame(resultado[[2]])
names(shapiroTest) <- c("SW_Statistic", "Probabilidad")
tabla_shapiro <- cbind(resultado[1], shapiroTest)
kable(tabla_shapiro)

SEX	SW_Statistic	Probabilidad
1	0.9643550	2.8e-06
2	0.9445193	0.0e+00

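Las probabilidades son menores a 0.05; entonces hay una probabilidad muy baja de que la distribución sea normal en cada grupo. En consecuencia, el resultado es significativo: se rechaza la normalidad de WRTG en ambos sexos, y corresponde usar una prueba no paramétrica para comparar las notas de writing entre mujeres y hombres.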

4. Opción no paramétrica para variables dicotómicas

# Wilcoxon rank-sum test comparing WRTG between the two SEX levels.
wilcox.test(formula = f1, data = A)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  WRTG by SEX
## W = 32624, p-value = 1.255e-08
## alternative hypothesis: true location shift is not equal to 0

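Conclusión: como el p-value (1.255e-08) es menor que 0.05, se rechaza la hipótesis nula de igualdad entre los grupos; hay una diferencia estadísticamente significativa en WRTG entre los dos sexos.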

WRTG y SES: variable numérica según variable categórica

# Model WRTG as a function of SES; f2 is reused by the tests further down.
f2 <- WRTG ~ SES
# Mean WRTG within each SES level.
aggregate(f2, data = A, FUN = mean)

##   SES     WRTG
## 1   1 48.70288
## 2   2 52.35853
## 3   3 55.59259

Hipótesis del caso: se observa que, a mayor estatus socioeconómico, mejor promedio en writing. Hipótesis general: hay diferencias entre las medias de los grupos.

comprobación: grafico

library(ggplot2)
# Density histogram of WRTG for each SES level with a normal reference curve.
# The reference curve is computed separately per SES level (group mean and
# sd). A single curve built from the pooled mean/sd of A$WRTG would be drawn
# identically in every facet and could not show whether each group is normal.
rango_wrtg <- range(A$WRTG, na.rm = TRUE)
eje_wrtg <- seq(rango_wrtg[1], rango_wrtg[2], length.out = 200)
curva_ses <- do.call(
  rbind,
  # drop = TRUE skips factor levels that have no observations
  lapply(split(A, A$SES, drop = TRUE), function(grupo) {
    data.frame(
      SES = grupo$SES[1],
      WRTG = eje_wrtg,
      densidad = dnorm(eje_wrtg,
                       mean = mean(grupo$WRTG, na.rm = TRUE),
                       sd = sd(grupo$WRTG, na.rm = TRUE))
    )
  })
)
ggplot(A, aes(x = WRTG)) +
  geom_histogram(aes(y = ..density..), bins = 20, fill = "green") +
  geom_line(data = curva_ses, aes(y = densidad), colour = "red") +
  facet_grid(~SES) +
  coord_flip()

Se observa que los histogramas no siguen la curva normal en ninguno de los grupos; por tanto, no hay normalidad y corresponde un análisis no paramétrico para comprobar si hay diferencias entre los grupos.

OTRO GRÁFICO:

library(ggpubr)

# Q-Q plot of WRTG against the normal distribution, one panel per SES level.
ggqqplot(A, x = "WRTG") + facet_grid(cols = vars(SES))

Los puntos se alejan mucho de la diagonal; es decir, la distribución se aleja de la normalidad.

2. Probabilidad: prueba de Shapiro-Wilk

# Run the Shapiro-Wilk normality test on a numeric vector.
#
# x: numeric vector of observations.
# Returns a length-2 numeric vector: the W statistic followed by the p-value.
normalidadTest <- function(x) {
  prueba <- shapiro.test(x)
  c(prueba$statistic, prueba$p.value)
}

# Apply the Shapiro-Wilk test to WRTG within each SES level. Because the
# function returns two values, the second column of the result is a
# two-column matrix (statistic, p-value).
resultado <- aggregate(f2, data = A, FUN = normalidadTest)


library(knitr)

# Unpack the matrix column into a data frame with readable column names.
shapiroTest <- as.data.frame(resultado[[2]])

names(shapiroTest) <- c("SW_Statistic", "Probabilidad")

# Show the grouping column next to the test results.
tabla_shapiro <- cbind(resultado[1], shapiroTest)
kable(tabla_shapiro)

SES	SW_Statistic	Probabilidad
1	0.9673549	0.0020755
2	0.9517413	0.0000000
3	0.9249929	0.0000002

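Las probabilidades son menores que 0.05; en consecuencia, hay una probabilidad muy baja de que la distribución sea normal. En otras palabras, el resultado es significativo: se rechaza la normalidad de WRTG en los tres estatus socioeconómicos (alto, medio y bajo), y corresponde usar una prueba no paramétrica para comparar los grupos.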

4. Opción no paramétrica para variables politómicas

# Kruskal-Wallis rank-sum test comparing WRTG across the SES levels.
kruskal.test(formula = f2, data = A)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  WRTG by SES
## Kruskal-Wallis chi-squared = 37.037, df = 2, p-value = 9.069e-09

# Notched boxplots of WRTG for each SES level. Per the ggplot2 documentation,
# notches that do not overlap suggest that the medians differ.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ggplot(data = A, aes(x = SES, y = WRTG)) + geom_boxplot(notch = TRUE)

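Los notches no se intersectan; entonces, ¿no hay igualdad de medianas? Conclusión: como el p-value de Kruskal-Wallis (9.069e-09) es menor que 0.05, se rechaza la hipótesis nula de igualdad; hay diferencias significativas en WRTG entre los niveles socioeconómicos.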