r.utf8

# Published Google Sheets CSV export that holds the data set.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ_VNceU6ncsQs-_KFvkQsv2XqYKRCMyRYCDYQFosH5bo6Yt-l1gE8ZRdP44m4Rh8lQB2nOY-Y-p0ZP/pub?gid=0&single=true&output=csv"

# Read the CSV keeping text columns as character. FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned, while FALSE is reserved.
hsb <- read.csv(link, stringsAsFactors = FALSE)

# Inspect the column types exactly as they were read.
str(hsb)

## 'data.frame':    600 obs. of  15 variables:
##  $ ID    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ SEX   : int  2 1 2 2 2 1 1 2 1 2 ...
##  $ RACE  : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ SES   : int  1 1 1 2 2 2 1 1 2 1 ...
##  $ SCTYP : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ HSP   : int  3 2 2 3 3 2 1 1 1 1 ...
##  $ LOCUS : num  0.29 -0.42 0.71 0.06 0.22 0.46 0.44 0.68 0.06 0.05 ...
##  $ CONCPT: num  0.88 0.03 0.03 0.03 -0.28 0.03 -0.47 0.25 0.56 0.15 ...
##  $ MOT   : num  0.67 0.33 0.67 0 0 0 0.33 1 0.33 1 ...
##  $ CAR   : int  10 2 9 15 1 11 10 9 9 11 ...
##  $ RDG   : num  33.6 46.9 41.6 38.9 36.3 49.5 62.7 44.2 46.9 44.2 ...
##  $ WRTG  : num  43.7 35.9 59.3 41.1 48.9 46.3 64.5 51.5 41.1 49.5 ...
##  $ MATH  : num  40.2 41.9 41.9 32.7 39.5 46.2 48 36.9 45.3 40.5 ...
##  $ SCI   : num  39 36.3 44.4 41.7 41.7 41.7 63.4 49.8 47.1 39 ...
##  $ CIV   : num  40.6 45.6 45.6 40.6 45.6 35.6 55.6 55.6 55.6 50.6 ...

# The identifier is a label, not a quantity: store it as text.
hsb$ID <- as.character(hsb$ID)

# Convert the categorical columns to factors. They are selected by name
# (positions 2, 3, 5, 6 and 10 in the str() listing) so the conversion cannot
# silently hit the wrong variables if the column order ever changes.
categoricas <- c("SEX", "RACE", "SCTYP", "HSP", "CAR")
hsb[categoricas] <- lapply(hsb[categoricas], as.factor)

# HSP is additionally treated as an ordered factor.
hsb$HSP <- as.ordered(hsb$HSP)

# Re-check the structure, cutting each printed line at 50 characters.
str(hsb, strict.width = "cut", width = 50)

## 'data.frame':    600 obs. of  15 variables:
##  $ ID    : chr  "1" "2" "3" "4" ...
##  $ SEX   : Factor w/ 2 levels "1","2": 2 1 2 2 2..
##  $ RACE  : Factor w/ 4 levels "1","2","3","4": 2..
##  $ SES   : int  1 1 1 2 2 2 1 1 2 1 ...
##  $ SCTYP : Factor w/ 2 levels "1","2": 1 1 1 1 1..
##  $ HSP   : Ord.factor w/ 3 levels "1"<"2"<"3": 3..
##  $ LOCUS : num  0.29 -0.42 0.71 0.06 0.22 0.46 0..
##  $ CONCPT: num  0.88 0.03 0.03 0.03 -0.28 0.03 -..
##  $ MOT   : num  0.67 0.33 0.67 0 0 0 0.33 1 0.33..
##  $ CAR   : Factor w/ 17 levels "1","2","3","4",...
##  $ RDG   : num  33.6 46.9 41.6 38.9 36.3 49.5 62..
##  $ WRTG  : num  43.7 35.9 59.3 41.1 48.9 46.3 64..
##  $ MATH  : num  40.2 41.9 41.9 32.7 39.5 46.2 48..
##  $ SCI   : num  39 36.3 44.4 41.7 41.7 41.7 63.4..
##  $ CIV   : num  40.6 45.6 45.6 40.6 45.6 35.6 55..

# Turn the factors back into numbers through their labels. as.numeric()
# applied directly to a factor returns the internal level codes, which match
# the original values here only because the levels happen to be "1" and "2".
hsb$SEX <- as.numeric(as.character(hsb$SEX))

hsb$SCTYP <- as.numeric(as.character(hsb$SCTYP))

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.2

# Histogram of SCTYP using 20 bins; `base` keeps the plot skeleton.
base <- ggplot(hsb, aes(x = SCTYP))
base + geom_histogram(bins = 20)

# Model used below: SCTYP as response, SEX as grouping variable.
f1 <- SCTYP ~ SEX
# Mean of SCTYP within each SEX value.
aggregate(f1, data = hsb, FUN = mean)

##   SEX    SCTYP
## 1   1 1.150183
## 2   2 1.162080

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.0.3

# Normal Q-Q plot of SCTYP, one panel per SEX value.
ggqqplot(hsb, x = "SCTYP") + facet_grid(. ~ SEX)

Sí se aleja de la normalidad.

# Run the Shapiro-Wilk normality test on `x`.
#
# x: numeric vector handed to shapiro.test().
# Returns a length-2 numeric vector: the W statistic followed by the p-value.
normalidadTest <- function(x) {
  sw <- shapiro.test(x)
  w_stat <- sw$statistic
  p_val <- sw$p.value
  c(w_stat, p_val)
}
# Apply the normality test to SCTYP within each SEX group. The second column
# of the result is a two-column matrix (statistic, p-value).
resultado <- aggregate(f1, data = hsb, FUN = normalidadTest)
library(knitr)

# Unpack that matrix into a labelled data frame and print it next to the
# grouping column.
shapiroTest <- setNames(
  as.data.frame(resultado[[2]]),
  c("SW_Statistic", "Probabilidad")
)
kable(cbind(resultado[1], shapiroTest))

SEX	SW_Statistic	Probabilidad
1	0.4264061	0
2	0.4430975	0
La probabilidad es menor a 0.05, por lo que se rechaza la normalidad en ambos grupos.

# Two-sample t-test of SCTYP between the SEX groups.
t.test(f1, data = hsb)

## 
##  Welch Two Sample t-test
## 
## data:  SCTYP by SEX
## t = -0.39971, df = 584.79, p-value = 0.6895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07035107  0.04655835
## sample estimates:
## mean in group 1 mean in group 2 
##        1.150183        1.162080

El p-value es mayor a 0.05, por lo que no rechazamos la H0 de la prueba t.

# Second model; it is written with the same response and grouping variable
# as f1 (SCTYP by SEX).
f2 <- SCTYP ~ SEX
# Mean of SCTYP within each SEX value.
aggregate(f2, data = hsb, FUN = mean)

##   SEX    SCTYP
## 1   1 1.150183
## 2   2 1.162080

library(ggpubr)
# Normal Q-Q plot of SCTYP, one panel per SEX value.
ggqqplot(hsb, x = "SCTYP") + facet_grid(. ~ SEX)

No se muestran muy normales.

# Normality test of SCTYP per SEX group for the f2 model; the test output
# arrives as a two-column matrix in the second column of `resultado`.
resultado <- aggregate(f2, data = hsb, FUN = normalidadTest)
library(knitr)

# Label the statistic and p-value columns, then print them with the groups.
shapiroTest <- as.data.frame(resultado[[2]])
colnames(shapiroTest) <- c("SW_Statistic", "Probabilidad")
kable(cbind(resultado[1], shapiroTest))

SEX	SW_Statistic	Probabilidad
1	0.4264061	0
2	0.4430975	0
Es necesario ir por el camino no paramétrico.

# One-way ANOVA table for SCTYP by SEX.
summary(aov(formula = f2, data = hsb))

##              Df Sum Sq Mean Sq F value Pr(>F)
## SEX           1   0.02 0.02106   0.159   0.69
## Residuals   598  79.25 0.13253

El efecto no es significativo, puesto que hay ausencia de asteriscos (Pr(>F) = 0.69); no se rechaza H0.

library(ggpubr)
# Error plot of SCTYP for each SEX value, summarised with "mean_ci".
ggerrorplot(hsb, x = "SEX", y = "SCTYP", desc_stat = "mean_ci")

El nivel 1 no se diferencia mucho del nivel 2.

# Kruskal-Wallis rank sum test of SCTYP across the SEX groups.
kruskal.test(f2, data = hsb)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  SCTYP by SEX
## Kruskal-Wallis chi-squared = 0.15911, df = 1, p-value = 0.69

Esta salida no muestra asteriscos, pero el p-value es mayor a 0.05, por lo que no se rechaza H0.

# Notched boxplot of SCTYP by SEX. SEX was converted to numeric earlier, and
# mapping a numeric column to x makes ggplot2 emit the "Continuous x
# aesthetic" warning and draw a single box; factor() restores one box per
# group. The axis title is set back to the plain variable name.
ggplot(hsb, aes(x = factor(SEX), y = SCTYP)) +
  geom_boxplot(notch = TRUE) +
  labs(x = "SEX")

## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

El nivel 2 se interseca y es diferente.