# Address of the published Google Sheets CSV export that is read below.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ_VNceU6ncsQs-_KFvkQsv2XqYKRCMyRYCDYQFosH5bo6Yt-l1gE8ZRdP44m4Rh8lQB2nOY-Y-p0ZP/pub?gid=0&single=true&output=csv"
# Read the CSV into a data frame without turning text columns into factors.
# FALSE is spelled out because the shorthand `F` is an ordinary variable that
# can be reassigned, whereas FALSE is a reserved word.
hsb <- read.csv(link, stringsAsFactors = FALSE)
# Print the structure to check the column types that were read.
str(hsb)
## 'data.frame': 600 obs. of 15 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ SEX : int 2 1 2 2 2 1 1 2 1 2 ...
## $ RACE : int 2 2 2 2 2 2 2 2 2 2 ...
## $ SES : int 1 1 1 2 2 2 1 1 2 1 ...
## $ SCTYP : int 1 1 1 1 1 1 1 1 1 1 ...
## $ HSP : int 3 2 2 3 3 2 1 1 1 1 ...
## $ LOCUS : num 0.29 -0.42 0.71 0.06 0.22 0.46 0.44 0.68 0.06 0.05 ...
## $ CONCPT: num 0.88 0.03 0.03 0.03 -0.28 0.03 -0.47 0.25 0.56 0.15 ...
## $ MOT : num 0.67 0.33 0.67 0 0 0 0.33 1 0.33 1 ...
## $ CAR : int 10 2 9 15 1 11 10 9 9 11 ...
## $ RDG : num 33.6 46.9 41.6 38.9 36.3 49.5 62.7 44.2 46.9 44.2 ...
## $ WRTG : num 43.7 35.9 59.3 41.1 48.9 46.3 64.5 51.5 41.1 49.5 ...
## $ MATH : num 40.2 41.9 41.9 32.7 39.5 46.2 48 36.9 45.3 40.5 ...
## $ SCI : num 39 36.3 44.4 41.7 41.7 41.7 63.4 49.8 47.1 39 ...
## $ CIV : num 40.6 45.6 45.6 40.6 45.6 35.6 55.6 55.6 55.6 50.6 ...
# Store ID as character rather than integer.
hsb$ID <- as.character(hsb$ID)
# Convert the categorical columns to factors. They are selected by name
# (positions 2, 3, 5, 6 and 10 in the data as read) so this step keeps
# targeting the same variables even if the column order of the sheet changes.
categorical_cols <- c("SEX", "RACE", "SCTYP", "HSP", "CAR")
hsb[categorical_cols] <- lapply(hsb[categorical_cols], as.factor)
# HSP is then promoted from a plain factor to an ordered factor.
# NOTE(review): SES is left as an integer here -- confirm that is intended.
hsb$HSP <- as.ordered(hsb$HSP)
# Show the resulting structure, cutting each printed line at 50 characters.
str(hsb, strict.width = "cut", width = 50)
## 'data.frame': 600 obs. of 15 variables:
## $ ID : chr "1" "2" "3" "4" ...
## $ SEX : Factor w/ 2 levels "1","2": 2 1 2 2 2..
## $ RACE : Factor w/ 4 levels "1","2","3","4": 2..
## $ SES : int 1 1 1 2 2 2 1 1 2 1 ...
## $ SCTYP : Factor w/ 2 levels "1","2": 1 1 1 1 1..
## $ HSP : Ord.factor w/ 3 levels "1"<"2"<"3": 3..
## $ LOCUS : num 0.29 -0.42 0.71 0.06 0.22 0.46 0..
## $ CONCPT: num 0.88 0.03 0.03 0.03 -0.28 0.03 -..
## $ MOT : num 0.67 0.33 0.67 0 0 0 0.33 1 0.33..
## $ CAR : Factor w/ 17 levels "1","2","3","4",...
## $ RDG : num 33.6 46.9 41.6 38.9 36.3 49.5 62..
## $ WRTG : num 43.7 35.9 59.3 41.1 48.9 46.3 64..
## $ MATH : num 40.2 41.9 41.9 32.7 39.5 46.2 48..
## $ SCI : num 39 36.3 44.4 41.7 41.7 41.7 63.4..
## $ CIV : num 40.6 45.6 45.6 40.6 45.6 35.6 55..
# Turn the SEX and SCTYP factors back into numbers. The conversion goes
# through as.character() so the level labels themselves are converted;
# as.numeric() applied directly to a factor returns the internal level codes,
# which match the labels only when the levels happen to be "1", "2", ...
hsb$SEX <- as.numeric(as.character(hsb$SEX))
hsb$SCTYP <- as.numeric(as.character(hsb$SCTYP))
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
# Base plot object with SCTYP mapped to the x axis.
base <- ggplot(hsb, mapping = aes(x = SCTYP))
# Histogram of SCTYP drawn with 20 bins.
base +
  geom_histogram(bins = 20)
# Formula with SCTYP as the response and SEX as the grouping variable.
f1 <- SCTYP ~ SEX
# Mean of SCTYP within each SEX group.
aggregate(f1, data = hsb, FUN = mean)
## SEX SCTYP
## 1 1 1.150183
## 2 2 1.162080
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.0.3
# Normal Q-Q plot of SCTYP, with one panel per SEX value laid out in columns.
ggqqplot(hsb, x = "SCTYP") +
  facet_grid(cols = vars(SEX))
Sí se aleja de la normalidad.
# Run the Shapiro-Wilk normality test on a vector.
#
# x: vector passed unchanged to shapiro.test().
# Returns a length-2 numeric vector: the test statistic followed by the
# p-value.
normalidadTest <- function(x) {
  sw <- shapiro.test(x)
  c(sw$statistic, sw$p.value)
}
# Apply the Shapiro-Wilk helper to SCTYP within each SEX group. The helper
# returns two numbers per group, so the second column of the result holds a
# two-column matrix (statistic, p-value).
resultado <- aggregate(f1, data = hsb, FUN = normalidadTest)
library(knitr)
# Pull that matrix out as its own data frame and label its two columns.
shapiroTest <- setNames(
  as.data.frame(resultado[[2]]),
  c("SW_Statistic", "Probabilidad")
)
# Render the group column next to the test results as a table.
kable(cbind(resultado[1], shapiroTest))
| SEX | SW_Statistic | Probabilidad |
|---|---|---|
| 1 | 0.4264061 | 0 |
| 2 | 0.4430975 | 0 |

La probabilidad es menor a 0.05.
# Two-sample t-test of SCTYP between the two SEX groups defined by f1.
t.test(f1, data = hsb)
##
## Welch Two Sample t-test
##
## data: SCTYP by SEX
## t = -0.39971, df = 584.79, p-value = 0.6895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07035107 0.04655835
## sample estimates:
## mean in group 1 mean in group 2
## 1.150183 1.162080
El p-value (0.6895) es mayor a 0.05, por lo que no rechazamos la H0 de la prueba t.
# Formula with SCTYP as the response and SEX as the grouping variable.
f2 <- SCTYP ~ SEX
# Mean of SCTYP within each SEX group.
aggregate(f2, data = hsb, FUN = mean)
## SEX SCTYP
## 1 1 1.150183
## 2 2 1.162080
library(ggpubr)
# Normal Q-Q plot of SCTYP, with one panel per SEX value laid out in columns.
ggqqplot(hsb, x = "SCTYP") +
  facet_grid(cols = vars(SEX))
Las distribuciones no se muestran muy normales.
# Apply the Shapiro-Wilk helper to SCTYP within each SEX group. The helper
# returns two numbers per group, so the second column of the result holds a
# two-column matrix (statistic, p-value).
resultado <- aggregate(f2, data = hsb, FUN = normalidadTest)
library(knitr)
# Pull that matrix out as its own data frame and label its two columns.
shapiroTest <- setNames(
  as.data.frame(resultado[[2]]),
  c("SW_Statistic", "Probabilidad")
)
# Render the group column next to the test results as a table.
kable(cbind(resultado[1], shapiroTest))
| SEX | SW_Statistic | Probabilidad |
|---|---|---|
| 1 | 0.4264061 | 0 |
| 2 | 0.4430975 | 0 |

Es necesario ir por el camino no paramétrico.
# Fit the analysis of variance described by f2 and print its summary table.
summary(aov(formula = f2, data = hsb))
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 0.02 0.02106 0.159 0.69
## Residuals 598 79.25 0.13253
El efecto de SEX no es significativo (p = 0.69), puesto que hay ausencia de asteriscos; no se rechaza la H0.
library(ggpubr)
# Error plot of SCTYP for each SEX value, using the "mean_ci" summary.
ggerrorplot(data = hsb, x = "SEX", y = "SCTYP", desc_stat = "mean_ci")
El nivel 1 no se diferencia mucho del nivel 2.
# Kruskal-Wallis rank sum test of SCTYP across the SEX groups defined by f2.
kruskal.test(f2, data = hsb)
##
## Kruskal-Wallis rank sum test
##
## data: SCTYP by SEX
## Kruskal-Wallis chi-squared = 0.15911, df = 1, p-value = 0.69
No hay asteriscos y el p-value (0.69) es mayor a 0.05, por lo que no se rechaza la H0.
# Notched box plot of SCTYP for each SEX value. SEX is wrapped in factor() so
# the x axis is discrete and one box is drawn per group; with a numeric x,
# ggplot2 does not split the data by SEX and warns "Continuous x aesthetic --
# did you forget aes(group=...)?". The axis label is reset to keep it as "SEX".
ggplot(data = hsb, aes(x = factor(SEX), y = SCTYP)) +
  geom_boxplot(notch = TRUE) +
  labs(x = "SEX")
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?
El nivel 2 se interseca y es diferente.