R Markdown
#######--------------------------------
# Overview of every column of the data set (rendered output kept below).
summary(Datos)
## GTO_TIC ENTORNO SEXO
## Min. :207.3 GU:100 H:150
## 1st Qu.:378.4 R :100 M:150
## Median :444.9 U :100
## Mean :432.2
## 3rd Qu.:492.6
## Max. :608.2

# ---- Numerical and graphical analysis of GTO_TIC ----
# Summary statistics of the spending variable on its own.
summary(Datos$GTO_TIC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 207.3 378.4 444.9 432.2 492.6 608.2

# Box plot of the whole sample; las = 1 keeps the axis tick labels horizontal.
boxplot(
  Datos$GTO_TIC,
  main = "Gasto anual en productos TIC",
  ylab = "euros/año",
  col = "gold",
  las = 1
)

# Histogram of the whole sample; labels = TRUE writes the count on top of
# each bar.
hist(
  Datos$GTO_TIC,
  main = "Gasto anual en productos TIC",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

# ---- Build the subgroups ----
# One data frame per ENTORNO level. Going by the plot titles used later in
# this script, GU = "gran urbe", U = "urbe" and R = "zona rural".
gu_gto <- subset(Datos, subset = ENTORNO == "GU")
u_gto <- subset(Datos, subset = ENTORNO == "U")
r_gto <- subset(Datos, subset = ENTORNO == "R")

# One data frame per SEXO level. The pyramid plot further down labels the
# H subset "Hombres" and the M subset "Mujeres".
M_gto <- subset(Datos, subset = SEXO == "M")
H_gto <- subset(Datos, subset = SEXO == "H")
# ---- Numerical and graphical analysis of GTO_TIC by environment ----
# Summary statistics of spending inside each ENTORNO subgroup.
summary(gu_gto$GTO_TIC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 424.2 467.2 494.7 497.4 527.0 608.2
summary(r_gto$GTO_TIC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 207.3 304.1 341.9 344.4 379.4 487.9
summary(u_gto$GTO_TIC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 339.9 422.0 453.4 454.7 482.8 568.1

# Single-panel layout, then one box per ENTORNO level side by side.
par(mfrow = c(1, 1))
boxplot(
  GTO_TIC ~ ENTORNO,
  data = Datos,
  main = "Gasto anual en TIC segun el entorno",
  ylab = "euros/año",
  col = "gold",
  las = 1
)

# Three histograms side by side, one per ENTORNO subgroup
# (gran urbe, zona rural, urbe). labels = TRUE writes the count on top of
# each bar.
par(mfrow = c(1, 3))

hist(
  gu_gto$GTO_TIC,
  main = "Gasto anual en TIC en gran urbe",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

hist(
  r_gto$GTO_TIC,
  main = "Gasto anual en TIC en zona rural",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

# The "urbe" panel must plot the ENTORNO == "U" subset (u_gto). It previously
# plotted H_gto, i.e. the men from all three environments.
hist(
  u_gto$GTO_TIC,
  main = "Gasto anual en TIC en urbe",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

# ---- Numerical and graphical analysis of GTO_TIC by sex ----
# Summary statistics of spending inside each SEXO subgroup.
summary(M_gto$GTO_TIC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 207.3 369.6 450.4 434.3 498.9 608.2
summary(H_gto$GTO_TIC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 248.0 381.1 442.9 430.0 485.0 552.8

# Single-panel layout, then one box per SEXO level. The y axis is pinned to
# 200-600, so values outside that range (the recorded maximum is 608.2) fall
# outside the plotting region.
par(mfrow = c(1, 1))
boxplot(
  GTO_TIC ~ SEXO,
  data = Datos,
  main = "Gasto anual en TIC por sexo",
  ylab = "euros/año",
  ylim = c(200, 600),
  col = "gold",
  las = 1
)

library(pyramid)
# Keep only what remains after dropping columns 2 and 3 of each sex subset
# (presumably GTO_TIC, going by the column order printed by summary(Datos)).
datos_piramide_h <- H_gto[, -c(2, 3)]
datos_piramide_m <- M_gto[, -c(2, 3)]

# Men first, women second: this column order is what ties each group to the
# Llab / Rlab labels passed to pyramid() below.
# NOTE(review): this pairs the i-th man with the i-th woman row by row; it
# relies on both subsets having the same number of rows (150 each in the
# recorded summary) -- confirm this is the intended display.
datos_grafico_piramide <- data.frame(datos_piramide_h, datos_piramide_m)

par(mfrow = c(1, 1))
# Back-to-back bar chart of spending for men (left) and women (right).
# Clab is blank; Cstep and Cgap presumably serve to hide the centre labels
# and close the centre gap -- TODO confirm against the pyramid docs.
pyramid(
  datos_grafico_piramide,
  main = "Gasto anual en TIC por sexo",
  Llab = "Hombres",
  Rlab = "Mujeres",
  Clab = "",
  Lcol = "green",
  Rcol = "cyan",
  Cstep = 100000,
  Cgap = 0.001,
  GL = FALSE
)

# Two histograms side by side: men on the left, women on the right.
# H_gto is the SEXO == "H" subset (hombres) and M_gto the SEXO == "M" subset
# (mujeres). The two data sets were previously swapped relative to their
# titles, so each panel showed the other group's data.
par(mfrow = c(1, 2))

hist(
  H_gto$GTO_TIC,
  main = "Gasto anual en TIC de hombres",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

hist(
  M_gto$GTO_TIC,
  main = "Gasto anual en TIC de mujeres",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

# ---- One-way ANOVA of GTO_TIC by ENTORNO: graphical assumption check ----
# Fit the model and print the ANOVA table.
Datos_anova_entorno <- aov(GTO_TIC ~ ENTORNO, data = Datos)
summary(Datos_anova_entorno)
## Df Sum Sq Mean Sq F value Pr(>F)
## ENTORNO 2 1246328 623164 300.2 <2e-16 ***
## Residuals 297 616423 2075
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 2 x 2 grid so the diagnostic plots of the fit share one page.
par(mfrow = c(2, 2))
plot(Datos_anova_entorno)

# ---- Numerical assumption check ----
# Homogeneity of variances across ENTORNO levels. In the recorded output the
# p-value is 0.007, so equal variances is rejected at the 5% level.
bartlett.test(GTO_TIC ~ ENTORNO, data = Datos)
##
## Bartlett test of homogeneity of variances
##
## data: GTO_TIC by ENTORNO
## Bartlett's K-squared = 9.8049, df = 2, p-value = 0.007428

# Normality of the model residuals. In the recorded output the p-value is
# 0.451, so normality is not rejected.
shapiro.test(residuals(Datos_anova_entorno))
##
## Shapiro-Wilk normality test
##
## data: residuals(Datos_anova_entorno)
## W = 0.99505, p-value = 0.451
#####------------------------------
# Pairwise comparison of the ENTORNO means with 95% family-wise confidence
# intervals. In the recorded output none of the three intervals contains 0.
TukeyHSD(
  Datos_anova_entorno,
  conf.level = 0.95
)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = GTO_TIC ~ ENTORNO, data = Datos)
##
## $ENTORNO
## diff lwr upr p adj
## R-GU -152.9645 -168.14073 -137.78827 0
## U-GU -42.6250 -57.80123 -27.44877 0
## U-R 110.3395 95.16327 125.51573 0

# Rank-based alternative to the one-way ANOVA for the same grouping.
kruskal.test(GTO_TIC ~ ENTORNO, data = Datos)
##
## Kruskal-Wallis rank sum test
##
## data: GTO_TIC by ENTORNO
## Kruskal-Wallis chi-squared = 197.54, df = 2, p-value < 2.2e-16
########--------------------------------------
library("pgirmess")

# Multiple comparisons after Kruskal-Wallis (pgirmess). In the recorded
# output every pair of ENTORNO levels exceeds the critical difference.
kruskalmc(GTO_TIC ~ ENTORNO, data = Datos)
## Multiple comparison test after Kruskal-Wallis
## p.value: 0.05
## Comparisons
## obs.dif critical.dif difference
## GU-R 169.27 29.36897 TRUE
## GU-U 56.21 29.36897 TRUE
## R-U 113.06 29.36897 TRUE
##########-------------------
# ---- One-way ANOVA of GTO_TIC by SEXO ----
# Fit the model and print the ANOVA table.
Datos_anova_sexo <- aov(GTO_TIC ~ SEXO, data = Datos)
summary(Datos_anova_sexo)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEXO 1 1367 1367 0.219 0.64
## Residuals 298 1861384 6246

# 2 x 2 grid so the diagnostic plots of the fit share one page.
par(mfrow = c(2, 2))
plot(Datos_anova_sexo)

# Homogeneity of variances between the two sexes. The recorded p-value is
# 0.052, just above the 5% level.
bartlett.test(GTO_TIC ~ SEXO, data = Datos)
##
## Bartlett test of homogeneity of variances
##
## data: GTO_TIC by SEXO
## Bartlett's K-squared = 3.7675, df = 1, p-value = 0.05226

# Normality of the model residuals. The recorded p-value is 1.3e-05, so
# normality is rejected.
shapiro.test(residuals(Datos_anova_sexo))
##
## Shapiro-Wilk normality test
##
## data: residuals(Datos_anova_sexo)
## W = 0.9719, p-value = 1.333e-05

# Rank-based alternative to the ANOVA above for the same grouping.
kruskal.test(GTO_TIC ~ SEXO, data = Datos)
##
## Kruskal-Wallis rank sum test
##
## data: GTO_TIC by SEXO
## Kruskal-Wallis chi-squared = 0.48096, df = 1, p-value = 0.488
#######----------------------
# ---- Two-factor ANOVA ----
# ENTORNO * SEXO expands to both main effects plus their interaction.
Datos_anova2 <- aov(
  GTO_TIC ~ ENTORNO * SEXO,
  data = Datos
)
summary(Datos_anova2)
## Df Sum Sq Mean Sq F value Pr(>F)
## ENTORNO 2 1246328 623164 303.922 <2e-16 ***
## SEXO 1 1367 1367 0.667 0.4149
## ENTORNO:SEXO 2 12235 6118 2.984 0.0521 .
## Residuals 294 602821 2050
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Normality of the two-factor model residuals. The recorded p-value is 0.383,
# so normality is not rejected.
shapiro.test(residuals(Datos_anova2))
##
## Shapiro-Wilk normality test
##
## data: residuals(Datos_anova2)
## W = 0.99467, p-value = 0.383
# Interaction plot: mean GTO_TIC per ENTORNO level (x axis), one trace per
# SEXO level. type = "b" draws both points and lines.
par(mfrow = c(1, 1))
interaction.plot(
  x.factor = Datos$ENTORNO,
  trace.factor = Datos$SEXO,
  response = Datos$GTO_TIC,
  type = "b",
  pch = c(25, 19),
  # legend() only documents "o" and "n" as box types; the previous value was
  # the digit "0", a typo for "o" (boxed legend).
  leg.bty = "o",
  leg.bg = "beige",
  las = 1,
  lwd = 2.5,
  col = "red3",
  main = "Interaccion Entorno-Sexo",
  ylab = "GTO TIC(Euros/año)",
  trace.label = "Sexo",
  xlab = "Entorno"
)
