# R Markdown

#######--------------------------------                    
# Per-column descriptive summary of the whole data set.
summary(Datos)
##     GTO_TIC      ENTORNO  SEXO   
##  Min.   :207.3   GU:100   H:150  
##  1st Qu.:378.4   R :100   M:150  
##  Median :444.9   U :100          
##  Mean   :432.2                   
##  3rd Qu.:492.6                   
##  Max.   :608.2

# ---- Numerical and graphical analysis of GTO_TIC (full sample) ----
summary(Datos[["GTO_TIC"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   207.3   378.4   444.9   432.2   492.6   608.2

# Box plot of the spending variable; las = 1 draws axis labels horizontally.
boxplot(
  Datos[["GTO_TIC"]],
  main = "Gasto anual en productos TIC",
  ylab = "euros/año",
  col = "gold",
  las = 1
)

# Histogram of the same variable; labels = TRUE prints each bin count
# above its bar.
hist(
  Datos[["GTO_TIC"]],
  main = "Gasto anual en productos TIC",
  xlab = "euros/año",
  ylab = "Frecuencia",
  col = "gold",
  border = "tomato1",
  labels = TRUE,
  las = 1
)

# ---- Build the subgroups ----
# One data frame per level of ENTORNO ("GU", "U", "R"); all columns are kept.
gu_gto <- subset(Datos, subset = ENTORNO == "GU")
u_gto <- subset(Datos, subset = ENTORNO == "U")
r_gto <- subset(Datos, subset = ENTORNO == "R")

# One data frame per level of SEXO ("M", "H"); all columns are kept.
M_gto <- subset(Datos, subset = SEXO == "M")
H_gto <- subset(Datos, subset = SEXO == "H")
# ---- Numerical and graphical analysis of GTO_TIC by ENTORNO ----
# Five-number summary plus mean for each ENTORNO subgroup.
summary(gu_gto[["GTO_TIC"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   424.2   467.2   494.7   497.4   527.0   608.2
summary(r_gto[["GTO_TIC"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   207.3   304.1   341.9   344.4   379.4   487.9
summary(u_gto[["GTO_TIC"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   339.9   422.0   453.4   454.7   482.8   568.1

# Single-panel layout, then one box per ENTORNO level side by side.
par(mfrow = c(1, 1))
boxplot(
  GTO_TIC ~ ENTORNO,
  data = Datos,
  main = "Gasto anual en TIC segun el entorno",
  ylab = "euros/año",
  col = "gold",
  las = 1
)

# Histograms of GTO_TIC for the three ENTORNO subgroups, drawn side by side
# in a 1 x 3 layout. labels = TRUE prints each bin count above its bar.
par(mfrow = c(1, 3))

# ENTORNO == "GU"
hist(gu_gto$GTO_TIC,
     xlab = "euros/año",
     ylab = "Frecuencia",
     main = "Gasto anual en TIC en gran urbe",
     col = "gold",
     border = "tomato1",
     labels = TRUE,
     las = 1)

# ENTORNO == "R"
hist(r_gto$GTO_TIC,
     xlab = "euros/año",
     ylab = "Frecuencia",
     main = "Gasto anual en TIC en zona rural",
     col = "gold",
     border = "tomato1",
     labels = TRUE,
     las = 1)

# ENTORNO == "U". This panel previously plotted H_gto (the SEXO == "H"
# subset), which did not match its "urbe" title or the other two panels.
hist(u_gto$GTO_TIC,
     xlab = "euros/año",
     ylab = "Frecuencia",
     main = "Gasto anual en TIC en urbe",
     col = "gold",
     border = "tomato1",
     labels = TRUE,
     las = 1)

# ---- Numerical and graphical analysis of GTO_TIC by SEXO ----
# Five-number summary plus mean for each SEXO subgroup.
summary(M_gto[["GTO_TIC"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   207.3   369.6   450.4   434.3   498.9   608.2
summary(H_gto[["GTO_TIC"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   248.0   381.1   442.9   430.0   485.0   552.8

# Single-panel layout, then one box per SEXO level with the y axis fixed
# to the 200-600 range.
par(mfrow = c(1, 1))
boxplot(
  GTO_TIC ~ SEXO,
  data = Datos,
  main = "Gasto anual en TIC por sexo",
  ylab = "euros/año",
  ylim = c(200, 600),
  col = "gold",
  las = 1
)

library(pyramid)

# Back-to-back (pyramid) plot of GTO_TIC: SEXO == "H" on the left,
# SEXO == "M" on the right.
#
# The spending column is selected by name. The previous `[, -2:-3]` form
# relied on GTO_TIC being the only column left after dropping positions 2
# and 3, which breaks if columns are added or reordered in Datos.
datos_piramide_h <- H_gto[["GTO_TIC"]]
datos_piramide_m <- M_gto[["GTO_TIC"]]

# data.frame() needs both vectors to have the same length; summary(Datos)
# reports 150 rows per sex, so that holds for this data set.
datos_grafico_piramide <- data.frame(datos_piramide_h, datos_piramide_m)

par(mfrow = c(1, 1))
# NOTE(review): Cstep = 100000 together with Clab = "" presumably suppresses
# the centre class labels (one row per observation here) -- confirm against
# the pyramid package documentation.
pyramid(datos_grafico_piramide,
        Llab = "Hombres",
        Rlab = "Mujeres",
        Cstep = 100000,
        Clab = "",
        Lcol = "green",
        Rcol = "cyan",
        Cgap = 0.001,
        GL = FALSE,
        main = "Gasto anual en TIC por sexo")

# Histograms of GTO_TIC by sex in a 1 x 2 layout.
#
# The data and titles were previously swapped: M_gto was titled "hombres"
# and H_gto "mujeres". The pyramid plot in this script labels the H subset
# "Hombres" and the M subset "Mujeres", so each title is now paired with the
# matching subset.
par(mfrow = c(1, 2))

# SEXO == "H"
hist(H_gto$GTO_TIC,
     xlab = "euros/año",
     ylab = "Frecuencia",
     main = "Gasto anual en TIC de hombres",
     col = "gold",
     border = "tomato1",
     labels = TRUE,
     las = 1)

# SEXO == "M"
hist(M_gto$GTO_TIC,
     xlab = "euros/año",
     ylab = "Frecuencia",
     main = "Gasto anual en TIC de mujeres",
     col = "gold",
     border = "tomato1",
     labels = TRUE,
     las = 1)

# ---- One-way ANOVA of GTO_TIC by ENTORNO: graphical assumption check ----

Datos_anova_entorno <- aov(formula = GTO_TIC ~ ENTORNO, data = Datos)
summary(Datos_anova_entorno)
##              Df  Sum Sq Mean Sq F value Pr(>F)    
## ENTORNO       2 1246328  623164   300.2 <2e-16 ***
## Residuals   297  616423    2075                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 2 x 2 layout for the diagnostic plots of the fitted model.
par(mfrow = c(2, 2))
plot(Datos_anova_entorno)

# ---- Numerical assumption checks for the ENTORNO model ----

# Homogeneity of variances across the ENTORNO groups.
bartlett.test(GTO_TIC ~ ENTORNO, data = Datos)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  GTO_TIC by ENTORNO
## Bartlett's K-squared = 9.8049, df = 2, p-value = 0.007428

# Normality of the one-way ANOVA residuals.
shapiro.test(residuals(Datos_anova_entorno))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(Datos_anova_entorno)
## W = 0.99505, p-value = 0.451
# ---- Pairwise comparisons between ENTORNO levels ----

# Tukey HSD on the one-way ANOVA fit, 95% family-wise confidence level.
TukeyHSD(x = Datos_anova_entorno, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GTO_TIC ~ ENTORNO, data = Datos)
## 
## $ENTORNO
##           diff        lwr        upr p adj
## R-GU -152.9645 -168.14073 -137.78827     0
## U-GU  -42.6250  -57.80123  -27.44877     0
## U-R   110.3395   95.16327  125.51573     0

# Rank-based (Kruskal-Wallis) test of GTO_TIC across ENTORNO levels.
kruskal.test(GTO_TIC ~ ENTORNO, data = Datos)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  GTO_TIC by ENTORNO
## Kruskal-Wallis chi-squared = 197.54, df = 2, p-value < 2.2e-16

# ---- Multiple comparisons after Kruskal-Wallis (pgirmess) ----
library(pgirmess)

kruskalmc(GTO_TIC ~ ENTORNO, data = Datos)
## Multiple comparison test after Kruskal-Wallis 
## p.value: 0.05 
## Comparisons
##      obs.dif critical.dif difference
## GU-R  169.27     29.36897       TRUE
## GU-U   56.21     29.36897       TRUE
## R-U   113.06     29.36897       TRUE
# ---- One-way ANOVA of GTO_TIC by SEXO ----
Datos_anova_sexo <- aov(formula = GTO_TIC ~ SEXO, data = Datos)
summary(Datos_anova_sexo)
##              Df  Sum Sq Mean Sq F value Pr(>F)
## SEXO          1    1367    1367   0.219   0.64
## Residuals   298 1861384    6246

# 2 x 2 layout for the diagnostic plots of the fitted model.
par(mfrow = c(2, 2))
plot(Datos_anova_sexo)

# Homogeneity of variances between the two SEXO groups.
bartlett.test(GTO_TIC ~ SEXO, data = Datos)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  GTO_TIC by SEXO
## Bartlett's K-squared = 3.7675, df = 1, p-value = 0.05226

# Normality of the residuals of the SEXO model.
shapiro.test(residuals(Datos_anova_sexo))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(Datos_anova_sexo)
## W = 0.9719, p-value = 1.333e-05

# ---- Rank-based alternative (Kruskal-Wallis) for SEXO ----

kruskal.test(GTO_TIC ~ SEXO, data = Datos)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  GTO_TIC by SEXO
## Kruskal-Wallis chi-squared = 0.48096, df = 1, p-value = 0.488
#######----------------------


# ---- Two-factor ANOVA: ENTORNO, SEXO and their interaction ----

Datos_anova2 <- aov(formula = GTO_TIC ~ ENTORNO * SEXO, data = Datos)
summary(Datos_anova2)
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## ENTORNO        2 1246328  623164 303.922 <2e-16 ***
## SEXO           1    1367    1367   0.667 0.4149    
## ENTORNO:SEXO   2   12235    6118   2.984 0.0521 .  
## Residuals    294  602821    2050                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Normality of the residuals of the two-factor model.
shapiro.test(residuals(Datos_anova2))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(Datos_anova2)
## W = 0.99467, p-value = 0.383
# Interaction plot of GTO_TIC: ENTORNO on the x axis, one trace per SEXO
# level, drawn in a single-panel layout.
par(mfrow = c(1, 1))

# leg.bty is passed on to legend(), whose documented box types are "o"
# (box) and "n" (none). The previous value "0" (digit zero) drew a box only
# because it is not "n"; "o" states the intent and keeps the beige legend
# background visible.
interaction.plot(x.factor = Datos$ENTORNO,
                 trace.factor = Datos$SEXO,
                 response = Datos$GTO_TIC,
                 type = "b", pch = c(25, 19), leg.bty = "o", leg.bg = "beige",
                 las = 1, lwd = 2.5, col = "red3",
                 main = "Interaccion Entorno-Sexo",
                 ylab = "GTO TIC(Euros/año)", trace.label = "Sexo",
                 xlab = "Entorno")