library(summarytools)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.3
library("readxl")
## Warning: package 'readxl' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
rutadearchivo="C:/Users/PLOZANO/Desktop/vinculacion/EncuestasUESFA.xlsx"
Datavinculacion=read_excel(rutadearchivo)
Datavinculacion
## # A tibble: 24 × 39
##    ESTUDIANTES Sexo   Edad Zona     P1    P2    P3    P4    P5    P6    P7    P8
##    <chr>       <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 E1          F        17 Urba…     3     4     4     2     2     3     3     2
##  2 E2          F        17 Rural     4     4     4     3     3     4     2     3
##  3 E3          F        17 Urba…     3     4     5     2     2     3     2     2
##  4 E4          M        16 Urba…     3     5     4     2     4     4     1     3
##  5 E5          F        16 Rural     5     4     4     3     3     3     2     4
##  6 E6          M        17 Rural     3     4     4     3     3     2     2     1
##  7 E7          M        17 Urba…     3     4     5     2     3     3     3     1
##  8 E8          F        16 Urba…     4     4     4     2     3     3     1     1
##  9 E9          M        17 Urba…     4     5     4     3     4     3     2     3
## 10 E10         M        16 Urba…     4     4     3     3     3     3     2     3
## # ℹ 14 more rows
## # ℹ 27 more variables: P9 <dbl>, P10 <dbl>, P11 <dbl>, P12 <dbl>, P13 <dbl>,
## #   P14 <dbl>, P15 <dbl>, P16 <dbl>, P17 <dbl>, P18 <dbl>, P19 <dbl>,
## #   P20 <dbl>, P21 <dbl>, P22 <dbl>, P23 <dbl>, P24 <dbl>, P25 <dbl>,
## #   P26 <dbl>, P27 <dbl>, P28 <dbl>, P29 <dbl>, P30 <dbl>, P31 <dbl>,
## #   P32 <dbl>, P33 <dbl>, P34 <dbl>, P35 <dbl>
rutadearchivo1="C:/Users/PLOZANO/Desktop/vinculacion/Encuestas_opciones.xlsx"

Datavinculacionl=read_excel(rutadearchivo1)
Datavinculacionl
## # A tibble: 24 × 35
##    ESTUDIANTES Sexo   Edad Zona  P1    P2    P3    P4    P5    P6    P7    P8   
##    <chr>       <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1 E1          F        17 Urba… Neut… De a… De a… Muy … Muy … Algo  Algo  Casi…
##  2 E2          F        17 Rural De a… De a… De a… Algo  Algo  Bast… Muy … A ve…
##  3 E3          F        17 Urba… Neut… De a… Tota… Muy … Muy … Algo  Muy … Casi…
##  4 E4          M        16 Urba… Neut… Tota… De a… Muy … Bast… Bast… Nada  A ve…
##  5 E5          F        16 Rural Tota… De a… De a… Algo  Algo  Algo  Muy … Frec…
##  6 E6          M        17 Rural Neut… De a… De a… Algo  Algo  Muy … Muy … Nunca
##  7 E7          M        17 Urba… Neut… De a… Tota… Muy … Algo  Algo  Algo  Nunca
##  8 E8          F        16 Urba… De a… De a… De a… Muy … Algo  Algo  Nada  Nunca
##  9 E9          M        17 Urba… De a… Tota… De a… Algo  Bast… Algo  Muy … A ve…
## 10 E10         M        16 Urba… De a… De a… Neut… Algo  Algo  Algo  Muy … A ve…
## # ℹ 14 more rows
## # ℹ 23 more variables: P9 <chr>, P10 <chr>, P11 <chr>, P12 <chr>, P13 <chr>,
## #   P14 <chr>, P15 <chr>, P16 <chr>, P17 <chr>, P18 <chr>, P19 <chr>,
## #   P20 <chr>, P21 <chr>, P22 <chr>, P23 <chr>, P24 <chr>, P25 <chr>,
## #   P26 <chr>, P27 <chr>, P28 <chr>, P29 <chr>, P30 <chr>, P31 <chr>
library(psych)
## Warning: package 'psych' was built under R version 4.2.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
datos <- Datavinculacion[, paste0("P", 5:35)]

# Aplica el Alfa de Cronbach
resultado <- alpha(datos)
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in alpha(datos): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( P7 P21 P33 P34 P35 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
print(resultado$total$raw_alpha)  # Solo el valor
## [1] 0.7322394
cor(Datavinculacion$P2, Datavinculacion$P5)
## [1] 0.4763305
cor(Datavinculacion$P1, Datavinculacion$P6)
## [1] 0.1906807
#install.packages("corrplot")
library(corrplot)
## corrplot 0.95 loaded
datos_preguntas <- Datavinculacion[, paste0("P", 1:35)]

datos_preguntas
## # A tibble: 24 × 35
##       P1    P2    P3    P4    P5    P6    P7    P8    P9   P10   P11   P12   P13
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     3     4     4     2     2     3     3     2     2     3     3     3     4
##  2     4     4     4     3     3     4     2     3     3     3     4     4     4
##  3     3     4     5     2     2     3     2     2     3     3     3     4     4
##  4     3     5     4     2     4     4     1     3     3     3     4     5     4
##  5     5     4     4     3     3     3     2     4     3     4     2     5     5
##  6     3     4     4     3     3     2     2     1     2     3     3     4     4
##  7     3     4     5     2     3     3     3     1     3     3     4     5     5
##  8     4     4     4     2     3     3     1     1     4     3     4     5     5
##  9     4     5     4     3     4     3     2     3     3     4     4     5     5
## 10     4     4     3     3     3     3     2     3     2     3     3     4     3
## # ℹ 14 more rows
## # ℹ 22 more variables: P14 <dbl>, P15 <dbl>, P16 <dbl>, P17 <dbl>, P18 <dbl>,
## #   P19 <dbl>, P20 <dbl>, P21 <dbl>, P22 <dbl>, P23 <dbl>, P24 <dbl>,
## #   P25 <dbl>, P26 <dbl>, P27 <dbl>, P28 <dbl>, P29 <dbl>, P30 <dbl>,
## #   P31 <dbl>, P32 <dbl>, P33 <dbl>, P34 <dbl>, P35 <dbl>
matriz_cor <- cor(datos_preguntas, use = "pairwise.complete.obs")

cor_matrix <- cor(datos_preguntas, use = "complete.obs", method = "spearman")


corrplot(matriz_cor, method = "color", type = "upper", 
         tl.col = "black", tl.cex = 0.8, 
         col = colorRampPalette(c("blue", "white", "red"))(200))

table(Datavinculacionl$Sexo, Datavinculacionl$P3)
##    
##     De acuerdo Neutral Totalmente de acuerdo
##   F          7       4                     5
##   M          5       1                     2
chisq=chisq.test(table(Datavinculacionl$Sexo, Datavinculacionl$P3))
## Warning in chisq.test(table(Datavinculacionl$Sexo, Datavinculacionl$P3)):
## Chi-squared approximation may be incorrect
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  table(Datavinculacionl$Sexo, Datavinculacionl$P3)
## X-squared = 0.84643, df = 2, p-value = 0.6549
chisq$expected 
##    
##     De acuerdo  Neutral Totalmente de acuerdo
##   F          8 3.333333              4.666667
##   M          4 1.666667              2.333333
#Si la distribución de las respuestas a la pregunta P1 difiere significativamente entre estudiantes de zona urbana y zona rural.
# Tabla cruzada
table(Datavinculacionl$Sexo, Datavinculacionl$P1)
##    
##     De acuerdo Neutral Totalmente de acuerdo
##   F         11       4                     1
##   M          5       3                     0
# Con porcentajes
porcentajes_tabla=prop.table(table(Datavinculacionl$Sexo, Datavinculacionl$P1), margin = 1)


# Luego, multiplicar por 100 y redondea los valores
tabla_entera <- (porcentajes_tabla * 100)

# Muestra el resultado
print(tabla_entera)
##    
##     De acuerdo Neutral Totalmente de acuerdo
##   F      68.75   25.00                  6.25
##   M      62.50   37.50                  0.00
# Comparación de P1 según Zona
wilcox.test(P1 ~ Zona, data = Datavinculacion)  # Prueba no paramétrica
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  P1 by Zona
## W = 59.5, p-value = 0.3215
## alternative hypothesis: true location shift is not equal to 0
#Evalúa si la distribución de las respuestas a P1 (que normalmente es ordinal, de 1 a 5) es diferente entre los estudiantes de zona rural y urbana.
#install.packages("factoextra")
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
datos_preguntas
## # A tibble: 24 × 35
##       P1    P2    P3    P4    P5    P6    P7    P8    P9   P10   P11   P12   P13
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     3     4     4     2     2     3     3     2     2     3     3     3     4
##  2     4     4     4     3     3     4     2     3     3     3     4     4     4
##  3     3     4     5     2     2     3     2     2     3     3     3     4     4
##  4     3     5     4     2     4     4     1     3     3     3     4     5     4
##  5     5     4     4     3     3     3     2     4     3     4     2     5     5
##  6     3     4     4     3     3     2     2     1     2     3     3     4     4
##  7     3     4     5     2     3     3     3     1     3     3     4     5     5
##  8     4     4     4     2     3     3     1     1     4     3     4     5     5
##  9     4     5     4     3     4     3     2     3     3     4     4     5     5
## 10     4     4     3     3     3     3     2     3     2     3     3     4     3
## # ℹ 14 more rows
## # ℹ 22 more variables: P14 <dbl>, P15 <dbl>, P16 <dbl>, P17 <dbl>, P18 <dbl>,
## #   P19 <dbl>, P20 <dbl>, P21 <dbl>, P22 <dbl>, P23 <dbl>, P24 <dbl>,
## #   P25 <dbl>, P26 <dbl>, P27 <dbl>, P28 <dbl>, P29 <dbl>, P30 <dbl>,
## #   P31 <dbl>, P32 <dbl>, P33 <dbl>, P34 <dbl>, P35 <dbl>
pca_result <- prcomp(datos_preguntas, scale. = TRUE)
summary(pca_result)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.5891 2.0836 1.73843 1.61721 1.57111 1.46441 1.3555
## Proportion of Variance 0.1915 0.1240 0.08635 0.07473 0.07053 0.06127 0.0525
## Cumulative Proportion  0.1915 0.3156 0.40192 0.47664 0.54717 0.60844 0.6609
##                            PC8     PC9    PC10    PC11    PC12    PC13   PC14
## Standard deviation     1.33256 1.31981 1.19901 1.10234 1.01360 0.94400 0.8714
## Proportion of Variance 0.05073 0.04977 0.04107 0.03472 0.02935 0.02546 0.0217
## Cumulative Proportion  0.71167 0.76144 0.80252 0.83723 0.86659 0.89205 0.9137
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.84257 0.78190 0.70570 0.64352 0.57106 0.45484 0.36162
## Proportion of Variance 0.02028 0.01747 0.01423 0.01183 0.00932 0.00591 0.00374
## Cumulative Proportion  0.93403 0.95149 0.96572 0.97756 0.98687 0.99278 0.99652
##                           PC22    PC23     PC24
## Standard deviation     0.28823 0.19681 6.57e-16
## Proportion of Variance 0.00237 0.00111 0.00e+00
## Cumulative Proportion  0.99889 1.00000 1.00e+00
# Porcentaje de varianza explicada por cada componente
varianza <- pca_result$sdev^2 / sum(pca_result$sdev^2)

# Varianza acumulada
var_acumulada <- cumsum(varianza)

# Mostrar los primeros 10 componentes
data.frame(Componente = 1:10,
           `Varianza (%)` = round(varianza[1:10] * 100, 2),
           `Acumulada (%)` = round(var_acumulada[1:10] * 100, 2))
##    Componente Varianza.... Acumulada....
## 1           1        19.15         19.15
## 2           2        12.40         31.56
## 3           3         8.63         40.19
## 4           4         7.47         47.66
## 5           5         7.05         54.72
## 6           6         6.13         60.84
## 7           7         5.25         66.09
## 8           8         5.07         71.17
## 9           9         4.98         76.14
## 10         10         4.11         80.25
fviz_pca_var(pca_result, col.var = "contrib",
             gradient.cols = c("blue", "yellow", "red"),
             repel = TRUE)