LECTURA DEL ARCHIVO Salaries COMO TABLA

# Load the Salaries data set: semicolon-separated text with a header row,
# "." as the decimal mark and the literal string "NA" marking missing values.
salaries <- read.table(
  file = "C:/Users/valen/Desktop/Valen/ESTUDIOS/MAESTRÍA/Nueva carpeta/TALLER 2/TALLER 2/Salaries.csv.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  na.strings = "NA"
)

# Column types and a preview of the first values of the freshly read data.
str(salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : chr  "Prof" "Prof" "AsstProf" "Prof" ...
##  $ discipline   : chr  "B" "B" "B" "B" ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : chr  "Male" "Male" "Male" "Male" ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
# Per-column summaries (character columns only report length/class/mode).
summary(salaries)
##      rank            discipline        yrs.since.phd    yrs.service   
##  Length:397         Length:397         Min.   : 1.00   Min.   : 0.00  
##  Class :character   Class :character   1st Qu.:12.00   1st Qu.: 7.00  
##  Mode  :character   Mode  :character   Median :21.00   Median :16.00  
##                                        Mean   :22.31   Mean   :17.61  
##                                        3rd Qu.:32.00   3rd Qu.:27.00  
##                                        Max.   :56.00   Max.   :60.00  
##      sex                salary      
##  Length:397         Min.   : 57800  
##  Class :character   1st Qu.: 91000  
##  Mode  :character   Median :107300  
##                     Mean   :113706  
##                     3rd Qu.:134185  
##                     Max.   :231545
# Number of observations per sex.
table(salaries[["sex"]])
## 
## Female   Male 
##     39    358

CONVERSIÓN DE VARIABLES A FACTORES

# Recode the categorical columns as factors with an explicit level order.
# rank is ordinal (AsstProf < AssocProf < Prof); discipline and sex are
# nominal. Values not listed in `levels` would become NA.
salaries[["rank"]] <- ordered(
  salaries[["rank"]],
  levels = c("AsstProf", "AssocProf", "Prof")
)
salaries[["discipline"]] <- factor(
  salaries[["discipline"]],
  levels = c("A", "B")
)
salaries[["sex"]] <- factor(
  salaries[["sex"]],
  levels = c("Female", "Male")
)
# Confirm the new column types.
str(salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Ord.factor w/ 3 levels "AsstProf"<"AssocProf"<..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...

ESTADÍSTICAS DESCRIPTIVAS INICIALES

# Five-number summary plus mean of salary over the whole sample.
summary(salaries[["salary"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   57800   91000  107300  113706  134185  231545
# Mean and median salary by sex.
tapply(salaries[["salary"]], salaries[["sex"]], mean)
##   Female     Male 
## 101002.4 115090.4
tapply(salaries[["salary"]], salaries[["sex"]], median)
## Female   Male 
## 103750 108043
# Mean and median salary by academic rank.
tapply(salaries[["salary"]], salaries[["rank"]], mean)
##  AsstProf AssocProf      Prof 
##  80775.99  93876.44 126772.11
tapply(salaries[["salary"]], salaries[["rank"]], median)
##  AsstProf AssocProf      Prof 
##   79800.0   95626.5  123321.5
# Group sizes behind the figures above.
table(salaries[["sex"]])
## 
## Female   Male 
##     39    358
table(salaries[["rank"]])
## 
##  AsstProf AssocProf      Prof 
##        67        64       266

GRÁFICOS EXPLORATORIOS

# Salary distribution by sex (one box per factor level: Female, Male).
boxplot(
  salary ~ sex,
  data = salaries,
  col = c("pink", "lightblue"),
  main = "Distribución del salario por sexo",
  xlab = "Sexo",
  ylab = "Salario (USD)"
)

# Salary distribution by academic rank (boxes follow the factor level order).
boxplot(
  salary ~ rank,
  data = salaries,
  col = c("lightgreen", "lightyellow", "lightgray"),
  main = "Salario por rango académico",
  xlab = "Rango",
  ylab = "Salario (USD)"
)

ESTADÍSTICA DESCRIPTIVA DEL SALARIO POR GÉNERO

# Full summary (min, quartiles, mean, max) of salary within each sex;
# the result is a list with one summary per level.
tapply(salaries[["salary"]], salaries[["sex"]], summary)
## $Female
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   62884   77250  103750  101002  117003  161101 
## 
## $Male
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   57800   92000  108043  115090  134864  231545

COMPARACIÓN DE SALARIO POR GÉNERO Y RANGO

# Mean salary for every rank x sex cell (rows: rank, columns: sex).
tapply(
  salaries[["salary"]],
  list(salaries[["rank"]], salaries[["sex"]]),
  mean
)
##              Female      Male
## AsstProf   78049.91  81311.46
## AssocProf  88512.80  94869.70
## Prof      121967.61 127120.82
# Median salary for the same cells.
tapply(
  salaries[["salary"]],
  list(salaries[["rank"]], salaries[["sex"]]),
  median
)
##             Female     Male
## AsstProf   77000.0  80182.0
## AssocProf  90556.5  95626.5
## Prof      120257.5 123996.0
# Cell counts, to judge how much data supports each figure.
table(salaries[["rank"]], salaries[["sex"]])
##            
##             Female Male
##   AsstProf      11   56
##   AssocProf     10   54
##   Prof          18  248

PARTICIPACIÓN DE GÉNERO POR RANGO

# Column proportions (margin = 2) of the rank x sex table: within each sex,
# the share of people at each rank, so each sex sums to 1 across ranks.
tabla_porcentaje <- prop.table(
  table(salaries[["rank"]], salaries[["sex"]]),
  margin = 2
)
# Transpose so sexes are rows (bars within a group) and ranks are columns
# (the groups on the x axis).
tabla_t <- t(tabla_porcentaje)
# Percentage labels, flattened in the same column-major order as the bars.
etiquetas_porcentaje <- paste0(round(100 * tabla_t, digits = 1), "%")

# barplot() returns the bar midpoints, reused below to position the labels.
bp <- barplot(
  height = tabla_t,
  beside = TRUE,
  col = c("pink", "lightblue"),
  main = "Distribución por Sexo y Rango",
  xlab = "Rango",
  ylab = "Proporción",
  ylim = c(0, max(tabla_t) * 1.1)
)

# pos = 3 draws each label just above the top of its bar.
text(
  x = bp,
  y = tabla_t,
  labels = etiquetas_porcentaje,
  pos = 3,
  cex = 0.8
)
legend(
  "topleft",
  legend = rownames(tabla_t),
  fill = c("pink", "lightblue")
)

SALARIO POR RANGO ACADÉMICO Y GÉNERO

# Salary for every sex x rank combination. The formula produces six groups
# with sex varying fastest, so the two colours alternate Female/Male.
boxplot(
  salary ~ sex * rank,
  data = salaries,
  col = rep(c("pink", "lightblue"), times = 3),
  las = 1,
  main = "Salario por Sexo dentro de cada Rango",
  xlab = "Grupos: Sexo y Rango",
  ylab = "Salario (USD)"
)

EXPERIENCIA ACADÉMICA

# Mean and median of the two experience measures (years of service and
# years since PhD), computed separately for each sex.
mean_service <- tapply(salaries$yrs.service, salaries$sex, mean)
median_service <- tapply(salaries$yrs.service, salaries$sex, median)
mean_phd <- tapply(salaries$yrs.since.phd, salaries$sex, mean)
median_phd <- tapply(salaries$yrs.since.phd, salaries$sex, median)

# Display the four summaries as one table (rows: statistic, columns: sex) so
# the numeric comparison appears in the output next to the plot; rbind()
# takes the row names from the variable names.
print(round(rbind(mean_service, median_service, mean_phd, median_phd), 2))

# Distribution of years since PhD by sex.
boxplot(yrs.since.phd ~ sex,
        data = salaries,
        main = "Experiencia Académica (Años desde PhD) por Sexo",
        xlab = "Sexo",
        ylab = "Años desde el PhD",
        col = c("pink", "lightblue"))

DISTRIBUCIÓN POR ÁREA APLICADA

# Discipline x sex counts, then column proportions (margin = 2): within each
# sex, the share of people in discipline A and in discipline B.
conteo_disciplina <- table(salaries[["discipline"]], salaries[["sex"]])
tabla_porcentaje_disciplina <- prop.table(conteo_disciplina, margin = 2)

# One group of bars per sex, one bar per discipline; the legend labels follow
# the row order of the table (A, then B).
barplot(
  height = tabla_porcentaje_disciplina,
  beside = TRUE,
  col = c("coral", "seagreen"),
  legend.text = c("A (Teórica)", "B (Aplicada)"),
  args.legend = list(x = "topright"),
  ylim = c(0, 1.1),
  main = "Distribución de Género por Disciplina",
  xlab = "Sexo",
  ylab = "Proporción"
)