LECTURA DEL ARCHIVO Salaries COMO TABLA
# Read the Salaries data set from a semicolon-delimited text file whose first
# row holds the column names. The `##` lines that follow each call are the
# console output recorded when the script was run.
ruta_salaries <- "C:/Users/valen/Desktop/Valen/ESTUDIOS/MAESTRÍA/Nueva carpeta/TALLER 2/TALLER 2/Salaries.csv.csv"
salaries <- read.table(
  file = ruta_salaries,
  header = TRUE,
  sep = ";",
  dec = ".",
  na.strings = "NA"
)

# Column types and a preview of the first values of every column.
str(salaries)
## 'data.frame': 397 obs. of 6 variables:
## $ rank : chr "Prof" "Prof" "AsstProf" "Prof" ...
## $ discipline : chr "B" "B" "B" "B" ...
## $ yrs.since.phd: int 19 20 4 45 40 6 30 45 21 18 ...
## $ yrs.service : int 18 16 3 39 41 6 23 45 20 18 ...
## $ sex : chr "Male" "Male" "Male" "Male" ...
## $ salary : int 139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...

# Per-column summary: five-number summary plus mean for the numeric columns,
# length/class/mode for the character columns.
summary(salaries)
## rank discipline yrs.since.phd yrs.service
## Length:397 Length:397 Min. : 1.00 Min. : 0.00
## Class :character Class :character 1st Qu.:12.00 1st Qu.: 7.00
## Mode :character Mode :character Median :21.00 Median :16.00
## Mean :22.31 Mean :17.61
## 3rd Qu.:32.00 3rd Qu.:27.00
## Max. :56.00 Max. :60.00
## sex salary
## Length:397 Min. : 57800
## Class :character 1st Qu.: 91000
## Mode :character Median :107300
## Mean :113706
## 3rd Qu.:134185
## Max. :231545

# Number of observations for each value of `sex`.
table(salaries[["sex"]])
##
## Female Male
## 39 358
CONVERSIÓN DE VARIABLES A FACTORES
# Turn the three categorical columns into factors with an explicit level
# order. `rank` becomes an ordered factor (AsstProf < AssocProf < Prof);
# `discipline` and `sex` stay unordered. Any value not listed in `levels`
# would become NA.
salaries[["rank"]] <- ordered(
  salaries[["rank"]],
  levels = c("AsstProf", "AssocProf", "Prof")
)
salaries[["discipline"]] <- factor(
  salaries[["discipline"]],
  levels = c("A", "B")
)
salaries[["sex"]] <- factor(
  salaries[["sex"]],
  levels = c("Female", "Male")
)

# Confirm the new column classes.
str(salaries)
## 'data.frame': 397 obs. of 6 variables:
## $ rank : Ord.factor w/ 3 levels "AsstProf"<"AssocProf"<..: 3 3 1 3 3 2 3 3 3 3 ...
## $ discipline : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
## $ yrs.since.phd: int 19 20 4 45 40 6 30 45 21 18 ...
## $ yrs.service : int 18 16 3 39 41 6 23 45 20 18 ...
## $ sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
## $ salary : int 139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
ESTADÍSTICAS DESCRIPTIVAS INICIALES
# Overall distribution of salary.
summary(salaries[["salary"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 57800 91000 107300 113706 134185 231545

# Mean and median salary within each sex.
with(salaries, tapply(salary, sex, mean))
## Female Male
## 101002.4 115090.4
with(salaries, tapply(salary, sex, median))
## Female Male
## 103750 108043

# Mean and median salary within each academic rank.
with(salaries, tapply(salary, rank, mean))
## AsstProf AssocProf Prof
## 80775.99 93876.44 126772.11
with(salaries, tapply(salary, rank, median))
## AsstProf AssocProf Prof
## 79800.0 95626.5 123321.5

# Group sizes behind the figures above. The columns are passed with `[[`
# rather than as bare names so that table() adds no dimension label.
table(salaries[["sex"]])
##
## Female Male
## 39 358
table(salaries[["rank"]])
##
## AsstProf AssocProf Prof
## 67 64 266
GRÁFICOS EXPLORATORIOS
# Salary distribution split by sex; colours are assigned to the groups in
# the order the boxes are drawn.
boxplot(
  salary ~ sex,
  data = salaries,
  col = c("pink", "lightblue"),
  xlab = "Sexo",
  ylab = "Salario (USD)",
  main = "Distribución del salario por sexo"
)

# Salary distribution split by academic rank, one colour per rank.
boxplot(
  salary ~ rank,
  data = salaries,
  col = c("lightgreen", "lightyellow", "lightgray"),
  xlab = "Rango",
  ylab = "Salario (USD)",
  main = "Salario por rango académico"
)

ESTADÍSTICA DESCRIPTIVA DEL SALARIO POR GÉNERO
# Five-number summary plus mean of salary, computed separately for each sex.
# The result is a list with one summary per level of `sex`.
with(salaries, tapply(salary, sex, summary))
## $Female
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 62884 77250 103750 101002 117003 161101
##
## $Male
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 57800 92000 108043 115090 134864 231545
COMPARACIÓN DE SALARIO POR GÉNERO Y RANGO
# Two-way tables of salary: rows are academic ranks, columns are sexes.
# First the cell means, then the cell medians.
with(salaries, tapply(salary, list(rank, sex), mean))
## Female Male
## AsstProf 78049.91 81311.46
## AssocProf 88512.80 94869.70
## Prof 121967.61 127120.82
with(salaries, tapply(salary, list(rank, sex), median))
## Female Male
## AsstProf 77000.0 80182.0
## AssocProf 90556.5 95626.5
## Prof 120257.5 123996.0

# Number of observations in each rank-by-sex cell.
table(salaries[["rank"]], salaries[["sex"]])
##
## Female Male
## AsstProf 11 56
## AssocProf 10 54
## Prof 18 248
PARTICIPACIÓN DE GÉNERO POR RANGO
# Share of each rank within each sex: margin = 2 normalises every column of
# the rank-by-sex table, so the proportions for one sex add up to 1.
tabla_porcentaje <- prop.table(
  table(salaries[["rank"]], salaries[["sex"]]),
  margin = 2
)

# Transpose to sex-by-rank so that barplot() draws one group of bars per rank
# with one bar per sex inside it.
tabla_t <- t(tabla_porcentaje)

# Percentage labels, one per bar, in the same column-major order as the bars.
etiquetas_porcentaje <- paste0(round(100 * tabla_t, digits = 1), "%")

# barplot() returns the x positions of the bar midpoints, reused below to
# place the labels. The y axis gets 10% headroom for the labels.
bp <- barplot(
  tabla_t,
  beside = TRUE,
  col = c("pink", "lightblue"),
  ylim = c(0, 1.1 * max(tabla_t)),
  xlab = "Rango",
  ylab = "Proporción",
  main = "Distribución por Sexo y Rango"
)

# Write each percentage just above its bar (pos = 3 means "above").
text(
  x = bp,
  y = tabla_t,
  labels = etiquetas_porcentaje,
  pos = 3,
  cex = 0.8
)
legend(
  "topleft",
  legend = rownames(tabla_t),
  fill = c("pink", "lightblue")
)

SALARIO POR RANGO ACADÉMICO Y GÉNERO
# One box per sex-and-rank combination. In the formula `sex` varies fastest,
# so the two-colour pattern is repeated once for each of the three ranks.
boxplot(
  salary ~ sex * rank,
  data = salaries,
  col = rep(c("pink", "lightblue"), times = 3),
  las = 1,
  xlab = "Grupos: Sexo y Rango",
  ylab = "Salario (USD)",
  main = "Salario por Sexo dentro de cada Rango"
)

EXPERIENCIA ACADÉMICA
# Mean and median of the two experience measures, computed per sex.
# The results are stored, not printed.
mean_service <- with(salaries, tapply(yrs.service, sex, mean))
median_service <- with(salaries, tapply(yrs.service, sex, median))
mean_phd <- with(salaries, tapply(yrs.since.phd, sex, mean))
median_phd <- with(salaries, tapply(yrs.since.phd, sex, median))

# Distribution of years since the PhD for each sex.
boxplot(
  yrs.since.phd ~ sex,
  data = salaries,
  col = c("pink", "lightblue"),
  xlab = "Sexo",
  ylab = "Años desde el PhD",
  main = "Experiencia Académica (Años desde PhD) por Sexo"
)

DISTRIBUCIÓN POR DISCIPLINA (TEÓRICA Y APLICADA)
# Counts of discipline (rows) by sex (columns).
conteo_disciplina <- table(salaries[["discipline"]], salaries[["sex"]])

# Share of each discipline within each sex: every column sums to 1.
tabla_porcentaje_disciplina <- prop.table(conteo_disciplina, margin = 2)

# One group of bars per sex, one bar per discipline. The legend labels are
# hard-coded in the row order of the table (A first, then B). The upper y
# limit of 1.1 leaves room above the bars for the legend.
barplot(
  tabla_porcentaje_disciplina,
  beside = TRUE,
  col = c("coral", "seagreen"),
  ylim = c(0, 1.1),
  legend.text = c("A (Teórica)", "B (Aplicada)"),
  args.legend = list(x = "topright"),
  xlab = "Sexo",
  ylab = "Proporción",
  main = "Distribución de Género por Disciplina"
)
