Identificar medidas de dispersión y visualizar datos de alumnos inscritos de una institución de educación superior.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Download the student-averages CSV from GitHub into `datos`.
datos <- utils::read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/probabilidad-y-estad-stica/master/datos/promedios%20alumnos/datos%20alumnos%20promedios%20SEP%202020.csv",
  encoding = "UTF-8"
)
Identificar los primeros y últimos 6 registros.
# Preview the first six rows of the dataset.
head(datos, n = 6L)
## No..Control Alumno Semestre Cr..Apr. Carga Promedio Carrera
## 1 20190001 1 11 198 19 80.21 SISTEMAS
## 2 20190002 2 11 235 10 84.33 SISTEMAS
## 3 20190003 3 9 235 10 95.25 SISTEMAS
## 4 20190004 4 9 226 19 95.00 SISTEMAS
## 5 20190005 5 10 231 14 82.32 SISTEMAS
## 6 20190006 6 9 212 23 95.02 SISTEMAS
# Preview the last six rows of the dataset.
tail(datos, n = 6L)
## No..Control Alumno Semestre Cr..Apr. Carga Promedio Carrera
## 5924 20195924 5924 2 27 28 92.83 ADMINISTRACION
## 5925 20195925 5925 7 94 13 80.95 ADMINISTRACION
## 5926 20195926 5926 5 103 32 92.68 ADMINISTRACION
## 5927 20195927 5927 4 79 34 86.18 ADMINISTRACION
## 5928 20195928 5928 5 108 32 90.48 ADMINISTRACION
## 5929 20195929 5929 7 169 32 92.33 ADMINISTRACION
# Box plot of Promedio for every degree program (one box per Carrera),
# keeping only the rows whose Promedio is greater than 0.
datos %>%
  filter(Promedio > 0) %>%
  ggplot(aes(x = Carrera, y = Promedio, color = Carrera)) +
  geom_boxplot() +
  # The plot covers all programs, so the title must not name a single one
  # (it previously read "Promedios de Administración").
  labs(title = "Promedios por carrera")
# Histograms of Promedio per degree program ----
# For each program: build its data frame (rows with Promedio > 0 only), print
# the mean and the median of Promedio, and draw a histogram marking both.
# The per-program data frames keep their original names because the scatter
# plots further down reuse them.

# Return the rows of `alumnos` whose Carrera equals `nombre_carrera` and whose
# Promedio is greater than 0.
filtrar_carrera <- function(alumnos, nombre_carrera) {
  filter(alumnos, Promedio > 0, Carrera == nombre_carrera)
}

# Draw a 30-bin histogram of `alumnos$Promedio` with one vertical line at the
# median and one at the mean.
#   alumnos:  data frame with a numeric `Promedio` column.
#   etiqueta: program name appended to the plot title.
# Returns the ggplot object (printed automatically when called at top level).
histograma_promedio <- function(alumnos, etiqueta) {
  media <- mean(alumnos$Promedio)
  mediana <- median(alumnos$Promedio)
  # One row per statistic, so exactly two lines are drawn. Mapping
  # `xintercept = median(Promedio)` against the full data frame draws one
  # overlapping line for every row instead.
  lineas <- data.frame(
    estadistico = c("mediana", "media"),
    valor = c(mediana, media)
  )
  ggplot(alumnos, aes(Promedio)) +
    geom_histogram(bins = 30) +
    geom_vline(
      data = lineas,
      aes(xintercept = valor, color = estadistico),
      linetype = "solid",
      size = 2
    ) +
    labs(
      title = paste("Histograma de Promedio de", etiqueta),
      subtitle = paste(
        "Media = ", round(media, 2),
        ", Mediana = ", round(mediana, 2)
      ),
      # Keep the legend title that the original constant colour mapping
      # produced.
      colour = "colour"
    )
}

administracion <- filtrar_carrera(datos, "ADMINISTRACION")
mean(administracion$Promedio)
## [1] 89.44312
median(administracion$Promedio)
## [1] 89.605
histograma_promedio(administracion, "Administración")

Arquitectura <- filtrar_carrera(datos, "ARQUITECTURA")
mean(Arquitectura$Promedio)
## [1] 86.46481
median(Arquitectura$Promedio)
## [1] 86.58
histograma_promedio(Arquitectura, "Arquitectura")

Bioquimica <- filtrar_carrera(datos, "BIOQUIMICA")
mean(Bioquimica$Promedio)
## [1] 84.68143
median(Bioquimica$Promedio)
## [1] 84.06
histograma_promedio(Bioquimica, "Bioquimica")

electrica <- filtrar_carrera(datos, "ELECTRICA")
mean(electrica$Promedio)
## [1] 83.77305
median(electrica$Promedio)
## [1] 83.24
histograma_promedio(electrica, "Electrica")

civil <- filtrar_carrera(datos, "CIVIL")
mean(civil$Promedio)
## [1] 84.281
median(civil$Promedio)
## [1] 83.915
histograma_promedio(civil, "Civil")

electronica <- filtrar_carrera(datos, "ELECTRONICA")
mean(electronica$Promedio)
## [1] 86.6572
median(electronica$Promedio)
## [1] 86.67
histograma_promedio(electronica, "Electronica")

GESTION <- filtrar_carrera(datos, "GESTION EMPRESARIAL")
mean(GESTION$Promedio)
## [1] 87.66966
median(GESTION$Promedio)
## [1] 87.59
histograma_promedio(GESTION, "Gestion Empresarial")

Informatica <- filtrar_carrera(datos, "INFORMATICA")
mean(Informatica$Promedio)
## [1] 86.26577
median(Informatica$Promedio)
## [1] 86.64
histograma_promedio(Informatica, "Informatica")

Industrial <- filtrar_carrera(datos, "INDUSTRIAL")
mean(Industrial$Promedio)
## [1] 85.01737
median(Industrial$Promedio)
## [1] 84.64
histograma_promedio(Industrial, "Industrial")

Mecanica <- filtrar_carrera(datos, "MECANICA")
mean(Mecanica$Promedio)
## [1] 82.58467
median(Mecanica$Promedio)
## [1] 82.02
histograma_promedio(Mecanica, "Mecanica")

Quimica <- filtrar_carrera(datos, "QUIMICA")
mean(Quimica$Promedio)
## [1] 86.05215
median(Quimica$Promedio)
## [1] 85.67
histograma_promedio(Quimica, "Quimica")

Mecatronica <- filtrar_carrera(datos, "MECATRONICA")
mean(Mecatronica$Promedio)
## [1] 84.45948
median(Mecatronica$Promedio)
## [1] 84.085
histograma_promedio(Mecatronica, "Mecatronica")

Sistemas <- filtrar_carrera(datos, "SISTEMAS")
mean(Sistemas$Promedio)
## [1] 85.90464
median(Sistemas$Promedio)
## [1] 85.34
histograma_promedio(Sistemas, "Sistemas")

Tic <- filtrar_carrera(datos, "TIC")
mean(Tic$Promedio)
## [1] 84.31719
median(Tic$Promedio)
## [1] 83.24
histograma_promedio(Tic, "Tic´s")
# Scatter plots of Promedio per degree program ----
# Each call plots one of the per-program data frames built earlier in the
# script. The Química and Industrial plots used to be emitted twice with
# identical code; each program is now plotted once.

# Plot every Promedio in `alumnos` against its row position.
#   alumnos:  data frame with a numeric `Promedio` column.
#   etiqueta: program name appended to the plot title.
# The subtitle reports the variance, the standard deviation and the
# coefficient of variation (standard deviation / mean * 100) of Promedio.
# Returns the ggplot object (printed automatically when called at top level).
dispersion_promedio <- function(alumnos, etiqueta) {
  promedios <- alumnos$Promedio
  desv_std <- sd(promedios)
  # seq_along() is evaluated against the plotted data, so the x positions no
  # longer depend on a global `n` that is reassigned before every plot, and a
  # 0-row data frame yields an empty sequence rather than c(1, 0).
  ggplot(alumnos, aes(x = seq_along(Promedio), y = Promedio)) +
    geom_point() +
    labs(
      title = paste("Dispersión de Promedio de", etiqueta),
      subtitle = paste(
        "Varianza = ", round(var(promedios), 2),
        ", DesvStd = ", round(desv_std, 2),
        ", C.V. = ", round(desv_std / mean(promedios) * 100, 2)
      ),
      # Keep the axis label the original `x = 1:n` mapping produced.
      x = "1:n"
    )
}

dispersion_promedio(administracion, "Administración")
dispersion_promedio(Arquitectura, "Arquitectura")
dispersion_promedio(Quimica, "Química")
dispersion_promedio(Industrial, "Industrial")
dispersion_promedio(Bioquimica, "Bioquimica")
dispersion_promedio(civil, "Civil")
dispersion_promedio(electrica, "Eléctrica")
#### Gestión Empresarial.
dispersion_promedio(GESTION, "Gestión Empresarial")
dispersion_promedio(electronica, "Electrónica")
dispersion_promedio(Mecanica, "Mecánica")
dispersion_promedio(Mecatronica, "Mecatrónica")
dispersion_promedio(Informatica, "Informática")
#### Sistemas.
dispersion_promedio(Sistemas, "Sistemas")
dispersion_promedio(Tic, "TIC´S")
#### 6. Determinar una tabla para todos los parámetros estadísticos solicitados.
# Summary table with one row per degree program: number of students (n), mean,
# median, variance, standard deviation and coefficient of variation (%) of
# Promedio.
# Only rows with Promedio > 0 are summarised, matching the filter used for the
# per-program statistics earlier in the script. Without it the rows with
# Promedio == 0 pull the means down and inflate the dispersion measures.
tabla <- datos %>%
  filter(Promedio > 0) %>%
  group_by(Carrera) %>%
  summarize(
    n = n(),
    media = mean(Promedio),
    mediana = median(Promedio),
    vari = var(Promedio),
    desvstd = sd(Promedio),
    cv = desvstd / media * 100
  )
## `summarise()` ungrouping output (override with `.groups` argument)
tabla
# NOTE(review): the output recorded below was printed before the Promedio > 0
# filter was added (e.g. it reports a mean of 74.5 for ADMINISTRACION, while
# the filtered mean printed earlier is 89.44). Re-knit the document to
# refresh it.
## # A tibble: 14 x 7
## Carrera n media mediana vari desvstd cv
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ADMINISTRACION 497 74.5 88.4 1125. 33.5 45.0
## 2 ARQUITECTURA 675 70.1 85.4 1163. 34.1 48.7
## 3 BIOQUIMICA 441 68.6 82.8 1126. 33.6 48.9
## 4 CIVIL 648 73.1 83.1 834. 28.9 39.5
## 5 ELECTRICA 280 60.7 81.8 1414. 37.6 61.9
## 6 ELECTRONICA 161 67.3 85.3 1324. 36.4 54.1
## 7 GESTION EMPRESARIAL 585 74.2 86.7 1013. 31.8 42.9
## 8 INDUSTRIAL 707 74.2 83.7 819. 28.6 38.6
## 9 INFORMATICA 101 60.6 83.6 1581. 39.8 65.6
## 10 MECANICA 301 61.7 80.7 1302. 36.1 58.4
## 11 MECATRONICA 432 70.8 83.4 981. 31.3 44.3
## 12 QUIMICA 568 72.6 84.6 996. 31.6 43.5
## 13 SISTEMAS 452 70.9 84.1 1081. 32.9 46.4
## 14 TIC 81 66.6 81.7 1209. 34.8 52.2
El caso cinco tiene como objetivo identificar medidas de dispersión y visualizar los datos de los alumnos inscritos en una institución de educación superior. Primero se muestran los primeros y los últimos 6 registros del conjunto de datos. Después se calculan la media y la mediana del promedio de cada una de las carreras con las que cuenta la institución (por ejemplo, Química, Informática o Industrial) y se representan mediante histogramas y diagramas de dispersión. Finalmente, se reúnen en una sola tabla los parámetros estadísticos de cada carrera: media, mediana, varianza (vari), desviación estándar (desvstd) y coeficiente de variación (cv).