1. Cargar librerias library (readr) (ggplot2) (dplyr)

library(readr)
library(dplyr)    
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

2. Cargar los datos de la dirección citada. read.csv()

datos <- read.csv("https://raw.githubusercontent.com/rpizarrog/probabilidad-y-estad-stica/master/datos/promedios%20alumnos/datos%20alumnos%20promedios%20SEP%202020.csv", encoding = "UTF-8")
head(datos)
##   No..Control Alumno Semestre Cr..Apr. Carga Promedio  Carrera
## 1    20190001      1       11      198    19    80.21 SISTEMAS
## 2    20190002      2       11      235    10    84.33 SISTEMAS
## 3    20190003      3        9      235    10    95.25 SISTEMAS
## 4    20190004      4        9      226    19    95.00 SISTEMAS
## 5    20190005      5       10      231    14    82.32 SISTEMAS
## 6    20190006      6        9      212    23    95.02 SISTEMAS
tail(datos)
##      No..Control Alumno Semestre Cr..Apr. Carga Promedio        Carrera
## 5924    20195924   5924        2       27    28    92.83 ADMINISTRACION
## 5925    20195925   5925        7       94    13    80.95 ADMINISTRACION
## 5926    20195926   5926        5      103    32    92.68 ADMINISTRACION
## 5927    20195927   5927        4       79    34    86.18 ADMINISTRACION
## 5928    20195928   5928        5      108    32    90.48 ADMINISTRACION
## 5929    20195929   5929        7      169    32    92.33 ADMINISTRACION

3.Genera gráfica de caja de promedios de alumnos en función de las carreras para identificar cuartiles (1 gráfica)

ggplot(filter(datos, Promedio > 0), aes(x = Carrera, y = Promedio, color= Carrera ) ) +
    geom_boxplot() +
  labs(title = "Promedios de Administración")

### 4. Identificar medidas de tendencia central con histograma y líneas de sus media y mediana de cada carrera en función de los promedios (14 gráficas)

Administracion

administracion <- filter (datos, Promedio > 0 & Carrera == "ADMINISTRACION") 

mean(administracion$Promedio)
## [1] 89.44312
median(administracion$Promedio)
## [1] 89.605
ggplot(administracion, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Administración",subtitle =  paste("Media = ", round(mean(administracion$Promedio),2), ", Mediana = ", round(median(administracion$Promedio),2))) 

# Arquitectura

arquitectura <- filter (datos, Promedio > 0 & Carrera == "ARQUITECTURA") 

mean(arquitectura$Promedio)
## [1] 86.46481
median(arquitectura$Promedio)
## [1] 86.58
ggplot(arquitectura, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Arquitectura",subtitle =  paste("Media = ", round(mean(arquitectura$Promedio),2), ", Mediana = ", round(median(arquitectura$Promedio),2))) 

# Bioquimica

bioquimica <- filter (datos, Promedio > 0 & Carrera == "BIOQUIMICA") 

mean(bioquimica$Promedio)
## [1] 84.68143
median(bioquimica$Promedio)
## [1] 84.06
ggplot(bioquimica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Bioquimica",subtitle =  paste("Media = ", round(mean(bioquimica$Promedio),2), ", Mediana = ", round(median(bioquimica$Promedio),2))) 

# CIVIL

civil <- filter (datos, Promedio > 0 & Carrera == "CIVIL") 

mean(civil$Promedio)
## [1] 84.281
median(civil$Promedio)
## [1] 83.915
ggplot(civil, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Civil",subtitle =  paste("Media = ", round(mean(civil$Promedio),2), ", Mediana = ", round(median(civil$Promedio),2))) 

Electrica

electrica <- filter (datos, Promedio > 0 & Carrera == "ELECTRICA") 

mean(electrica$Promedio)
## [1] 83.77305
median(electrica$Promedio)
## [1] 83.24
ggplot(electrica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Electrica",subtitle =  paste("Media = ", round(mean(electrica$Promedio),2), ", Mediana = ", round(median(electrica$Promedio),2))) 

Electronica

electronica <- filter (datos, Promedio > 0 & Carrera == "ELECTRONICA") 

mean(electronica$Promedio)
## [1] 86.6572
median(electronica$Promedio)
## [1] 86.67
ggplot(electronica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Electronica",subtitle =  paste("Media = ", round(mean(electronica$Promedio),2), ", Mediana = ", round(median(electronica$Promedio),2))) 

Gestion Empresarial

gestionempresarial <- filter (datos, Promedio > 0 & Carrera == "GESTION EMPRESARIAL") 

mean(gestionempresarial$Promedio)
## [1] 87.66966
median(gestionempresarial$Promedio)
## [1] 87.59
ggplot(gestionempresarial, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de GestionEmpresarial",subtitle =  paste("Media = ", round(mean(gestionempresarial$Promedio),2), ", Mediana = ", round(median(gestionempresarial$Promedio),2))) 

# Industrial

industrial <- filter (datos, Promedio > 0 & Carrera == "INDUSTRIAL") 

mean(industrial$Promedio)
## [1] 85.01737
median(industrial$Promedio)
## [1] 84.64
ggplot(industrial, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de INDUSTRIAL",subtitle =  paste("Media = ", round(mean(industrial$Promedio),2), ", Mediana = ", round(median(industrial$Promedio),2)))

# Informatica

informatica <- filter (datos, Promedio > 0 & Carrera == "INFORMATICA") 

mean(informatica$Promedio)
## [1] 86.26577
median(informatica$Promedio)
## [1] 86.64
ggplot(informatica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de INFORMATICA",subtitle =  paste("Media = ", round(mean(informatica$Promedio),2), ", Mediana = ", round(median(informatica$Promedio),2)))

# Mecanica

mecanica <- filter (datos, Promedio > 0 & Carrera == "MECANICA") 

mean(mecanica$Promedio)
## [1] 82.58467
median(mecanica$Promedio)
## [1] 82.02
ggplot(mecanica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de MECANICA",subtitle =  paste("Media = ", round(mean(mecanica$Promedio),2), ", Mediana = ", round(median(mecanica$Promedio),2)))

Mecatronica

mecatronica <- filter (datos, Promedio > 0 & Carrera == "MECATRONICA") 

mean(mecatronica$Promedio)
## [1] 84.45948
median(mecatronica$Promedio)
## [1] 84.085
ggplot(mecatronica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de MECATRONICA",subtitle =  paste("Media = ", round(mean(mecatronica$Promedio),2), ", Mediana = ", round(median(mecatronica$Promedio),2)))

Quimica

quimica <- filter (datos, Promedio > 0 & Carrera == "QUIMICA") 

mean(quimica$Promedio)
## [1] 86.05215
median(quimica$Promedio)
## [1] 85.67
ggplot(quimica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de QUIMICA",subtitle =  paste("Media = ", round(mean(quimica$Promedio),2), ", Mediana = ", round(median(quimica$Promedio),2)))

# Sistemas

sistemas <- filter (datos, Promedio > 0 & Carrera == "SISTEMAS") 

mean(sistemas$Promedio)
## [1] 85.90464
median(sistemas$Promedio)
## [1] 85.34
ggplot(sistemas, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de SISTEMAS",subtitle =  paste("Media = ", round(mean(sistemas$Promedio),2), ", Mediana = ", round(median(sistemas$Promedio),2)))

TIC

tic <- filter (datos, Promedio > 0 & Carrera == "TIC") 

mean(tic$Promedio)
## [1] 84.31719
median(tic$Promedio)
## [1] 83.24
ggplot(tic, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de TIC",subtitle =  paste("Media = ", round(mean(tic$Promedio),2), ", Mediana = ", round(median(tic$Promedio),2)))

5. Identificar varianza y desviación estándar en una gráfica de dispersión de los promedios de cada carrera (14 gráficas)

n <- nrow(administracion)
ggplot(administracion, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Administración", subtitle =  paste("Varianza = ", round(var(administracion$Promedio),2), ", DesvStd = ", round(sd(administracion$Promedio),2), ", C.V. = ",  round(sd(administracion$Promedio) / mean(administracion$Promedio) *  100, 2 )))

Arquitectura

n <- nrow(arquitectura)
ggplot(arquitectura, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Arquitectura", subtitle =  paste("Varianza = ", round(var(arquitectura$Promedio),2), ", DesvStd = ", round(sd(arquitectura$Promedio),2), ", C.V. = ",  round(sd(arquitectura$Promedio) / mean(arquitectura$Promedio) *  100, 2 )))

Bioquimica

n <- nrow(bioquimica)
ggplot(bioquimica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Bioquimica", subtitle =  paste("Varianza = ", round(var(bioquimica$Promedio),2), ", DesvStd = ", round(sd(bioquimica$Promedio),2), ", C.V. = ",  round(sd(bioquimica$Promedio) / mean(bioquimica$Promedio) *  100, 2 )))

# Civil

n <- nrow(civil)
ggplot(civil, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Civil", subtitle =  paste("Varianza = ", round(var(civil$Promedio),2), ", DesvStd = ", round(sd(civil$Promedio),2), ", C.V. = ",  round(sd(civil$Promedio) / mean(civil$Promedio) *  100, 2 )))

Electrica

n <- nrow(electrica)
ggplot(electrica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Electrica", subtitle =  paste("Varianza = ", round(var(electrica$Promedio),2), ", DesvStd = ", round(sd(electrica$Promedio),2), ", C.V. = ",  round(sd(electrica$Promedio) / mean(electrica$Promedio) *  100, 2 )))

# Electronica

n <- nrow(electronica)
ggplot(electronica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Electronica", subtitle =  paste("Varianza = ", round(var(electrica$Promedio),2), ", DesvStd = ", round(sd(electronica$Promedio),2), ", C.V. = ",  round(sd(electronica$Promedio) / mean(electronica$Promedio) *  100, 2 )))

# Gestion Empresarial

n <- nrow(gestionempresarial)
ggplot(gestionempresarial, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Gestion Empresarial", subtitle =  paste("Varianza = ", round(var(gestionempresarial$Promedio),2), ", DesvStd = ", round(sd(gestionempresarial$Promedio),2), ", C.V. = ",  round(sd(gestionempresarial$Promedio) / mean(gestionempresarial$Promedio) *  100, 2 )))

# Industrial

n <- nrow(industrial)
ggplot(industrial, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Industrial", subtitle =  paste("Varianza = ", round(var(industrial$Promedio),2), ", DesvStd = ", round(sd(industrial$Promedio),2), ", C.V. = ",  round(sd(industrial$Promedio) / mean(industrial$Promedio) *  100, 2 )))

#Informatica

n <- nrow(informatica)
ggplot(informatica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Informatica", subtitle =  paste("Varianza = ", round(var(informatica$Promedio),2), ", DesvStd = ", round(sd(informatica$Promedio),2), ", C.V. = ",  round(sd(informatica$Promedio) / mean(informatica$Promedio) *  100, 2 )))

# Mecanica

n <- nrow(mecanica)
ggplot(mecanica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Mecanica", subtitle =  paste("Varianza = ", round(var(mecanica$Promedio),2), ", DesvStd = ", round(sd(mecanica$Promedio),2), ", C.V. = ",  round(sd(mecanica$Promedio) / mean(mecanica$Promedio) *  100, 2 )))

# Mecatronica

n <- nrow(mecatronica)
ggplot(mecatronica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Mecatronica", subtitle =  paste("Varianza = ", round(var(electrica$Promedio),2), ", DesvStd = ", round(sd(mecatronica$Promedio),2), ", C.V. = ",  round(sd(mecatronica$Promedio) / mean(mecatronica$Promedio) *  100, 2 )))

# Quimica

n <- nrow(quimica)
ggplot(quimica, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Quimica", subtitle =  paste("Varianza = ", round(var(quimica$Promedio),2), ", DesvStd = ", round(sd(quimica$Promedio),2), ", C.V. = ",  round(sd(quimica$Promedio) / mean(quimica$Promedio) *  100, 2 )))

# Sistemas

n <- nrow(sistemas)
ggplot(sistemas, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de Sistemas", subtitle =  paste("Varianza = ", round(var(sistemas$Promedio),2), ", DesvStd = ", round(sd(sistemas$Promedio),2), ", C.V. = ",  round(sd(sistemas$Promedio) / mean(sistemas$Promedio) *  100, 2 )))

# TIC

n <- nrow(tic)
ggplot(tic, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersión de Promedio de TIC", subtitle =  paste("Varianza = ", round(var(tic$Promedio),2), ", DesvStd = ", round(sd(tic$Promedio),2), ", C.V. = ",  round(sd(tic$Promedio) / mean(tic$Promedio) *  100, 2 )))

# 6. Determinar una tabla para todos los parámetros estadísticos solicitados

tabla <- datos %>%
    group_by (Carrera) %>%
    summarize(n = n(), media = mean(Promedio), mediana = median(Promedio), vari = var(Promedio), desvstd = sd(Promedio), cv = desvstd / media * 100)
## `summarise()` ungrouping output (override with `.groups` argument)
tabla
## # A tibble: 14 x 7
##    Carrera                 n media mediana  vari desvstd    cv
##    <chr>               <int> <dbl>   <dbl> <dbl>   <dbl> <dbl>
##  1 ADMINISTRACION        497  74.5    88.4 1125.    33.5  45.0
##  2 ARQUITECTURA          675  70.1    85.4 1163.    34.1  48.7
##  3 BIOQUIMICA            441  68.6    82.8 1126.    33.6  48.9
##  4 CIVIL                 648  73.1    83.1  834.    28.9  39.5
##  5 ELECTRICA             280  60.7    81.8 1414.    37.6  61.9
##  6 ELECTRONICA           161  67.3    85.3 1324.    36.4  54.1
##  7 GESTION EMPRESARIAL   585  74.2    86.7 1013.    31.8  42.9
##  8 INDUSTRIAL            707  74.2    83.7  819.    28.6  38.6
##  9 INFORMATICA           101  60.6    83.6 1581.    39.8  65.6
## 10 MECANICA              301  61.7    80.7 1302.    36.1  58.4
## 11 MECATRONICA           432  70.8    83.4  981.    31.3  44.3
## 12 QUIMICA               568  72.6    84.6  996.    31.6  43.5
## 13 SISTEMAS              452  70.9    84.1 1081.    32.9  46.4
## 14 TIC                    81  66.6    81.7 1209.    34.8  52.2

7. Interpretación del CASO 5

Al concluir el caso y ver cada grafica de los promedios generales, la carrera de INFORMATICA es la que tiene menor promedio y con el mayor promedio es ADMINISTRACIÓN. Cada grafica de histograma, la carrera de MECANICA tiene media 82.58 y mediana de 82.02 siendo la mas baja de todas. Mientras ADMINISTRACION tiene la mas alta con 89.44 de media y 89.6 de mediana.