1.- Cargar Librerias

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(fdth)
## 
## Attaching package: 'fdth'
## The following objects are masked from 'package:stats':
## 
##     sd, var
library(ggplot2)
library(fdth)

2.- Cargar Datos

datos <- read.csv("https://raw.githubusercontent.com/rpizarrog/probabilidad-y-estad-stica/master/datos/promedios%20alumnos/datos%20alumnos%20promedios%20SEP%202020.csv?fbclid=IwAR25NkkYE7ReSQnH_JbYJnEyiFzIAACdmSWlUdl3nb_AQhZ8-g0w-DoeY4Q",  encoding = "UTF-8" )
head(datos)
##   No..Control Alumno Semestre Cr..Apr. Carga Promedio  Carrera
## 1    20190001      1       11      198    19    80.21 SISTEMAS
## 2    20190002      2       11      235    10    84.33 SISTEMAS
## 3    20190003      3        9      235    10    95.25 SISTEMAS
## 4    20190004      4        9      226    19    95.00 SISTEMAS
## 5    20190005      5       10      231    14    82.32 SISTEMAS
## 6    20190006      6        9      212    23    95.02 SISTEMAS
tail(datos)
##      No..Control Alumno Semestre Cr..Apr. Carga Promedio        Carrera
## 5924    20195924   5924        2       27    28    92.83 ADMINISTRACION
## 5925    20195925   5925        7       94    13    80.95 ADMINISTRACION
## 5926    20195926   5926        5      103    32    92.68 ADMINISTRACION
## 5927    20195927   5927        4       79    34    86.18 ADMINISTRACION
## 5928    20195928   5928        5      108    32    90.48 ADMINISTRACION
## 5929    20195929   5929        7      169    32    92.33 ADMINISTRACION
ggplot(filter(datos, Promedio > 0), aes(x = Carrera, y = Promedio, color= Carrera ) ) +
    geom_boxplot() +
  labs(title = "Promedios de Administracion")

administracion <- filter (datos, Promedio > 0 & Carrera == "ADMINISTRACION") 

mean(administracion$Promedio)
## [1] 89.44312

3.- Generar Grafico de Caja

ggplot(filter(datos, Promedio>0), aes(x = Carrera, y = Promedio, color= Carrera)) +
  geom_boxplot() +
  labs(title = "Promedios de Administracion")

ggplot(administracion, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Administracion",subtitle =  paste("Media = ", round(mean(administracion$Promedio),2), ", Mediana = ", round(median(administracion$Promedio),2)))

Arquitectura <- filter (datos, Promedio > 0 & Carrera == "ARQUITECTURA") 

mean(Arquitectura$Promedio)
## [1] 86.46481
median(Arquitectura$Promedio)
## [1] 86.58
ggplot(Arquitectura, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Arquitectura",subtitle =  paste("Media = ", round(mean(Arquitectura$Promedio),2), ", Mediana = ", round(median(Arquitectura$Promedio),2))) 

Bioquimica <- filter (datos, Promedio > 0 & Carrera == "BIOQUIMICA") 

mean(Bioquimica$Promedio)
## [1] 84.68143
median(Bioquimica$Promedio)
## [1] 84.06
ggplot(Bioquimica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de Bioquimica",subtitle =  paste("Media = ", round(mean(Bioquimica$Promedio),2), ", Mediana = ", round(median(Bioquimica$Promedio),2))) 

civil <- filter (datos, Promedio > 0 & Carrera == "CIVIL") 

mean(civil$Promedio)
## [1] 84.281
median(civil$Promedio)
## [1] 83.915
ggplot(civil, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de civil",subtitle =  paste("Media = ", round(mean(civil$Promedio),2), ", Mediana = ", round(median(civil$Promedio),2))) 

electrica<- filter (datos, Promedio > 0 & Carrera == "ELECTRICA") 

mean(electrica$Promedio)
## [1] 83.77305
median(electrica$Promedio)
## [1] 83.24
ggplot(electrica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de electrica",subtitle =  paste("Media = ", round(mean(electrica$Promedio),2), ", Mediana = ", round(median(electrica$Promedio),2))) 

electronica <- filter (datos, Promedio > 0 & Carrera == "ELECTRONICA") 

mean(electronica$Promedio)
## [1] 86.6572
median(electronica$Promedio)
## [1] 86.67
ggplot(electronica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de electronica",subtitle =  paste("Media = ", round(mean(electronica$Promedio),2), ", Mediana = ", round(median(electronica$Promedio),2))) 

gestionempresarial <- filter (datos, Promedio > 0 & Carrera == "GESTION EMPRESARIAL") 

mean(gestionempresarial$Promedio)
## [1] 87.66966
median(gestionempresarial$Promedio)
## [1] 87.59
ggplot(gestionempresarial, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de gestionempresarial",subtitle =  paste("Media = ", round(mean(gestionempresarial$Promedio),2), ", Mediana = ", round(median(gestionempresarial$Promedio),2))) 

industrial <- filter (datos, Promedio > 0 & Carrera == "INDUSTRIAL") 

mean(industrial$Promedio)
## [1] 85.01737
median(industrial$Promedio)
## [1] 84.64
ggplot(industrial, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de industrial",subtitle =  paste("Media = ", round(mean(industrial$Promedio),2), ", Mediana = ", round(median(industrial$Promedio),2))) 

mecanica <- filter (datos, Promedio > 0 & Carrera == "MECANICA") 

mean(mecanica$Promedio)
## [1] 82.58467
median(mecanica$Promedio)
## [1] 82.02
ggplot(mecanica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de mecanica",subtitle =  paste("Media = ", round(mean(mecanica$Promedio),2), ", Mediana = ", round(median(mecanica$Promedio),2))) 

quimica <- filter (datos, Promedio > 0 & Carrera == "QUIMICA") 

mean(quimica$Promedio)
## [1] 86.05215
median(quimica$Promedio)
## [1] 85.67
ggplot(quimica, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de quimica",subtitle =  paste("Media = ", round(mean(quimica$Promedio),2), ", Mediana = ", round(median(quimica$Promedio),2))) 

tic <- filter (datos, Promedio > 0 & Carrera == "TIC") 

mean(tic$Promedio)
## [1] 84.31719
median(tic$Promedio)
## [1] 83.24
ggplot(tic, aes(Promedio)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = median(Promedio),
                  color = "mediana"),
              linetype = "solid",
              size = 2) +
    geom_vline(aes(xintercept = mean(Promedio),
                  color = "media"),
              linetype = "solid",
              size = 2) +
  labs(title = "Histograma de Promedio de tic",subtitle =  paste("Media = ", round(mean(tic$Promedio),2), ", Mediana = ", round(median(tic$Promedio),2))) 

n <- nrow(administracion)
ggplot(administracion, aes(x = 1:n, y = Promedio)) +
   geom_point() +
    labs(title = "Dispersion de Promedio de Administracion", subtitle =  paste("Varianza = ", round(var(administracion$Promedio),2), ", DesvStd = ", round(sd(administracion$Promedio),2), ", C.V. = ",  round(sd(administracion$Promedio) / mean(administracion$Promedio) *  100, 2 )))

6.- Tabla para todos los promedios estadisticos solicitados

tabla <- datos %>%
    group_by (Carrera) %>%
    summarize(n = n(), media = mean(Promedio), mediana = median(Promedio), vari = var(Promedio), desvstd = sd(Promedio), cv = desvstd / media * 100)
## `summarise()` ungrouping output (override with `.groups` argument)
tabla
## # A tibble: 14 x 7
##    Carrera                 n media mediana  vari desvstd    cv
##    <fct>               <int> <dbl>   <dbl> <dbl>   <dbl> <dbl>
##  1 ADMINISTRACION        497  74.5    88.4 1125.    33.5  45.0
##  2 ARQUITECTURA          675  70.1    85.4 1163.    34.1  48.7
##  3 BIOQUIMICA            441  68.6    82.8 1126.    33.6  48.9
##  4 CIVIL                 648  73.1    83.1  834.    28.9  39.5
##  5 ELECTRICA             280  60.7    81.8 1414.    37.6  61.9
##  6 ELECTRONICA           161  67.3    85.3 1324.    36.4  54.1
##  7 GESTION EMPRESARIAL   585  74.2    86.7 1013.    31.8  42.9
##  8 INDUSTRIAL            707  74.2    83.7  819.    28.6  38.6
##  9 INFORMATICA           101  60.6    83.6 1581.    39.8  65.6
## 10 MECANICA              301  61.7    80.7 1302.    36.1  58.4
## 11 MECATRONICA           432  70.8    83.4  981.    31.3  44.3
## 12 QUIMICA               568  72.6    84.6  996.    31.6  43.5
## 13 SISTEMAS              452  70.9    84.1 1081.    32.9  46.4
## 14 TIC                    81  66.6    81.7 1209.    34.8  52.2