Revisión grupo 2

# Start from an empty workspace: remove every object that ls() reports, then
# run the garbage collector (gc() also prints a memory-usage table).
rm(list = ls())
gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 503900 27.0    1117592 59.7   644242 34.5
## Vcells 888360  6.8    8388608 64.0  1635170 12.5

ls()

## character(0)

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.2.3

library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.2.3

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)

## Warning: package 'readxl' was built under R version 4.2.3

# Load the workbook into DATOS.
# NOTE(review): this is an absolute, user-specific path; the script only runs
# on a machine where this exact file exists.
DATOS <- read_excel("C:/Users/Julietha Zorro M/Downloads/DATOS.xlsx")

# Open the spreadsheet-style viewer only in an interactive session; View() has
# no purpose when the script is run or knitted non-interactively.
if (interactive()) {
  View(DATOS)
}

Se carga la información de Terridata

Índice de pobreza monetaria en hombres

El código usado por los estudiantes es:

# Original student code, kept as submitted: a bar chart with one bar per
# Entidad whose height is IPMhombres.
# NOTE(review): the x and y axis labels are swapped relative to the aes()
# mapping (x is Entidad, y is IPMhombres).
ggplot(DATOS, aes(x = Entidad, y = IPMhombres)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Indice de pobreza monetaria en hombres",
       x = "Indice",
       y = "Entidad")

Se ajusta el código para ordenar de mayor a menor por IPM y para presentar juntos hombres y mujeres

names(DATOS)

## [1] "Codigo"            "Entidad"           "IPMhombres"       
## [4] "IPMMUJERES"        "IPMEXTREMOMUJERES" "IPMEXTREMOHOMBRES"

# Sort DATOS from highest to lowest IPMhombres and freeze that row order as
# the factor levels of Entidad, so the bars are drawn in ranking order instead
# of alphabetically.
DATOS <- DATOS %>%
  arrange(desc(IPMhombres)) %>%
  mutate(Entidad = factor(Entidad, levels = Entidad))

# Bar chart for men: one bar per Entidad, with the value rounded to one
# decimal printed just above each bar.
plot1 <- ggplot(DATOS, aes(x = Entidad, y = IPMhombres)) +
  geom_col(fill = "skyblue") +
  geom_text(
    aes(label = paste(round(IPMhombres, digits = 1), "%")),
    vjust = -0.5,
    color = "black",
    size = 2
  ) +
  labs(
    title = "Indice de pobreza monetaria en hombres",
    subtitle = "Año xxx",
    caption = "Fuente: Terridata?¿",
    x = "Entidad",
    y = "Indice"
  ) +
  theme_light() +
  # Tilt the entity names so that all of them fit along the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5))

# Render the chart.
print(plot1)

# Sort DATOS from highest to lowest IPMMUJERES. This replaces the row order
# left by the previous sort on IPMhombres.
DATOS <- DATOS[order(DATOS$IPMMUJERES, decreasing = TRUE), ]

# Rebuild Entidad as a factor whose levels follow the new row order, so the
# bars are drawn in ranking order.
DATOS$Entidad <- factor(DATOS$Entidad, levels = DATOS$Entidad)

# Bar chart for women: one bar per Entidad with its value printed above it.
plot2 <- ggplot(DATOS, aes(x = Entidad, y = IPMMUJERES)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Indice de pobreza monetaria en mujeres",
       subtitle = "Año xxx",
       caption = "Fuente: Terridata?¿",
       x = "Entidad",
       y = "Indice") +
  # The label must use the same column as the bar height (IPMMUJERES);
  # labelling with IPMhombres would print the men's values on the women's bars.
  geom_text(aes(label = paste(round(IPMMUJERES, digits = 1), "%")),
            vjust = -0.5, color = "black", size = 2) +
  theme_light() +
  # Tilt the entity names so that all of them fit along the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5))

# Render the chart.
print(plot2)

Se puede hacer un gráfico de barras

library(tidyr)
names(DATOS)

## [1] "Codigo"            "Entidad"           "IPMhombres"       
## [4] "IPMMUJERES"        "IPMEXTREMOMUJERES" "IPMEXTREMOHOMBRES"

# Reshape to long format: keep the identifiers plus the two IPM columns, then
# stack those two columns into one row per (Entidad, Sexo). Sexo holds the
# source column name and IPM holds the value.
IPM <- pivot_longer(
  select(DATOS, Codigo, Entidad, IPMhombres, IPMMUJERES),
  cols = c(IPMhombres, IPMMUJERES),
  names_to = "Sexo",
  values_to = "IPM"
)

# Ranking of entities by the men's index, highest first. After the filter
# every remaining row has Sexo == "IPMhombres", so sorting on IPM alone is
# enough.
H <- IPM %>%
  filter(Sexo == "IPMhombres") %>%
  arrange(desc(IPM))

# Use the men's ranking as the factor levels of Entidad for the whole long
# table, so any plot of IPM orders the entities by that ranking.
IPM$Entidad <- factor(IPM$Entidad, levels = H$Entidad)



# Combined bar chart: one panel per value of Sexo (stacked vertically through
# facet_grid), with the value printed above each bar.
plot3 <- ggplot(IPM, aes(x = Entidad, y = IPM)) +
  geom_bar(stat = "identity", fill = "darkgreen") +
  # The chart shows both sexes, so the title must not say "en hombres".
  labs(title = "Indice de pobreza monetaria por sexo",
       subtitle = "Año xxx",
       caption = "Fuente: Terridata?¿",
       x = "Entidad",
       y = "Indice") +
  geom_text(aes(label = paste(round(IPM, digits = 1), "%")),
            vjust = -0.5, color = "black", size = 2) +
  theme_light() +
  # Smaller, tilted entity names so they fit along the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5.5)) +
  facet_grid(Sexo ~ .)

# Render the chart.
plot3

Estadísticas descriptivas. Avance 2

DescTools::Abstract(DATOS)

## ------------------------------------------------------------------------------ 
## DATOS
## 
## data frame:  23 obs. of  6 variables
##      23 complete cases (100.0%)
## 
##   Nr  ColName            Class      NAs  Levels                            
##   1   Codigo             character  .                                      
##   2   Entidad            factor     .    (23): 1-Chocó, 2-La Guajira,      
##                                          3-Magdalena, 4-Sucre, 5-Cauca, ...
##   3   IPMhombres         numeric    .                                      
##   4   IPMMUJERES         numeric    .                                      
##   5   IPMEXTREMOMUJERES  numeric    .                                      
##   6   IPMEXTREMOHOMBRES  numeric    .

summary(DATOS)

##     Codigo                Entidad     IPMhombres      IPMMUJERES   
##  Length:23          Chocó     : 1   Min.   :15.65   Min.   :17.12  
##  Class :character   La Guajira: 1   1st Qu.:23.72   1st Qu.:25.36  
##  Mode  :character   Magdalena : 1   Median :34.58   Median :39.16  
##                     Sucre     : 1   Mean   :34.27   Mean   :36.56  
##                     Cauca     : 1   3rd Qu.:43.09   3rd Qu.:45.63  
##                     Nariño    : 1   Max.   :61.05   Max.   :65.21  
##                     (Other)   :17                                  
##  IPMEXTREMOMUJERES IPMEXTREMOHOMBRES
##  Min.   : 3.495    Min.   : 3.309   
##  1st Qu.: 6.627    1st Qu.: 6.039   
##  Median :10.155    Median : 9.019   
##  Mean   :12.142    Mean   :11.244   
##  3rd Qu.:14.677    3rd Qu.:13.715   
##  Max.   :38.991    Max.   :36.940   
##

DescTools::Desc(DATOS)

## ------------------------------------------------------------------------------ 
## Describe DATOS (tbl_df, tbl, data.frame):
## 
## data frame:  23 obs. of  6 variables
##      23 complete cases (100.0%)
## 
##   Nr  ColName            Class      NAs  Levels                            
##   1   Codigo             character  .                                      
##   2   Entidad            factor     .    (23): 1-Chocó, 2-La Guajira,      
##                                          3-Magdalena, 4-Sucre, 5-Cauca, ...
##   3   IPMhombres         numeric    .                                      
##   4   IPMMUJERES         numeric    .                                      
##   5   IPMEXTREMOMUJERES  numeric    .                                      
##   6   IPMEXTREMOHOMBRES  numeric    .                                      
## 
## 
## ------------------------------------------------------------------------------ 
## 1 - Codigo (character)
## 
##   length      n    NAs unique levels  dupes
##       23     23      0     23     23      n
##          100.0%   0.0%                     
## 
##     level  freq  perc  cumfreq  cumperc
## 1   05000     1  4.3%        1     4.3%
## 2   08000     1  4.3%        2     8.7%
## 3   13000     1  4.3%        3    13.0%
## 4   15000     1  4.3%        4    17.4%
## 5   17000     1  4.3%        5    21.7%
## 6   18000     1  4.3%        6    26.1%
## 7   19000     1  4.3%        7    30.4%
## 8   20000     1  4.3%        8    34.8%
## 9   23000     1  4.3%        9    39.1%
## 10  25000     1  4.3%       10    43.5%
## 11  27000     1  4.3%       11    47.8%
## 12  41000     1  4.3%       12    52.2%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 2 - Entidad (factor)
## 
##   length      n    NAs unique levels  dupes
##       23     23      0     23     23      n
##          100.0%   0.0%                     
## 
##                  level  freq  perc  cumfreq  cumperc
## 1                Chocó     1  4.3%        1     4.3%
## 2           La Guajira     1  4.3%        2     8.7%
## 3            Magdalena     1  4.3%        3    13.0%
## 4                Sucre     1  4.3%        4    17.4%
## 5                Cauca     1  4.3%        5    21.7%
## 6               Nariño     1  4.3%        6    26.1%
## 7                Huila     1  4.3%        7    30.4%
## 8              Córdoba     1  4.3%        8    34.8%
## 9                Cesar     1  4.3%        9    39.1%
## 10  Norte de Santander     1  4.3%       10    43.5%
## 11             Bolívar     1  4.3%       11    47.8%
## 12             Caquetá     1  4.3%       12    52.2%
## ... etc.
##  [list output truncated]

## ------------------------------------------------------------------------------ 
## 3 - IPMhombres (numeric)
## 
##     length         n       NAs    unique        0s      mean    meanCI'
##         23        23         0       = n         0  34.26577  28.89899
##               100.0%      0.0%                0.0%            39.63255
##                                                                       
##        .05       .10       .25    median       .75       .90       .95
##   17.12231  18.44508  23.71680  34.58050  43.09380  46.84946  48.02525
##                                                                       
##      range        sd     vcoef       mad       IQR      skew      kurt
##   45.39700  12.41068   0.36219  14.17618  19.37700   0.12881  -1.10400
##                                                                       
## lowest : 15.6497, 17.002, 18.2051, 19.405, 20.9664
## highest: 46.5159, 46.6841, 46.8908, 48.1513, 61.0467
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 4 - IPMMUJERES (numeric)
## 
##     length         n       NAs    unique        0s      mean    meanCI'
##         23        23         0       = n         0  36.55700  31.05258
##               100.0%      0.0%                0.0%            42.06141
##                                                                       
##        .05       .10       .25    median       .75       .90       .95
##   18.47209  20.98052  25.35935  39.16270  45.62990  49.57454  50.04389
##                                                                       
##      range        sd     vcoef       mad       IQR      skew      kurt
##   48.09200  12.72896   0.34819  14.29760  20.27055   0.15977  -0.94909
##                                                                       
## lowest : 17.1151, 18.2166, 20.7715, 21.8166, 23.08
## highest: 48.4173, 48.8063, 49.7666, 50.0747, 65.2071
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 5 - IPMEXTREMOMUJERES (numeric)
## 
##     length        n      NAs    unique        0s      mean    meanCI'
##         23       23        0       = n         0  12.14190   8.70277
##              100.0%     0.0%                0.0%            15.58104
##                                                                     
##        .05      .10      .25    median       .75       .90       .95
##    3.96583  4.74852  6.62735  10.15480  14.67680  19.39458  22.63241
##                                                                     
##      range       sd    vcoef       mad       IQR      skew      kurt
##   35.49530  7.95301  0.65500   5.24336   8.04945   1.65203   3.08715
##                                                                     
## lowest : 3.4954, 3.8946, 4.6069, 5.315, 6.4348
## highest: 18.861, 19.2265, 19.4366, 22.9875, 38.9907
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 6 - IPMEXTREMOHOMBRES (numeric)
## 
##     length        n      NAs   unique        0s      mean    meanCI'
##         23       23        0      = n         0  11.24388   7.96809
##              100.0%     0.0%               0.0%            14.51967
##                                                                    
##        .05      .10      .25   median       .75       .90       .95
##    3.47440  4.40280  6.03865  9.01910  13.71465  18.62768  20.75039
##                                                                    
##      range       sd    vcoef      mad       IQR      skew      kurt
##   33.63060  7.57526  0.67372  5.58303   7.67600   1.66129   3.16034
##                                                                    
## lowest : 3.3094, 3.3899, 4.2349, 5.0744, 5.2534
## highest: 17.2528, 17.7908, 18.8369, 20.963, 36.94
## 
## ' 95%-CI (classic)

Boxplot:

names(DATOS)

## [1] "Codigo"            "Entidad"           "IPMhombres"       
## [4] "IPMMUJERES"        "IPMEXTREMOMUJERES" "IPMEXTREMOHOMBRES"

# Boxplot of IPMMUJERES with one box per Entidad, plus jittered points.
# NOTE(review): the summaries printed earlier report 23 rows and 23 unique
# Entidad values, so each box appears to be built from a single observation.
ggplot(DATOS, aes(x = Entidad, y = IPMMUJERES, fill = Entidad)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  labs(title = "Boxplot", x = "") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  )

Boxplot ajustado:

# Adjusted boxplot: distribution of IPM across entities, one box per value of
# Sexo, with the individual observations overlaid as jittered points.
box <- ggplot(data = IPM, aes(x = Sexo, y = IPM)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Boxplot") +
  xlab("")

# Render the chart.
box

Revisión grupo 2

Julieth Zorro Melo

2024-04-08

Índice de pobreza monetaria en hombres