# Start from a clean workspace: remove every object that ls() lists in the
# current (global) environment.
# NOTE(review): this also wipes the workspace of anyone who sources this
# script; running it in a fresh R session would avoid the need for it.
rm(list=ls())
# Run the garbage collector; the memory report below is its recorded output.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 503900 27.0 1117592 59.7 644242 34.5
## Vcells 888360 6.8 8388608 64.0 1635170 12.5
# Confirm that the workspace is now empty (character(0) means no objects).
ls()
## character(0)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.2.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
# Read the indicators workbook into DATOS.
# NOTE(review): the path is absolute and machine-specific; it is kept unchanged
# so the script still finds the file on the original machine. Consider a
# project-relative path.
DATOS <- read_excel("C:/Users/Julietha Zorro M/Downloads/DATOS.xlsx")
# Open the spreadsheet viewer only in interactive sessions: View() has no
# viewer to open (and may fail) when the script is run with Rscript or knitted.
if (interactive()) {
  View(DATOS)
}
Se carga la información de Terridata
El código usado por los estudiantes es:
# Bar chart of the IPMhombres column for each Entidad.
# The axis labels follow the aesthetic mapping: Entidad is on the x axis and
# the index on the y axis (the two labels were previously swapped).
ggplot(DATOS, aes(x = Entidad, y = IPMhombres)) +
  # geom_col() draws bar heights straight from y, i.e. the same result as
  # geom_bar(stat = "identity").
  geom_col(fill = "skyblue") +
  labs(
    title = "Indice de pobreza monetaria en hombres",
    x = "Entidad",
    y = "Indice"
  )
Se ajusta el código para ordenar de mayor a menor por IPM y para presentar juntos hombres y mujeres
# List the columns of DATOS (the recorded output follows).
names(DATOS)
## [1] "Codigo" "Entidad" "IPMhombres"
## [4] "IPMMUJERES" "IPMEXTREMOMUJERES" "IPMEXTREMOHOMBRES"
# Sort the data frame by IPMhombres in descending order.
DATOS <- DATOS[order(DATOS$IPMhombres, decreasing = TRUE), ]
# Turn Entidad into a factor whose levels follow the sorted row order, so the
# bars are drawn in that order rather than alphabetically. unique() keeps
# factor() from failing if an Entidad were ever repeated.
DATOS$Entidad <- factor(DATOS$Entidad, levels = unique(DATOS$Entidad))
# Build the bar chart.
# TODO(review): the subtitle year and the caption source are still
# placeholders; their garbled characters were restored to "Año" and "?¿".
plot1 <- ggplot(DATOS, aes(x = Entidad, y = IPMhombres)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "skyblue") +
  labs(
    title = "Indice de pobreza monetaria en hombres",
    subtitle = "Año xxx",
    caption = "Fuente: Terridata?¿",
    x = "Entidad",
    y = "Indice"
  ) +
  # Write the value, rounded to one decimal, above each bar.
  geom_text(
    aes(label = paste(round(IPMhombres, digits = 1), "%")),
    vjust = -0.5, color = "black", size = 2
  ) +
  theme_light() +
  # Tilt the x-axis labels so the Entidad names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5))
# Show the chart.
print(plot1)
# Sort the data frame by IPMMUJERES in descending order.
DATOS <- DATOS[order(DATOS$IPMMUJERES, decreasing = TRUE), ]
# Turn Entidad into a factor whose levels follow the sorted row order, so the
# bars are drawn in that order. unique() keeps factor() from failing if an
# Entidad were ever repeated.
DATOS$Entidad <- factor(DATOS$Entidad, levels = unique(DATOS$Entidad))
# Build the bar chart.
# TODO(review): the subtitle year and the caption source are still
# placeholders; their garbled characters were restored to "Año" and "?¿".
plot2 <- ggplot(DATOS, aes(x = Entidad, y = IPMMUJERES)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "skyblue") +
  labs(
    title = "Indice de pobreza monetaria en mujeres",
    subtitle = "Año xxx",
    caption = "Fuente: Terridata?¿",
    x = "Entidad",
    y = "Indice"
  ) +
  # Label each bar with the value it actually plots (IPMMUJERES). The labels
  # previously showed IPMhombres, so the text did not match the bar heights.
  geom_text(
    aes(label = paste(round(IPMMUJERES, digits = 1), "%")),
    vjust = -0.5, color = "black", size = 2
  ) +
  theme_light() +
  # Tilt the x-axis labels so the Entidad names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7.5))
# Show the chart.
print(plot2)
Se puede hacer un gráfico de barras
library(tidyr)
# List the columns of DATOS (the recorded output follows).
names(DATOS)
## [1] "Codigo" "Entidad" "IPMhombres"
## [4] "IPMMUJERES" "IPMEXTREMOMUJERES" "IPMEXTREMOHOMBRES"
# Reshape to long format: one row per Entidad and source column, with the
# column name stored in Sexo and its value in IPM.
IPM <- DATOS %>%
  select(Codigo, Entidad, IPMhombres, IPMMUJERES) %>%
  pivot_longer(
    cols = c("IPMhombres", "IPMMUJERES"),
    names_to = "Sexo",
    values_to = "IPM"
  )
# Keep only the IPMhombres rows, sorted by value in descending order. After
# the filter every row is an IPMhombres row, so sorting on the IPM column
# directly is enough (inside arrange(), IPM refers to the column).
H <- IPM %>%
  filter(Sexo == "IPMhombres") %>%
  arrange(desc(IPM))
# Use that order as the Entidad level order for both facets.
IPM$Entidad <- factor(IPM$Entidad, levels = H$Entidad)
# Faceted bar chart: one panel per Sexo value.
# TODO(review): the subtitle year and the caption source are still
# placeholders; their garbled characters were restored to "Año" and "?¿".
plot3 <- ggplot(IPM, aes(x = Entidad, y = IPM)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "darkgreen") +
  labs(
    # The chart shows both series, so the title no longer says "en hombres".
    title = "Indice de pobreza monetaria en hombres y mujeres",
    subtitle = "Año xxx",
    caption = "Fuente: Terridata?¿",
    x = "Entidad",
    y = "Indice"
  ) +
  # Write the value, rounded to one decimal, above each bar.
  geom_text(
    aes(label = paste(round(IPM, digits = 1), "%")),
    vjust = -0.5, color = "black", size = 2
  ) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5.5)) +
  facet_grid(Sexo ~ .)
# Show the chart; print() also works when the script is run with source().
print(plot3)
Estadísticas descriptivas. Avance 2
# Structural overview of DATOS: per-column class, NA marker and factor levels
# (see the recorded output below).
DescTools::Abstract(DATOS)
## ------------------------------------------------------------------------------
## DATOS
##
## data frame: 23 obs. of 6 variables
## 23 complete cases (100.0%)
##
## Nr ColName Class NAs Levels
## 1 Codigo character .
## 2 Entidad factor . (23): 1-Chocó, 2-La Guajira,
## 3-Magdalena, 4-Sucre, 5-Cauca, ...
## 3 IPMhombres numeric .
## 4 IPMMUJERES numeric .
## 5 IPMEXTREMOMUJERES numeric .
## 6 IPMEXTREMOHOMBRES numeric .
# Per-column summary of DATOS: quartiles and mean for the numeric columns,
# level counts for the Entidad factor (see the recorded output below).
summary(DATOS)
## Codigo Entidad IPMhombres IPMMUJERES
## Length:23 Chocó : 1 Min. :15.65 Min. :17.12
## Class :character La Guajira: 1 1st Qu.:23.72 1st Qu.:25.36
## Mode :character Magdalena : 1 Median :34.58 Median :39.16
## Sucre : 1 Mean :34.27 Mean :36.56
## Cauca : 1 3rd Qu.:43.09 3rd Qu.:45.63
## Nariño : 1 Max. :61.05 Max. :65.21
## (Other) :17
## IPMEXTREMOMUJERES IPMEXTREMOHOMBRES
## Min. : 3.495 Min. : 3.309
## 1st Qu.: 6.627 1st Qu.: 6.039
## Median :10.155 Median : 9.019
## Mean :12.142 Mean :11.244
## 3rd Qu.:14.677 3rd Qu.:13.715
## Max. :38.991 Max. :36.940
##
# Detailed description of every column of DATOS: frequency tables for the
# character/factor columns and location, spread and shape statistics for the
# numeric ones (see the recorded output below).
DescTools::Desc(DATOS)
## ------------------------------------------------------------------------------
## Describe DATOS (tbl_df, tbl, data.frame):
##
## data frame: 23 obs. of 6 variables
## 23 complete cases (100.0%)
##
## Nr ColName Class NAs Levels
## 1 Codigo character .
## 2 Entidad factor . (23): 1-Chocó, 2-La Guajira,
## 3-Magdalena, 4-Sucre, 5-Cauca, ...
## 3 IPMhombres numeric .
## 4 IPMMUJERES numeric .
## 5 IPMEXTREMOMUJERES numeric .
## 6 IPMEXTREMOHOMBRES numeric .
##
##
## ------------------------------------------------------------------------------
## 1 - Codigo (character)
##
## length n NAs unique levels dupes
## 23 23 0 23 23 n
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 05000 1 4.3% 1 4.3%
## 2 08000 1 4.3% 2 8.7%
## 3 13000 1 4.3% 3 13.0%
## 4 15000 1 4.3% 4 17.4%
## 5 17000 1 4.3% 5 21.7%
## 6 18000 1 4.3% 6 26.1%
## 7 19000 1 4.3% 7 30.4%
## 8 20000 1 4.3% 8 34.8%
## 9 23000 1 4.3% 9 39.1%
## 10 25000 1 4.3% 10 43.5%
## 11 27000 1 4.3% 11 47.8%
## 12 41000 1 4.3% 12 52.2%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 2 - Entidad (factor)
##
## length n NAs unique levels dupes
## 23 23 0 23 23 n
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Chocó 1 4.3% 1 4.3%
## 2 La Guajira 1 4.3% 2 8.7%
## 3 Magdalena 1 4.3% 3 13.0%
## 4 Sucre 1 4.3% 4 17.4%
## 5 Cauca 1 4.3% 5 21.7%
## 6 Nariño 1 4.3% 6 26.1%
## 7 Huila 1 4.3% 7 30.4%
## 8 Córdoba 1 4.3% 8 34.8%
## 9 Cesar 1 4.3% 9 39.1%
## 10 Norte de Santander 1 4.3% 10 43.5%
## 11 Bolívar 1 4.3% 11 47.8%
## 12 Caquetá 1 4.3% 12 52.2%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 3 - IPMhombres (numeric)
##
## length n NAs unique 0s mean meanCI'
## 23 23 0 = n 0 34.26577 28.89899
## 100.0% 0.0% 0.0% 39.63255
##
## .05 .10 .25 median .75 .90 .95
## 17.12231 18.44508 23.71680 34.58050 43.09380 46.84946 48.02525
##
## range sd vcoef mad IQR skew kurt
## 45.39700 12.41068 0.36219 14.17618 19.37700 0.12881 -1.10400
##
## lowest : 15.6497, 17.002, 18.2051, 19.405, 20.9664
## highest: 46.5159, 46.6841, 46.8908, 48.1513, 61.0467
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 4 - IPMMUJERES (numeric)
##
## length n NAs unique 0s mean meanCI'
## 23 23 0 = n 0 36.55700 31.05258
## 100.0% 0.0% 0.0% 42.06141
##
## .05 .10 .25 median .75 .90 .95
## 18.47209 20.98052 25.35935 39.16270 45.62990 49.57454 50.04389
##
## range sd vcoef mad IQR skew kurt
## 48.09200 12.72896 0.34819 14.29760 20.27055 0.15977 -0.94909
##
## lowest : 17.1151, 18.2166, 20.7715, 21.8166, 23.08
## highest: 48.4173, 48.8063, 49.7666, 50.0747, 65.2071
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 5 - IPMEXTREMOMUJERES (numeric)
##
## length n NAs unique 0s mean meanCI'
## 23 23 0 = n 0 12.14190 8.70277
## 100.0% 0.0% 0.0% 15.58104
##
## .05 .10 .25 median .75 .90 .95
## 3.96583 4.74852 6.62735 10.15480 14.67680 19.39458 22.63241
##
## range sd vcoef mad IQR skew kurt
## 35.49530 7.95301 0.65500 5.24336 8.04945 1.65203 3.08715
##
## lowest : 3.4954, 3.8946, 4.6069, 5.315, 6.4348
## highest: 18.861, 19.2265, 19.4366, 22.9875, 38.9907
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 6 - IPMEXTREMOHOMBRES (numeric)
##
## length n NAs unique 0s mean meanCI'
## 23 23 0 = n 0 11.24388 7.96809
## 100.0% 0.0% 0.0% 14.51967
##
## .05 .10 .25 median .75 .90 .95
## 3.47440 4.40280 6.03865 9.01910 13.71465 18.62768 20.75039
##
## range sd vcoef mad IQR skew kurt
## 33.63060 7.57526 0.67372 5.58303 7.67600 1.66129 3.16034
##
## lowest : 3.3094, 3.3899, 4.2349, 5.0744, 5.2534
## highest: 17.2528, 17.7908, 18.8369, 20.963, 36.94
##
## ' 95%-CI (classic)
Boxplot:
# Show which columns DATOS offers (the recorded output follows).
names(DATOS)
## [1] "Codigo" "Entidad" "IPMhombres"
## [4] "IPMMUJERES" "IPMEXTREMOMUJERES" "IPMEXTREMOHOMBRES"
# Boxplot of IPMMUJERES for each Entidad, filled by Entidad, with the
# individual observations overlaid as small jittered points. The legend is
# hidden and the x-axis title is blank.
ggplot(DATOS, mapping = aes(Entidad, IPMMUJERES, fill = Entidad)) +
  geom_boxplot() +
  geom_jitter(colour = "black", size = 0.4, alpha = 0.9) +
  labs(title = "Boxplot", x = "") +
  theme(
    plot.title = element_text(size = 11),
    legend.position = "none"
  )
Boxplot ajustado:
# Boxplot of the IPM values from the long-format IPM table, one box per Sexo
# value, with the individual observations overlaid as small jittered points.
# The legend is hidden and the x-axis title is blank.
box <- ggplot(IPM, mapping = aes(Sexo, IPM)) +
  geom_boxplot() +
  geom_jitter(colour = "black", size = 0.4, alpha = 0.9) +
  labs(title = "Boxplot", x = "") +
  theme(
    plot.title = element_text(size = 11),
    legend.position = "none"
  )
# Display the chart.
box