library(readxl)
# Load the dwelling/household survey sheet into CVH.
# guess_max is raised to Excel's maximum row count so column types are guessed
# from every row. With the default (first 1000 rows) column 12 (P5090S1) was
# guessed as logical, and its free-text answers (e.g. 'PROMESA DE VENTA',
# 'COMPRA VENTA', 'PROTOCOLIZACION', 'FAMILIAR') each triggered an
# "Expecting logical" warning instead of being read as text.
CVH <- read_excel(
  "~/Cabecera - Vivienda y Hogares.xlsx",
  guess_max = 1048576
)
# The spreadsheet viewer only makes sense in an interactive session.
if (interactive()) {
  View(CVH)
}
# Attach exactly once: a second attach() only stacks another copy on the search
# path and produces "objects are masked" messages.
# NOTE(review): attach() exposes a snapshot of the columns; later changes to
# CVH are not reflected in the bare names, so prefer CVH$<column> in new code.
attach(CVH)
names(CVH)
## [1] "DIRECTORIO" "SECUENCIA_P" "P5000" "P5010" "P5020"
## [6] "P5030" "P5040" "P5050" "P5070" "P5080"
## [11] "P5090" "P5090S1" "P5100" "P5110" "P5130"
## [16] "P5140" "P5210S1" "P5210S2" "P5210S3" "P5210S4"
## [21] "P5210S5" "P5210S6" "P5210S7" "P5210S8" "P5210S9"
## [26] "P5210S10" "P5210S11" "P5210S14" "P5210S15" "P5210S16"
## [31] "P5210S17" "P5210S18" "P5210S19" "P5210S20" "P5210S21"
## [36] "P5210S22" "P5210S24" "P5220" "P5220S1" "P6008"
## [41] "P6007" "P6007S1" "HOGAR" "P4000" "P4010"
## [46] "P4020" "P4030S1" "P4030S1A1" "P4030S2" "P4030S3"
## [51] "P4030S4" "P4030S4A1" "P4030S5" "P4040" "REGIS"
## [56] "AREA" "CLASE" "MES" "DPTO" "fex_c_2011"
# Numeric variables used below: P5000, P5010, P5220S1
# Categorical variables used below: P5020, P5040, P5050
# Cross-tabulate two categorical variables and interpret the result.
# Tabla1 holds the joint counts: rows are P5020 codes, columns are P5040 codes.
Tabla1 <- with(CVH, table(P5020, P5040))
print(Tabla1)
## P5040
## P5020 1 2 3 4 5
## 1 15917 5 7 11 18
## 2 549 5 11 20 4
## 3 407 13 7 5 1
## 4 0 0 1 1 0
## 5 8 0 0 0 0
## 6 37 6 12 11 1
# Tabla2 rescales every column of Tabla1 to sum to 1 (margin = 2), i.e. the
# share of each P5020 code within each P5040 category.
Tabla2 <- proportions(Tabla1, margin = 2)
print(Tabla2)
## P5040
## P5020 1 2 3 4 5
## 1 0.9408322497 0.1724137931 0.1842105263 0.2291666667 0.7500000000
## 2 0.0324506443 0.1724137931 0.2894736842 0.4166666667 0.1666666667
## 3 0.0240572172 0.4482758621 0.1842105263 0.1041666667 0.0416666667
## 4 0.0000000000 0.0000000000 0.0263157895 0.0208333333 0.0000000000
## 5 0.0004728691 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## 6 0.0021870197 0.2068965517 0.3157894737 0.2291666667 0.0416666667
## Analysis / Interpretation
# For toilets connected to the sewer, 98.59% have a sanitary facility used exclusively by the members of the household and 1.4% share it with people from another household.
# For toilets connected to a septic tank, 95.24% have a sanitary facility used exclusively by the members of the household and 4.75% share it with people from another household.
# For toilets without a connection, 95.81% have a sanitary facility used exclusively by the members of the household and 4.18% share it with people from another household.
# For latrines, 100% are used exclusively by the members of the household.
# NOTE(review): these percentages do not appear in Tabla2 above; they read like
# a cross of sanitary type against exclusive/shared use - confirm which table
# they were taken from.
# Stacked bar chart of Tabla2: one bar per column of the table, each bar split
# into the six row shares. The six colours and the six legend labels are paired
# in order with the rows.
col <- c("YELLOW", "RED", "BLUE", "GREEN", "PURPLE", "BLACK")
etiquetas_filas <- c(
  "Inodoro conectado a alcantarillado",
  "Inodoro conectado a pozo septico",
  "Inodoro sin conexion",
  "Letrina",
  "Bejamar",
  "No tiene servicio"
)
barplot(Tabla2, col = col)
legend("topright", legend = etiquetas_filas, fill = col)
# Same cross as before with the roles swapped: rows are P5040 codes, columns
# are P5020 codes.
Tabla3 <- with(CVH, table(P5040, P5020))
print(Tabla3)
## P5020
## P5040 1 2 3 4 5 6
## 1 15917 549 407 0 8 37
## 2 5 5 13 0 0 6
## 3 7 11 7 1 0 12
## 4 11 20 5 1 0 11
## 5 18 4 1 0 0 1
# Column proportions (margin = 2): share of each P5040 code within every
# P5020 category.
Tabla4 <- proportions(Tabla3, margin = 2)
print(Tabla4)
## P5020
## P5040 1 2 3 4 5
## 1 0.9974307557 0.9320882852 0.9399538106 0.0000000000 1.0000000000
## 2 0.0003133225 0.0084889643 0.0300230947 0.0000000000 0.0000000000
## 3 0.0004386515 0.0186757216 0.0161662818 0.5000000000 0.0000000000
## 4 0.0006893094 0.0339558574 0.0115473441 0.5000000000 0.0000000000
## 5 0.0011279609 0.0067911715 0.0023094688 0.0000000000 0.0000000000
## P5020
## P5040 6
## 1 0.5522388060
## 2 0.0895522388
## 3 0.1791044776
## 4 0.1641791045
## 5 0.0149253731
# Stacked bars of Tabla4; the five colours and five legend labels are paired in
# order with the table rows.
col <- c("PINK", "PURPLE", "RED", "YELLOW", "ORANGE")
etiquetas_filas <- c(
  "Por recoleccion publica o privada",
  "La tiran a un rio, quebrada, caño o laguna",
  "La tiran a un patio,lote,zanja o baldio",
  "La queman o entierran",
  "La eliminan de otra forma"
)
barplot(Tabla4, col = col)
legend("topright", legend = etiquetas_filas, fill = col)
# Cross a continuous variable (P5000) with a categorical one (P5020).
library(crosstable)
## Warning: package 'crosstable' was built under R version 4.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
class(CVH$P5000)
## [1] "numeric"
class(CVH$P5020)
## [1] "numeric"
# Convert the grouping column in the data frame itself (not via an attached
# copy) so that crosstable() and ggplot() below see a factor.
CVH$P5020 <- as.factor(CVH$P5020)
class(CVH$P5020)
## [1] "factor"
# Summarise P5000 within each P5020 level. The table is computed once and kept
# under a descriptive name; the previous name `c` shadowed base::c. `cols` is
# spelled out instead of relying on partial matching of `col`.
cruce_p5000_p5020 <- crosstable(CVH, cols = P5000, by = P5020)
cruce_p5000_p5020 %>% as_flextable(keep_id = TRUE)
## .id | label | variable | P5020 | |||||
## |---|---|---|---|---|---|---|---|---|
## 1 | 2 | 3 | 4 | 5 | 6 | |||
## P5000 | P5000 | Min / Max | 1.0 / 32.0 | 1.0 / 11.0 | 1.0 / 8.0 | 3.0 / 3.0 | 1.0 / 2.0 | 1.0 / 5.0 |
## Med [IQR] | 3.0 [3.0;4.0] | 3.0 [3.0;4.0] | 3.0 [3.0;4.0] | 3.0 [3.0;3.0] | 1.0 [1.0;1.0] | 2.0 [1.0;3.0] | ||
## Mean (std) | 3.4 (1.1) | 3.2 (1.1) | 3.4 (1.3) | 3.0 (0) | 1.1 (0.4) | 2.0 (1.0) | ||
## N (NA) | 15958 (0) | 589 (0) | 433 (0) | 2 (0) | 8 (0) | 67 (0) | ||
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
# Violin plot of P5000 for every P5020 level; outline and fill are both mapped
# to the level.
p <- ggplot(
  CVH,
  aes(x = P5020, y = P5000, colour = P5020, fill = P5020)
) +
  geom_violin()
p
# Frequency table for a categorical variable (P5020).
library(fdth)
## Warning: package 'fdth' was built under R version 4.2.2
##
## Attaching package: 'fdth'
## The following objects are masked from 'package:stats':
##
## sd, var
# Declare the six codes explicitly. factor(levels = 1:6) works whether the
# column is still numeric or already a factor, and keeps a level that happens
# to have no observations (assigning six labels with levels<- would fail if
# fewer than six codes were present).
CVH$P5020 <- factor(CVH$P5020, levels = 1:6)
# sort = FALSE keeps the categories in level order instead of by frequency.
Tabla5 <- fdt_cat(CVH$P5020, sort = FALSE)
Tabla5
## Category f rf rf(%) cf cf(%)
## 1 15958 0.94 93.56 15958 93.56
## 2 589 0.03 3.45 16547 97.01
## 3 433 0.03 2.54 16980 99.55
## 4 2 0.00 0.01 16982 99.56
## 5 8 0.00 0.05 16990 99.61
## 6 67 0.00 0.39 17057 100.00
barplot(
  table(CVH$P5020),
  main = "El servicio sanitario que utiliza el hogar es:",
  xlab = "Opciones",
  ylab = "Frecuencia",
  col = "ORANGE"
)
# Frequency table for a continuous variable (P5010).
library(fdth)
# Unit-width classes from 0 to 14; missing values are dropped. The column is
# read from CVH directly and TRUE is spelled out (T can be reassigned).
Tabla6 <- fdt(CVH$P5010, h = 1, start = 0, end = 14, na.rm = TRUE)
Tabla6
## Class limits f rf rf(%) cf cf(%)
## [0,1) 0 0.00 0.00 0 0.00
## [1,2) 5701 0.33 33.42 5701 33.42
## [2,3) 7002 0.41 41.05 12703 74.47
## [3,4) 3617 0.21 21.21 16320 95.68
## [4,5) 625 0.04 3.66 16945 99.34
## [5,6) 88 0.01 0.52 17033 99.86
## [6,7) 18 0.00 0.11 17051 99.96
## [7,8) 5 0.00 0.03 17056 99.99
## [8,9) 0 0.00 0.00 17056 99.99
## [9,10) 1 0.00 0.01 17057 100.00
## [10,11) 0 0.00 0.00 17057 100.00
## [11,12) 0 0.00 0.00 17057 100.00
## [12,13) 0 0.00 0.00 17057 100.00
## [13,14) 0 0.00 0.00 17057 100.00
# Histogram drawn from the frequency table (the stray trailing comma, which
# passed an empty argument to plot(), has been removed).
plot(
  Tabla6,
  col = "GREEN",
  xlab = "¿en cuántos de esos cuartos duermen las personas de este hogar?"
)
# The same distribution drawn with ggplot2, one bin per unit.
p <- ggplot(CVH, aes(x = P5010)) +
  geom_histogram(binwidth = 1, color = "BLUE", fill = "pink")
p
# Descriptive statistics according to the type of variable.
## Categorical variable: absolute counts and proportions of P5020.
P5020 <- CVH[["P5020"]]
frecuencia <- table(P5020)
proporcion <- proportions(frecuencia)
print(frecuencia)
## P5020
## 1 2 3 4 5 6
## 15958 589 433 2 8 67
print(proporcion)
## P5020
## 1 2 3 4 5 6
## 0.9355689746 0.0345312775 0.0253854722 0.0001172539 0.0004690157 0.0039280061
# Bar chart of the counts.
barplot(
  frecuencia,
  main = "Tabla de frecuencia - P5020",
  xlab = "P5020",
  ylab = "Frecuencia",
  col = "blue"
)
# Pie chart of the proportions, one slice per category name.
pie(
  proporcion,
  labels = names(proporcion),
  main = "Tabla de proporciones - P5020",
  col = "green"
)
#### Numeric variable: location and spread of P5010.
P5010 <- CVH$P5010
# na.rm = TRUE keeps the summaries defined even if the column contains missing
# values; stats::sd is called explicitly because fdth masks sd().
media <- mean(P5010, na.rm = TRUE)
mediana <- median(P5010, na.rm = TRUE)
desviacion <- stats::sd(P5010, na.rm = TRUE)
rango <- range(P5010, na.rm = TRUE)
print(media)
## [1] 1.97268
print(mediana)
## [1] 2
print(desviacion)
## [1] 0.8750258
print(rango)
## [1] 1 9
hist(P5010, col = "lightblue", main = "Distribución de P5010", xlab = "P5010", ylab = "Frecuencia")
# Reference lines. Each colour is the one listed for that statistic in the
# legend below (previously the mean was drawn blue, the median red and the
# standard-deviation band yellow, contradicting the legend).
abline(v = media, col = "red", lwd = 2, lty = 2)
abline(v = mediana, col = "blue", lwd = 2, lty = 2)
abline(v = media - desviacion, col = "orange", lwd = 2, lty = 2)
abline(v = media + desviacion, col = "orange", lwd = 2, lty = 2)
# rango has two values, so this draws one line at the minimum and one at the
# maximum.
abline(v = rango, col = "purple", lwd = 2, lty = 2)
legend(
  "topright",
  legend = c("Media", "Mediana", "± Desviación estándar", "Rango"),
  col = c("red", "blue", "orange", "purple"),
  lty = 2,
  lwd = 2
)
# Correlation coefficient between two numeric variables (P5000 and P5010).
library(corrgram)
## Warning: package 'corrgram' was built under R version 4.2.3
# cor() with its default method, using only rows where both values are present.
cor(CVH$P5000, CVH$P5010, use = "complete.obs")
## [1] 0.6171798
# Scatterplot of the two columns, read from CVH rather than from attached or
# global copies; with() keeps the axis labels as the bare column names.
with(CVH, plot(P5000, P5010, col = "RED"))
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.3
## corrplot 0.92 loaded
R <- data.frame(CVH$P5000, CVH$P5010)
x <- cor(R, use = "complete.obs")
corrplot(x, method = "number")
# Spearman correlation of the data themselves. The previous call,
# cor(x, method = "spearman"), correlated the two columns of the 2x2
# correlation matrix x, which is why it printed -1 off the diagonal.
# (Output not reproduced here: re-run to obtain the corrected matrix.)
x2 <- cor(R, method = "spearman", use = "complete.obs")
x2
corrplot(x2, method = "number")
# ---- Raw source echo (commented out; not executed) ----
#library(readxl) CVH <- read_excel("~/Cabecera - Vivienda y Hogares.xlsx") View(CVH) attach(CVH) names(CVH)
#vARIABLES NUMERICAS: P5000, p5010, P5220S1 #vARIABLES CATEGORICA: P5020, P5040, P5050
# attach(CVH) names(CVH) #Realice un cruce entre dos variables categoricas e interprete Tabla1=table(P5020,P5040) Tabla1 Tabla2=prop.table(Tabla1, margin = 2) Tabla2 col=c("YELLOW", "RED", "BLUE", "GREEN", "PURPLE", "BLACK") barplot(Tabla2, col=col) legend("topright", legend=c("Inodoro conectado a alcantarillado", "Inodoro conectado a pozo septico", "Inodoro sin conexion", "Letrina", "Bejamar", "No tiene servicio"), fill=col)
# Tabla3=table(P5040,P5020) Tabla3 Tabla4=prop.table(Tabla3, margin = 2) Tabla4 col=c("RED", "PINK", "GREEN", "GRAY","ORANGE") barplot(Tabla4, col=col) legend("topright", legend=c("Por recoleccion publica o privada", "La tiran a un rio, quebrada, caño o laguna", "La tiran a un patio,lote,zanja o baldio", "La queman o entierran", "La eliminan de otra forma"), fill = col)
#Realice un cruce entre variables continua y categorica library(crosstable) library(dplyr) class(P5000) class(P5020) CVH$P5020=as.factor(P5020) class(CVH$P5020) crosstable(CVH, col=P5000, by=P5020) %>% as_flextable(keep_id=TRUE) c=crosstable(CVH, col=P5000, by=P5020)
# library(ggplot2) p <- ggplot(CVH, aes(x=P5020, y=P5000, col=P5020, fill=P5020)) + geom_violin() p
#Realice una tabla de frecuencia para variable categorica library(fdth) CVH$P5020=as.factor(P5020) levels(CVH$P5020)=c(1,2,3,4,5,6) Tabla5=fdt_cat(CVH$P5020, sort=F) Tabla5 barplot(table(CVH$P5020), main = "El servicio sanitario que utiliza el hogar es:", xlab = "Opciones", ylab = "Frecuencia", col="YELLOW")
#Realice una tabla de frecuencia para variable continua. library(fdth) Tabla6=fdt(P5010, h=1, start = 0, end = 14, na.rm=T) Tabla6 plot(Tabla6, col="GREEN", xlab = "¿en cuántos de esos cuartos duermen las personas de este hogar?",) p <- ggplot(CVH, aes(x=P5010))+ geom_histogram(binwidth=1, color="orange", fill="pink") p
#Realice la estadística descriptiva acorde al tipo de variable. ##PARA VARIABLE CATEGORICA P5020<-CVH$P5020 frecuencia<-table(P5020) proporcion<-prop.table(frecuencia) print(frecuencia) print(proporcion) barplot(frecuencia, xlab = "P5020", ylab = "Frecuencia", main = "Tabla de frecuencia - P5020",col="blue") pie(proporcion, labels = names(proporcion), main = "Tabla de proporciones - P5020",col="green") ####PARA VARIABLE NUMERICA P5010<-CVH$P5010 media<-mean(P5010) mediana<-median(P5010) desviacion<-sd(P5010) rango<-range(P5010) print(media) print(mediana) print(desviacion) print(rango) hist(P5010, col = "lightblue", main = "Distribución de P5010", xlab = "P5010", ylab = "Frecuencia") abline(v = media, col = "red", lwd = 2, lty = 2) abline(v = mediana, col = "blue", lwd = 2, lty = 2) abline(v = media - desviacion, col = "orange", lwd = 2, lty = 2) abline(v = media + desviacion, col = "orange", lwd = 2, lty = 2) abline(v = rango, col = "purple", lwd = 2, lty = 2) legend("topright", legend = c("Media", "Mediana", "± Desviación estándar", "Rango"), col = c("red", "blue", "orange", "purple"), lty = 2, lwd = 2)
#Calcule el coeficiente de correlación entre dos variables numéricas. library(corrgram) cor(CVH$P5000, CVH$P5010, use = "complete.obs") plot(P5000, P5010, col="RED")
# library(corrplot) R=data.frame(CVH$P5000, CVH$P5010) x=cor(R, use = "complete.obs") corrplot(x, method="number") x2=cor(x, method = "spearman") x2 corrplot(x2, method="number")