Taller Pre examen

R Markdown Fabian Loza

library(readxl)
# Let readxl inspect far more rows than its default of 1000 when guessing the
# column types. With the default, column 12 (P5090S1) is guessed as logical
# and every text answer found further down (first one at row 1636) is replaced
# by NA with an "Expecting logical" warning.
# NOTE(review): 100000 is simply "more rows than the sheet has" (about 17000
# are tabulated below); raise it if the file grows.
CVH <- read_excel("~/Cabecera - Vivienda y Hogares.xlsx", guess_max = 100000)

## Warning: Expecting logical in L1636 / R1636C12: got 'PROMESA DE VENTA'

## Warning: Expecting logical in L3330 / R3330C12: got 'COMPRA VENTA'

## Warning: Expecting logical in L3334 / R3334C12: got 'PROTOCOLIZACION'

## Warning: Expecting logical in L8465 / R8465C12: got 'ES DE UNA HIJA Y NO PAGA
## ARRENDO'

## Warning: Expecting logical in L9365 / R9365C12: got 'TIENEN DOCUMENTO DE
## COMPRAVENTA'

## Warning: Expecting logical in L12935 / R12935C12: got 'DOCUMENTO DE
## PROTOCOLIZACION'

## Warning: Expecting logical in L12945 / R12945C12: got 'PROTOCOLIZACION'

## Warning: Expecting logical in L13011 / R13011C12: got 'COMPRA VENTA'

## Warning: Expecting logical in L13104 / R13104C12: got 'PROMESA DE COMPRAVENTA'

## Warning: Expecting logical in L13117 / R13117C12: got 'PROMESA DE COMPRAVENTA'

## Warning: Expecting logical in L13135 / R13135C12: got 'PROMESA DE COMPRAVENTA'

## Warning: Expecting logical in L13813 / R13813C12: got 'PROMESA DE VENTA'

## Warning: Expecting logical in L13833 / R13833C12: got 'PROMESA DE VENTA'

## Warning: Expecting logical in L13834 / R13834C12: got 'PROMESA DE VENTA'

## Warning: Expecting logical in L15175 / R15175C12: got 'COMPRA VENTA'

## Warning: Expecting logical in L15181 / R15181C12: got 'TIENE DOCUMENTO DE
## PROTOCOLIZACION'

## Warning: Expecting logical in L15182 / R15182C12: got 'PROTOCOLIZACION'

## Warning: Expecting logical in L15190 / R15190C12: got 'TIENE DOCUMENTO DE COMPRA
## VENTA'

## Warning: Expecting logical in L15409 / R15409C12: got 'TIENE PROTOCOLIZACION'

## Warning: Expecting logical in L15871 / R15871C12: got 'FAMILIAR'

# Open the spreadsheet viewer only when someone is at the console.
# NOTE(review): presumably View() has nothing useful to show while the report
# is rendered non-interactively; confirm on the rendering machine.
if (interactive()) {
  View(CVH)
}
# attach() is kept because the following chunks refer to the columns by bare
# name (P5020, P5040, ...). The attached copy is a snapshot: later changes to
# CVH are not reflected in it.
attach(CVH)
names(CVH)

##  [1] "DIRECTORIO"  "SECUENCIA_P" "P5000"       "P5010"       "P5020"      
##  [6] "P5030"       "P5040"       "P5050"       "P5070"       "P5080"      
## [11] "P5090"       "P5090S1"     "P5100"       "P5110"       "P5130"      
## [16] "P5140"       "P5210S1"     "P5210S2"     "P5210S3"     "P5210S4"    
## [21] "P5210S5"     "P5210S6"     "P5210S7"     "P5210S8"     "P5210S9"    
## [26] "P5210S10"    "P5210S11"    "P5210S14"    "P5210S15"    "P5210S16"   
## [31] "P5210S17"    "P5210S18"    "P5210S19"    "P5210S20"    "P5210S21"   
## [36] "P5210S22"    "P5210S24"    "P5220"       "P5220S1"     "P6008"      
## [41] "P6007"       "P6007S1"     "HOGAR"       "P4000"       "P4010"      
## [46] "P4020"       "P4030S1"     "P4030S1A1"   "P4030S2"     "P4030S3"    
## [51] "P4030S4"     "P4030S4A1"   "P4030S5"     "P4040"       "REGIS"      
## [56] "AREA"        "CLASE"       "MES"         "DPTO"        "fex_c_2011"

#Variables numéricas: P5000, P5010, P5220S1
#Variables categóricas: P5020, P5040, P5050

# CVH is already attached above. Attaching it a second time only stacks a
# duplicate copy on the search path and prints "objects are masked" messages,
# so the column names are simply listed again.
names(CVH)

##  [1] "DIRECTORIO"  "SECUENCIA_P" "P5000"       "P5010"       "P5020"      
##  [6] "P5030"       "P5040"       "P5050"       "P5070"       "P5080"      
## [11] "P5090"       "P5090S1"     "P5100"       "P5110"       "P5130"      
## [16] "P5140"       "P5210S1"     "P5210S2"     "P5210S3"     "P5210S4"    
## [21] "P5210S5"     "P5210S6"     "P5210S7"     "P5210S8"     "P5210S9"    
## [26] "P5210S10"    "P5210S11"    "P5210S14"    "P5210S15"    "P5210S16"   
## [31] "P5210S17"    "P5210S18"    "P5210S19"    "P5210S20"    "P5210S21"   
## [36] "P5210S22"    "P5210S24"    "P5220"       "P5220S1"     "P6008"      
## [41] "P6007"       "P6007S1"     "HOGAR"       "P4000"       "P4010"      
## [46] "P4020"       "P4030S1"     "P4030S1A1"   "P4030S2"     "P4030S3"    
## [51] "P4030S4"     "P4030S4A1"   "P4030S5"     "P4040"       "REGIS"      
## [56] "AREA"        "CLASE"       "MES"         "DPTO"        "fex_c_2011"

#Realice un cruce entre dos variables categoricas e interprete
# Contingency table of the two categorical codes: P5020 in the rows and
# P5040 in the columns.
Tabla1 <- with(CVH, table(P5020, P5040))
print(Tabla1)

##      P5040
## P5020     1     2     3     4     5
##     1 15917     5     7    11    18
##     2   549     5    11    20     4
##     3   407    13     7     5     1
##     4     0     0     1     1     0
##     5     8     0     0     0     0
##     6    37     6    12    11     1

# Column-wise shares of Tabla1: every P5040 column adds up to 1.
Tabla2 <- proportions(Tabla1, margin = 2)
print(Tabla2)

##      P5040
## P5020            1            2            3            4            5
##     1 0.9408322497 0.1724137931 0.1842105263 0.2291666667 0.7500000000
##     2 0.0324506443 0.1724137931 0.2894736842 0.4166666667 0.1666666667
##     3 0.0240572172 0.4482758621 0.1842105263 0.1041666667 0.0416666667
##     4 0.0000000000 0.0000000000 0.0263157895 0.0208333333 0.0000000000
##     5 0.0004728691 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##     6 0.0021870197 0.2068965517 0.3157894737 0.2291666667 0.0416666667

##Análisis

#Para inodoro conectado al alcantarillado el 98,59% tiene sanitario exclusivo para las personas del hogar y el 1,4% lo comparte con personas de otro hogar.
#Para inodoro conectado a pozo séptico el 95,24% tiene sanitario exclusivo para las personas del hogar y el 4,75% lo comparte con personas de otro hogar.
#Para inodoro sin conexión el 95,81% tiene sanitario exclusivo para las personas del hogar y el 4,18% lo comparte con personas de otro hogar.
#Para letrina el 100% lo tiene exclusivo para las personas del hogar.
#Interpretación
#Para inodoro conectado al alcantarillado el 98,59% tiene sanitario exclusivo para las personas del hogar y el 1,4% lo comparte con personas de otro hogar.
#Para inodoro conectado a pozo séptico el 95,24% tiene sanitario exclusivo para las personas del hogar y el 4,75% lo comparte con personas de otro hogar.
#Para inodoro sin conexión el 95,81% tiene sanitario exclusivo para las personas del hogar y el 4,18% lo comparte con personas de otro hogar.
#Para letrina el 100% lo tiene exclusivo para las personas del hogar.


# One colour per P5020 category: the rows of Tabla2 are the segments stacked
# inside each P5040 bar, so the legend lists the six P5020 labels.
col <- c("YELLOW", "RED", "BLUE", "GREEN", "PURPLE", "BLACK")
barplot(Tabla2, col = col)
legend(
  "topright",
  legend = c(
    "Inodoro conectado a alcantarillado",
    "Inodoro conectado a pozo septico",
    "Inodoro sin conexion",
    "Letrina",
    "Bajamar", # label was misspelled "Bejamar"
    "No tiene servicio"
  ),
  fill = col
)

# Same cross-tabulation with the roles swapped: P5040 in the rows and P5020
# in the columns.
Tabla3 <- with(CVH, table(P5040, P5020))
print(Tabla3)

##      P5020
## P5040     1     2     3     4     5     6
##     1 15917   549   407     0     8    37
##     2     5     5    13     0     0     6
##     3     7    11     7     1     0    12
##     4    11    20     5     1     0    11
##     5    18     4     1     0     0     1

# Column-wise shares of Tabla3: every P5020 column adds up to 1.
Tabla4 <- proportions(Tabla3, margin = 2)
print(Tabla4)

##      P5020
## P5040            1            2            3            4            5
##     1 0.9974307557 0.9320882852 0.9399538106 0.0000000000 1.0000000000
##     2 0.0003133225 0.0084889643 0.0300230947 0.0000000000 0.0000000000
##     3 0.0004386515 0.0186757216 0.0161662818 0.5000000000 0.0000000000
##     4 0.0006893094 0.0339558574 0.0115473441 0.5000000000 0.0000000000
##     5 0.0011279609 0.0067911715 0.0023094688 0.0000000000 0.0000000000
##      P5020
## P5040            6
##     1 0.5522388060
##     2 0.0895522388
##     3 0.1791044776
##     4 0.1641791045
##     5 0.0149253731

# One colour per P5040 category (the rows of Tabla4, stacked inside each
# P5020 bar); the legend reuses the same palette in the same order.
col <- c("PINK", "PURPLE", "RED", "YELLOW", "ORANGE")
barplot(height = Tabla4, col = col)
legend(
  x = "topright",
  fill = col,
  legend = c(
    "Por recoleccion publica o privada",
    "La tiran a un rio, quebrada, caño o laguna",
    "La tiran a un patio,lote,zanja o baldio",
    "La queman o entierran",
    "La eliminan de otra forma"
  )
)

#Realice un cruce entre variables continua y categorica
library(crosstable)

## Warning: package 'crosstable' was built under R version 4.2.2

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.2.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Check how both variables are stored before crossing them. The columns are
# read from CVH itself rather than from the attach()ed snapshot.
class(CVH$P5000)

## [1] "numeric"

class(CVH$P5020)

## [1] "numeric"

# P5020 holds category codes, so it is converted to a factor in place.
CVH$P5020 <- as.factor(CVH$P5020)
class(CVH$P5020)

## [1] "factor"

# Summary statistics of P5000 within each P5020 category, shown as a
# flextable. The selection argument is spelled out as `cols`; the shorter
# `col` only worked through partial argument matching.
crosstable(CVH, cols = P5000, by = P5020) %>%
  as_flextable(keep_id = TRUE)

.id	label	variable	P5020
P5000	P5000	Min / Max	1.0 / 32.0	1.0 / 11.0	1.0 / 8.0	3.0 / 3.0	1.0 / 2.0	1.0 / 5.0
Med [IQR]	3.0 [3.0;4.0]	3.0 [3.0;4.0]	3.0 [3.0;4.0]	3.0 [3.0;3.0]	1.0 [1.0;1.0]	2.0 [1.0;3.0]
Mean (std)	3.4 (1.1)	3.2 (1.1)	3.4 (1.3)	3.0 (0)	1.1 (0.4)	2.0 (1.0)
N (NA)	15958 (0)	589 (0)	433 (0)	2 (0)	8 (0)	67 (0)

# Keep the cross table as an object. `cols` is written in full instead of
# relying on partial matching of `col`.
# NOTE(review): the name `c` shadows base::c() for non-function lookups and is
# not used again in the visible script; consider a more descriptive name.
c <- crosstable(CVH, cols = P5000, by = P5020)

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.2.2

# Violin plot of P5000 for each P5020 category; outline and fill are both
# mapped to the category.
p <- ggplot(
  data = CVH,
  mapping = aes(x = P5020, y = P5000, colour = P5020, fill = P5020)
) +
  geom_violin()
print(p)

#Realice una tabla de frecuencia para variable categorica
library(fdth)

## Warning: package 'fdth' was built under R version 4.2.2

## 
## Attaching package: 'fdth'

## The following objects are masked from 'package:stats':
## 
##     sd, var

# Make sure P5020 is a factor with the six category codes as levels. Declaring
# the levels in factor() replaces the separate `levels<-` assignment, which
# would fail if a category were absent from the data. The column is taken from
# CVH, not from the attach()ed snapshot.
CVH$P5020 <- factor(CVH$P5020, levels = 1:6)
# Frequency table in code order (sort = FALSE keeps the level order).
Tabla5 <- fdt_cat(CVH$P5020, sort = FALSE)
Tabla5

##  Category     f   rf rf(%)    cf  cf(%)
##         1 15958 0.94 93.56 15958  93.56
##         2   589 0.03  3.45 16547  97.01
##         3   433 0.03  2.54 16980  99.55
##         4     2 0.00  0.01 16982  99.56
##         5     8 0.00  0.05 16990  99.61
##         6    67 0.00  0.39 17057 100.00

# Bar chart of the number of households in each P5020 category.
barplot(
  height = table(CVH$P5020),
  col = "ORANGE",
  main = "El servicio sanitario que utiliza el hogar es:",
  xlab = "Opciones",
  ylab = "Frecuencia"
)

#Realice una tabla de frecuencia para variable continua.
library(fdth)
# Frequency distribution of P5010 in unit-wide classes from 0 to 14. The
# column is read from CVH directly, and TRUE is written in full because `T` is
# an ordinary variable that can be reassigned.
Tabla6 <- fdt(CVH$P5010, h = 1, start = 0, end = 14, na.rm = TRUE)
Tabla6

##  Class limits    f   rf rf(%)    cf  cf(%)
##         [0,1)    0 0.00  0.00     0   0.00
##         [1,2) 5701 0.33 33.42  5701  33.42
##         [2,3) 7002 0.41 41.05 12703  74.47
##         [3,4) 3617 0.21 21.21 16320  95.68
##         [4,5)  625 0.04  3.66 16945  99.34
##         [5,6)   88 0.01  0.52 17033  99.86
##         [6,7)   18 0.00  0.11 17051  99.96
##         [7,8)    5 0.00  0.03 17056  99.99
##         [8,9)    0 0.00  0.00 17056  99.99
##        [9,10)    1 0.00  0.01 17057 100.00
##       [10,11)    0 0.00  0.00 17057 100.00
##       [11,12)    0 0.00  0.00 17057 100.00
##       [12,13)    0 0.00  0.00 17057 100.00
##       [13,14)    0 0.00  0.00 17057 100.00

# Histogram of the frequency distribution in Tabla6. The trailing comma after
# `xlab` was dropped: it passed an empty extra argument to plot().
plot(
  Tabla6,
  col = "GREEN",
  xlab = "¿en cuántos de esos cuartos duermen las personas de este hogar?"
)

# ggplot2 histogram of P5010 with one bin per unit.
p <- ggplot(data = CVH, mapping = aes(x = P5010)) +
  geom_histogram(binwidth = 1, colour = "BLUE", fill = "pink")
print(p)

#Realice la estadística descriptiva acorde al tipo de variable.
##PARA VARIABLE CATEGORICA
# Absolute and relative frequency of each P5020 category.
P5020 <- CVH[["P5020"]]
frecuencia <- table(P5020)
proporcion <- proportions(frecuencia)
print(frecuencia)

## P5020
##     1     2     3     4     5     6 
## 15958   589   433     2     8    67

print(proporcion)

## P5020
##            1            2            3            4            5            6 
## 0.9355689746 0.0345312775 0.0253854722 0.0001172539 0.0004690157 0.0039280061

# Bar chart of the counts.
barplot(
  height = frecuencia,
  main = "Tabla de frecuencia - P5020",
  xlab = "P5020",
  ylab = "Frecuencia",
  col = "blue"
)

# Pie chart of the shares, one slice per category code.
pie(
  x = proporcion,
  labels = names(proporcion),
  main = "Tabla de proporciones - P5020",
  col = "green"
)

####PARA VARIABLE NUMERICA
# Location and spread of the numeric variable P5010; each statistic is
# computed and printed in turn.
P5010 <- CVH[["P5010"]]

media <- mean(P5010)
mediana <- median(P5010)
desviacion <- sd(P5010)
rango <- range(P5010) # c(minimum, maximum)

print(media)

## [1] 1.97268

print(mediana)

## [1] 2

print(desviacion)

## [1] 0.8750258

print(rango)

## [1] 1 9

# Histogram of P5010 with dashed reference lines for the mean, the median,
# mean +/- one standard deviation and both ends of the range.
# The line colours now match the legend (mean red, median blue, standard
# deviation orange); before, the lines were drawn blue/red/yellow while the
# legend announced red/blue/orange.
hist(
  P5010,
  col = "lightblue",
  main = "Distribución de P5010",
  xlab = "P5010",
  ylab = "Frecuencia"
)
abline(v = media, col = "red", lwd = 2, lty = 2)
abline(v = mediana, col = "blue", lwd = 2, lty = 2)
abline(
  v = c(media - desviacion, media + desviacion),
  col = "orange", lwd = 2, lty = 2
)
abline(v = rango, col = "purple", lwd = 2, lty = 2)
legend(
  "topright",
  legend = c("Media", "Mediana", "± Desviación estándar", "Rango"),
  col = c("red", "blue", "orange", "purple"),
  lty = 2,
  lwd = 2
)

#Calcule el coeficiente de correlación entre dos variables numéricas. 
library(corrgram)

## Warning: package 'corrgram' was built under R version 4.2.3

# Correlation between P5000 and P5010 (cor()'s default method), using only
# the rows where both values are present.
with(CVH, cor(P5000, P5010, use = "complete.obs"))

## [1] 0.6171798

# Scatter plot of the same pair of variables.
plot(x = P5000, y = P5010, col = "RED")

library(corrplot)

## Warning: package 'corrplot' was built under R version 4.2.3

## corrplot 0.92 loaded

# Two-column data frame (column names spelled as data.frame() would derive
# them), its correlation matrix, and the matrix drawn as numbers.
R <- data.frame(CVH.P5000 = CVH$P5000, CVH.P5010 = CVH$P5010)
x <- cor(R, use = "complete.obs")
corrplot(x, method = "number")

# Spearman rank correlation of the two variables. It has to be computed from
# the observations in R: ranking the columns of the 2x2 Pearson matrix `x`
# gives -1 off the diagonal for any correlation below 1, whatever the data
# look like.
x2 <- cor(R, method = "spearman", use = "complete.obs")
x2

corrplot(x2, method = "number")

Inclusión del código

#library(readxl) CVH <- read_excel(“~/Cabecera - Vivienda y Hogares.xlsx”) View(CVH) attach(CVH) names(CVH)

#vARIABLES NUMERICAS: P5000, p5010, P5220S1 #vARIABLES CATEGORICA: P5020, P5040, P5050

attach(CVH) names(CVH) #Realice un cruce entre dos variables categoricas e interprete Tabla1=table(P5020,P5040) Tabla1 Tabla2=prop.table(Tabla1, margin = 2) Tabla2 col=c(“YELLOW”, “RED”, “BLUE”, “GREEN”, “PURPLE”, “BLACK”) barplot(Tabla2, col=col) legend(“topright”, legend=c(“Inodoro conectado a alcantarillado”, “Inodoro conectado a pozo septico”, “Inodoro sin conexion”, “Letrina”, “Bejamar”, “No tiene servicio”), fill=col)

Tabla3=table(P5040,P5020) Tabla3 Tabla4=prop.table(Tabla3, margin = 2) Tabla4 col=c(“RED”, “PINK”, “GREEN”, “GRAY”,“ORANGE”) barplot(Tabla4, col=col) legend(“topright”, legend=c(“Por recoleccion publica o privada”, “La tiran a un rio, quebrada, caño o laguna”, “La tiran a un patio,lote,zanja o baldio”, “La queman o entierran”, “La eliminan de otra forma”), fill = col)

#Realice un cruce entre variables continua y categorica library(crosstable) library(dplyr) class(P5000) class(P5020) CVH$P5020=as.factor(P5020) class(CVH$P5020) crosstable(CVH, col=P5000, by=P5020) %>% as_flextable(keep_id=TRUE) c=crosstable(CVH, col=P5000, by=P5020)

library(ggplot2) p <- ggplot(CVH, aes(x=P5020, y=P5000, col=P5020, fill=P5020)) + geom_violin() p

#Realice una tabla de frecuencia para variable categorica library(fdth) CVH$P5020=as.factor(P5020) levels(CVH$P5020)=c(1,2,3,4,5,6) Tabla5=fdt_cat(CVH$P5020, sort=F) Tabla5 barplot(table(CVH$P5020), main = "El servicio sanitario que utiliza el hogar es:", xlab = "Opciones", ylab = "Frecuencia", col="YELLOW")

#Realice una tabla de frecuencia para variable continua. library(fdth) Tabla6=fdt(P5010, h=1, start = 0, end = 14, na.rm=T) Tabla6 plot(Tabla6, col=“GREEN”, xlab = “¿en cuántos de esos cuartos duermen las personas de este hogar?”,) p <- ggplot(CVH, aes(x=P5010))+ geom_histogram(binwidth=1, color=“orange”, fill=“pink”) p

#Realice la estadística descriptiva acorde al tipo de variable. ##PARA VARIABLE CATEGORICA P5020<-CVH$P5020 frecuencia<-table(P5020) proporcion<-prop.table(frecuencia) print(frecuencia) print(proporcion) barplot(frecuencia, xlab = "P5020", ylab = "Frecuencia", main = "Tabla de frecuencia - P5020",col="blue") pie(proporcion, labels = names(proporcion), main = "Tabla de proporciones - P5020",col="green") ####PARA VARIABLE NUMERICA P5010<-CVH$P5010 media<-mean(P5010) mediana<-median(P5010) desviacion<-sd(P5010) rango<-range(P5010) print(media) print(mediana) print(desviacion) print(rango) hist(P5010, col = "lightblue", main = "Distribución de P5010", xlab = "P5010", ylab = "Frecuencia") abline(v = media, col = "red", lwd = 2, lty = 2) abline(v = mediana, col = "blue", lwd = 2, lty = 2) abline(v = media - desviacion, col = "orange", lwd = 2, lty = 2) abline(v = media + desviacion, col = "orange", lwd = 2, lty = 2) abline(v = rango, col = "purple", lwd = 2, lty = 2) legend("topright", legend = c("Media", "Mediana", "± Desviación estándar", "Rango"), col = c("red", "blue", "orange", "purple"), lty = 2, lwd = 2)

#Calcule el coeficiente de correlación entre dos variables numéricas. library(corrgram) cor(CVH$P5000, CVH$P5010, use = "complete.obs") plot(P5000, P5010, col="RED")

library(corrplot) R=data.frame(CVH$P5000, CVH$P5010) x=cor(R, use = "complete.obs") corrplot(x, method="number") x2=cor(x, method = "spearman") x2 corrplot(x2, method="number")

Taller Pre examen

Fabián Yesid loza Ayala-2204155

2023-07-16

R Markdown Fabian Loza

Inclusión del código