# One-time environment setup.
# The original was a single line that chained every call without separators
# and used typographic (curly) quotes, which R cannot parse.
# "kable" is no longer requested: kable() is a function exported by knitr,
# which is already in the list below.
required_pkgs <- c(
  "rmarkdown", "knitr", "data.table", "ggplot2", "tidyverse", "plotly",
  "RColorBrewer", "janitor", "broom", "rgbif", "xfun", "tinytex", "munsell"
)

# Install only what is not already present, so re-running the script does not
# re-download every package.
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# LaTeX-side setup used for PDF output.
# The test compile is skipped when test.tex is not in the working directory.
if (file.exists("test.tex")) {
  tinytex::pdflatex("test.tex")
}
# Internal tinytex helper (hence `:::`).
# NOTE(review): presumably needs a working TinyTeX installation — confirm.
tinytex:::install_yihui_pkgs()
# NOTE: the former `rm(list = ls())` was removed. It only deletes objects in
# the global environment (attached packages and options stay as they were),
# so it does not provide a clean session, and it wipes the caller's objects
# when this script is sourced. Restart R for a truly fresh run.
library("data.table")
library("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
library("ggplot2")
library("janitor")
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library("RColorBrewer")
library("tinytex")
library("xfun")
##
## Attaching package: 'xfun'
## The following objects are masked from 'package:base':
##
## attr, isFALSE
# Household table: read Base_Hogares.csv as Latin-1, pass the result through
# as.data.table(), then print it.
hogares <- as.data.table(
  fread("Base_Hogares.csv", encoding = "Latin-1")
)
hogares
## Hogar Macrozona Zona Comuna Manzana DirCoordX DirCoordY Fecha
## 1: 90431 1 9 2 90431 253073.7 6341154 2014-11-25
## 2: 1012966 13 101 3 1012966 266270.6 6342393 2015-05-08
## 3: 1580550 17 158 4 1580550 263457.5 6352430 2014-11-04
## 4: 1630397 17 163 4 1630397 261833.0 6353000 2015-11-18
## 5: 3385901 1 3 2 33843 252717.4 6341820 2015-04-01
## ---
## 8772: 3563001037 19 356 5 3563001 269103.5 6340579 2014-09-10
## 8773: 10100810901 13 101 3 1010081 265910.7 6341803 2015-05-08
## 8774: 21618310901 21 216 5 2161831 274441.3 6339604 2014-10-28
## 8775: 23550461901 26 235 6 2350461 278998.6 6341682 2014-09-30
## 8776: 50234990903 6 50 2 502349 259774.5 6332118 2015-05-13
## DiaAsig TipoDia NumPer NumVeh Propiedad NoSabeNoResponde MontoDiv
## 1: 2 1 3 0 1 1 NA
## 2: 5 1 3 1 1 0 NA
## 3: 2 1 5 3 1 0 NA
## 4: 3 1 2 1 3 0 NA
## 5: 3 1 2 0 3 0 NA
## ---
## 8772: 3 1 4 0 1 0 NA
## 8773: 5 1 3 0 1 0 NA
## 8774: 2 1 3 0 3 0 NA
## 8775: 2 1 2 1 1 0 NA
## 8776: 3 1 2 1 1 0 NA
## MontoArrEstima MontoArrPaga IngresoHogar Factor_Laboral Factor_Sabado
## 1: NA NA 241483 49.61113 NA
## 2: 280000 NA 1156372 47.45112 NA
## 3: 1000000 NA 3442226 53.75151 NA
## 4: NA 350000 695736 35.05007 NA
## 5: NA 100000 258299 40.28616 NA
## ---
## 8772: 120000 NA 331018 27.74549 NA
## 8773: 260000 NA 769430 45.89651 NA
## 8774: NA 80000 626361 34.22977 NA
## 8775: 200000 NA 1000248 35.39052 NA
## 8776: 300000 NA 1191615 45.21407 NA
## Factor_Domingo Factor aux comunahg Macrozonahg Factorhg
## 1: NA 40.82472 1 Valparaíso Playa Ancha 40.82472
## 2: NA 39.15974 1 Viña del Mar Viña del Mar Oriente 39.15974
## 3: NA 45.42331 1 Concon Concón Poniente 45.42331
## 4: NA 35.33358 1 Concon Concón Poniente 35.33358
## 5: NA 32.25464 1 Valparaíso Playa Ancha 32.25464
## ---
## 8772: NA 23.72622 1 Quilpue Quilpué Poniente 23.72622
## 8773: NA 38.05306 1 Viña del Mar Viña del Mar Oriente 38.05306
## 8774: NA 30.05988 1 Quilpue El Belloto 30.05988
## 8775: NA 27.56420 1 Villa Alemana Villa Alemana Norte 27.56420
## 8776: NA 39.39303 1 Valparaíso Placilla-Curauma 39.39303
# Histogram (40 bins) of IngresoHogar, restricted to rows whose value is
# below 2000000.
hogares[IngresoHogar < 2000000] %>%
  ggplot(aes(x = IngresoHogar)) +
  geom_histogram(bins = 40)
# Mean of IngresoHogar per Macrozona.
# The summary column is named directly inside summarise() instead of being
# renamed afterwards by position with `colnames<-`, which would silently
# mislabel columns if the grouping or summary ever changed.
# NOTE(review): mean() is called without na.rm, so any NA in IngresoHogar
# turns that group's mean into NA — confirm the column has no missing values.
ingprom <- hogares %>%
  group_by(Macrozona) %>%
  summarise(Prom = mean(IngresoHogar))
colnames(ingprom)
## [1] "Macrozona" "Prom"
# Bar chart of Prom by Macrozona.
# ingprom already holds one aggregated row per Macrozona, so the bars are
# drawn straight from the data with geom_col() rather than re-averaging
# single values through stat_summary(fun = "mean", geom = "bar").
ggplot(ingprom, aes(x = Macrozona, y = Prom)) +
  geom_col()
# Labelled version of the Prom-by-Macrozona bar chart.
# ingprom already holds one aggregated row per Macrozona, so geom_col() draws
# the bars directly instead of re-averaging single values via stat_summary().
ggplot(ingprom, aes(x = Macrozona, y = Prom)) +
  geom_col() +
  labs(
    x = "N°Macrozona",
    y = "SalarioPromedio",
    title = "Promedio de salario",
    subtitle = "Por macrozona",
    caption = "Fuente:Base_Hogares.csv"
  ) +
  # Hides the tick labels on the x axis (the axis title above is kept).
  theme(axis.text.x = element_blank())
# Restaurant table (read as Latin-1), followed by a scatter plot of rating
# against reviews with points coloured by COMUNA.
rest <- fread("restaurantes.csv", encoding = "Latin-1")
rest %>%
  ggplot(aes(x = reviews, y = rating, colour = COMUNA)) +
  geom_point()
# Indicator per restaurant: reviews * rating, divided by 1000.
# The intermediate vectors stay in the global environment, and the indicator
# is appended to a copy of the table as column `indicador`.
mult <- with(rest, reviews * rating)
indicador <- mult / 1000
total <- cbind(rest, indicador)
# Print the new column as a plain vector.
total[["indicador"]]
## [1] 7.7878 1.1528 0.0473 0.0111 0.3913 6.6375 2.7270 6.2235 6.9345 1.4076
## [11] 0.0329 0.8883 0.0150 7.6780 0.0980 3.8399 1.4112 1.6290 4.1630 2.4534
## [21] 1.5928 0.7812 0.5400 1.0648 0.0473 0.2392 0.0300 0.2565 0.1058 0.4563
## [31] 0.0100 0.0080 1.8814 0.0987 0.0090 0.1188 0.0070
# Indicator values for three groups of restaurants, entered by hand, and the
# mean of each group.
# NOTE(review): these numbers look copied from the printed `indicador` vector;
# the rule assigning restaurants to each group is not visible here — confirm.
PlanVina <- c(
  7.7878, 1.1528, 0.0473, 0.0111, 0.3913,
  6.6375, 2.7270, 6.2235, 1.4112, 0.7812
)
mean(PlanVina)
## [1] 2.71707

PlanValp <- c(
  6.9345, 1.4076, 0.0329, 0.0980,
  3.8399, 0.2392, 1.0648
)
mean(PlanValp)
## [1] 1.945271

Valpoal <- c(0.8883, 0.0150, 7.6780, 0.1058)
mean(Valpoal)
## [1] 2.171775