Paquetes

# install.packages("devtools")
# devtools::install_github("ankitrohatgi/digitizeR")
# library('digitizeR')
library("tidyverse")
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.0.0     v purrr   0.2.5
v tibble  1.4.2     v dplyr   0.7.6
v tidyr   0.8.1     v stringr 1.3.0
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

extracción manual de datos del gráfico

Figura 2

Figura 2

# app <- wpd.launch()

Dataset

# Load the dataset from DisparitiesCaries.csv (path relative to the working
# directory). No col_types are given, so readr guesses the column types.
df <- read_csv("DisparitiesCaries.csv")
Parsed with column specification:
cols(
  MCD = col_double(),
  `Caries prevalence` = col_double(),
  Name = col_character()
)
# Print the tibble to inspect its rows.
df
# A tibble: 13 x 3
      MCD `Caries prevalence` Name                 
    <dbl>               <dbl> <chr>                
 1 0.0502                28.4 La Pintana           
 2 0.137                 53.9 Lampa                
 3 0.210                 26.7 Recoleta             
 4 0.305                 40.7 Independencia        
 5 0.386                 42.5 San Jose             
 6 0.413                 46.2 Isla de Maipo        
 7 0.437                 24.6 Maipu                
 8 0.482                 42.7 Melipilla            
 9 0.533                 35.3 Peñalolen            
10 0.604                 40.4 Cerrillos            
11 0.770                 39.4 Santiago             
12 0.851                 29.4 Providencia          
13 0.952                 17.0 Vitacura – Las Condes
# Compact overview: one line per column with its type and first values.
glimpse(df)
Observations: 13
Variables: 3
$ MCD                 <dbl> 0.05022321, 0.13743133, 0.21005779, 0.30511758, 0.38561261, 0.41320146, 0.43709088, 0.482359...
$ `Caries prevalence` <dbl> 28.41328, 53.94834, 26.71587, 40.66421, 42.50923, 46.19926, 24.57565, 42.73063, 35.27675, 40...
$ Name                <chr> "La Pintana", "Lampa", "Recoleta", "Independencia", "San Jose", "Isla de Maipo", "Maipu", "M...
# Per-column summary statistics.
summary(df)
      MCD          Caries prevalence     Name          
 Min.   :0.05022   Min.   :17.05     Length:13         
 1st Qu.:0.30512   1st Qu.:28.41     Class :character  
 Median :0.43709   Median :39.41     Mode  :character  
 Mean   :0.47169   Mean   :35.94                       
 3rd Qu.:0.60450   3rd Qu.:42.51                       
 Max.   :0.95217   Max.   :53.95                       

x~y plot

# Scatter plot of caries prevalence against MCD, one point per row of df.
ggplot(data = df, mapping = aes(x = MCD, y = `Caries prevalence`)) +
  geom_point()

# Same scatter plot, with every point annotated by its Name.
ggplot(
  data = df,
  mapping = aes(x = MCD, y = `Caries prevalence`, label = Name)
) +
  geom_point() +
  # Text labels, nudged slightly right and placed below each point.
  geom_text(vjust = 1.5, nudge_x = 0.007, size = 2.7) +
  theme_minimal()

con línea de regresión

Regresión lineal

geom_smooth(method='lm',formula=y~x)

Regresión ajustada

# Labelled scatter plot with a smoothed trend drawn on top.
ggplot(
  data = df,
  mapping = aes(x = MCD, y = `Caries prevalence`, label = Name)
) +
  geom_point() +
  geom_text(vjust = 1.5, nudge_x = 0.007, size = 2.7) +
  # Default geom_smooth(): ggplot2 chooses the smoothing method itself.
  geom_smooth() +
  theme_minimal()

Voy a probar con GAM y (y ~ poly(x, 2))

¿Es Vitacura un outlier?

# Plot only the "Vitacura – Las Condes" row, to look at that point on its own.
ggplot(
  data = filter(df, Name == "Vitacura – Las Condes"),
  mapping = aes(x = MCD, y = `Caries prevalence`, label = Name)
) +
  geom_point() +
  geom_text(vjust = 1.5, nudge_x = 0.007, size = 2.7) +
  theme_minimal()

  # geom_smooth() # agrego area de regresion
# Labelled scatter plot and smoothed trend for every row except
# "Vitacura – Las Condes".
ggplot(
  data = filter(df, Name != "Vitacura – Las Condes"),
  mapping = aes(x = MCD, y = `Caries prevalence`, label = Name)
) +
  geom_point() +
  geom_text(vjust = 1.5, nudge_x = 0.007, size = 2.7) +
  # Default geom_smooth(): ggplot2 chooses the smoothing method itself.
  geom_smooth() +
  theme_minimal()

Aparentemente sí

elimino Lampa, Maipu y Vitacura

# Drop the three suspected outliers (Vitacura – Las Condes, Lampa, Maipu) and
# redraw the labelled scatter plot with a smoothed trend.
#
# Membership is tested with %in%. The earlier `Name != c(<two names>)` compared
# Name position-by-position against a recycled two-element vector, so which
# rows were dropped depended on row order, and R warned "longer object length
# is not a multiple of shorter object length".
df %>%
  filter(!Name %in% c("Vitacura – Las Condes", "Lampa", "Maipu")) %>%
  ggplot(aes(x = MCD, y = `Caries prevalence`, label = Name)) +
  geom_point() +
  theme_minimal() +
  geom_text(vjust = 1.5, nudge_x = 0.007, size = 2.7) +
  geom_smooth() # smoothed trend with ggplot2's default settings

Barplot

# Bar chart of caries prevalence, one bar per Name (default axis ordering).
ggplot(data = df, mapping = aes(x = Name, y = `Caries prevalence`)) +
  geom_col()

Todos los gráficos de barra tienen un orden

# Bar chart with bars ordered by caries prevalence and the axes flipped, so
# the names are listed along the vertical axis.
ggplot(
  data = df,
  mapping = aes(
    x = fct_reorder(Name, `Caries prevalence`),
    y = `Caries prevalence`
  )
) +
  geom_col() +
  labs(x = "Comuna", y = "Prevalencia de caries") +
  coord_flip()

promedio

# Mean and standard deviation of the `Caries prevalence` column.
sd_caries <- sd(df[["Caries prevalence"]])
promedio_caries <- mean(df[["Caries prevalence"]])

De hecho parece que Lampa, Maipu y Vitacura son outliers

Regresión

library(broom)

todas las comunas

# Fit a linear regression of caries prevalence on MCD using every row of df.
# do() keeps the fitted lm object in a list-column named `reg`.
dfReg <- df %>%
  do(reg = lm(`Caries prevalence` ~ MCD, data = .))

# ANOVA table of the stored fit, converted to a plain data frame and printed.
ANOVAreg <- dfReg %>%
  rowwise() %>%
  do(anova(.$reg)) %>%
  as.data.frame()
ANOVAreg
  Df   Sum Sq   Mean Sq  F value    Pr(>F)
1  1  160.471 160.47102 1.628871 0.2281474
2 11 1083.684  98.51674       NA        NA
# Coefficient table (estimate, std. error, statistic, p-value) for the model
# stored in column `reg` of dfReg.
tidy(dfReg,reg)  
         term  estimate std.error statistic      p.value
1 (Intercept)  42.30028  5.692609  7.430736 1.308437e-05
2         MCD -13.48198 10.563565 -1.276272 2.281474e-01
# One-row table of model-level fit statistics for the model stored in column
# `reg` of dfReg.
glance(dfReg,reg)
  r.squared adj.r.squared   sigma statistic   p.value df    logLik      AIC      BIC deviance df.residual
1 0.1289799    0.04979626 9.92556  1.628871 0.2281474  2 -47.19682 100.3936 102.0885 1083.684          11

Regresión sin Vitacura

¿Qué pasa si elimino Vitacura?

# Refit the linear regression after excluding the "Vitacura – Las Condes" row.
# do() keeps the fitted lm object in a list-column named `reg_sinVitacura`.
dfReg_sinVitacura <- df %>%
  filter(Name != "Vitacura – Las Condes") %>%
  do(reg_sinVitacura = lm(`Caries prevalence` ~ MCD, data = .))

# ANOVA table of that fit. The list-column is referenced by its full name:
# the earlier `.$reg` matched `reg_sinVitacura` only through `$` partial
# matching, which stops working if another column starting with "reg" is
# added or if `.` is an object that does not partial-match.
ANOVAreg_sinVitacura <- dfReg_sinVitacura %>%
  rowwise() %>%
  do(anova(.$reg_sinVitacura))
ANOVAreg_sinVitacura <- as.data.frame(ANOVAreg_sinVitacura)
ANOVAreg_sinVitacura
  Df     Sum Sq   Mean Sq    F value    Pr(>F)
1  1   6.761167  6.761167 0.07947736 0.7837551
2 10 850.703555 85.070356         NA        NA
# Coefficient table (estimate, std. error, statistic, p-value) for the model
# stored in column `reg_sinVitacura` of dfReg_sinVitacura.
tidy(dfReg_sinVitacura,reg_sinVitacura) 
         term  estimate std.error  statistic      p.value
1 (Intercept) 38.926372  5.669149  6.8663516 4.370185e-05
2         MCD -3.268847 11.595056 -0.2819173 7.837551e-01
# One-row table of model-level fit statistics for the model stored in column
# `reg_sinVitacura` of dfReg_sinVitacura.
glance(dfReg_sinVitacura,reg_sinVitacura)
    r.squared adj.r.squared    sigma  statistic   p.value df   logLik      AIC      BIC deviance df.residual
1 0.007885067   -0.09132643 9.223359 0.07947736 0.7837551  2 -42.5942 91.18841 92.64313 850.7036          10
LS0tDQp0aXRsZTogIkRpc3Bhcml0aWVzIGluIENhcmllcyBwcmV2YWxlbmNlIGluIGNoaWxkaG9vZCINCm91dHB1dDogDQogIGh0bWxfbm90ZWJvb2s6IA0KICAgIHRvYzogeWVzDQotLS0NCiMgUGFxdWV0ZXMNCmBgYHtyLCB3YXJuaW5nPUZBTFNFfQ0KIyBpbnN0YWxsLnBhY2thZ2VzKCJkZXZ0b29scyIpDQojIGRldnRvb2xzOjppbnN0YWxsX2dpdGh1YigiYW5raXRyb2hhdGdpL2RpZ2l0aXplUiIpDQojIGxpYnJhcnkoJ2RpZ2l0aXplUicpDQpsaWJyYXJ5KCJ0aWR5dmVyc2UiKQ0KYGBgDQojIGV4dHJhY2Npw7NuIG1hbnVhbCBkZSBkYXRvcyBkZWwgZ3LDoWZpY28NCg0KDQoNCiFbRmlndXJhIDJdKC4vY2FyaWVzX3ByZWVzY29sYXJlcy5wbmcpDQoNCg0KDQoNCmBgYHtyfQ0KIyBhcHAgPC0gd3BkLmxhdW5jaCgpDQpgYGANCiMgRGF0YXNldA0KYGBge3J9DQpkZiA8LSByZWFkX2NzdigiRGlzcGFyaXRpZXNDYXJpZXMuY3N2IikNCmBgYA0KYGBge3J9DQpkZg0KYGBgDQoNCmBgYHtyfQ0KZ2xpbXBzZShkZikNCmBgYA0KYGBge3J9DQpzdW1tYXJ5KGRmKQ0KYGBgDQojIHh+eSBwbG90DQpgYGB7cn0NCmRmICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gTUNELCB5ID0gYENhcmllcyBwcmV2YWxlbmNlYCkpICsgDQogIGdlb21fcG9pbnQoKQ0KYGBgDQoNCmBgYHtyfQ0KZGYgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBNQ0QsIHkgPSBgQ2FyaWVzIHByZXZhbGVuY2VgLCBsYWJlbCA9IE5hbWUpKSArICMgYWdyZWdvIGxhYmVsDQogIGdlb21fcG9pbnQoKSArIA0KICB0aGVtZV9taW5pbWFsKCkgKyANCiAgZ2VvbV90ZXh0KHZqdXN0ID0gMS41LCBudWRnZV94ID0gMC4wMDcsIHNpemUgPSAyLjcpICNhZ3JlZ28gZWwgdGV4dG8gZGUgbGFiZWwNCmBgYA0KIyMgY29uIGzDrW5lYSBkZSByZWdyZXNpw7NuDQoNCiMjIyByZWdyZXNzaW9uIGxpbmVhbA0KZ2VvbV9zbW9vdGgobWV0aG9kPSdsbScsZm9ybXVsYT15fngpDQpgYGB7cn0NCmRmICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gTUNELCB5ID0gYENhcmllcyBwcmV2YWxlbmNlYCwgbGFiZWwgPSBOYW1lKSkgKw0KICBnZW9tX3BvaW50KCkgKyANCiAgdGhlbWVfbWluaW1hbCgpICsgDQogIGdlb21fdGV4dCh2anVzdCA9IDEuNSwgbnVkZ2VfeCA9IDAuMDA3LCBzaXplID0gMi43KSArDQogIGdlb21fc21vb3RoKG1ldGhvZD0nbG0nLGZvcm11bGE9eX54KSAjIGFncmVnbyBhcmVhIGRlIHJlZ3Jlc2lvbg0KYGBgDQoNCg0KIyMjIHJlZ3Jlc3Npb24gYWp1c3RhZGENCmBgYHtyfQ0KZGYgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBNQ0QsIHkgPSBgQ2FyaWVzIHByZXZhbGVuY2VgLCBsYWJlbCA9IE5hbWUpKSArDQogIGdlb21fcG9pbnQoKSArIA0KICB0aGVtZV9taW5pbWFsKCkgKyANCiAgZ2VvbV90ZXh0KHZqdXN0ID0gMS41LCBudWRnZV94ID0gMC4wMDcsIHNpemUgPSAyLjcpICsNCiAgZ2VvbV9zbW9vdGgoKSAjIGFncmVnbyBhcmVhIGRlIHJlZ3Jlc2lvbg0KYGBgDQpWb3kgYSBwcm9iYXIgY29uIEdB
TSB5ICh5IH4gcG9seSh4LCAyKSkNCg0KYGBge3J9DQpkZiAlPiUgDQogIGdncGxvdChhZXMoeCA9IE1DRCwgeSA9IGBDYXJpZXMgcHJldmFsZW5jZWAsIGxhYmVsID0gTmFtZSkpICsNCiAgZ2VvbV9wb2ludCgpICsgDQogIHRoZW1lX21pbmltYWwoKSArIA0KICBnZW9tX3RleHQodmp1c3QgPSAxLjUsIG51ZGdlX3ggPSAwLjAwNywgc2l6ZSA9IDIuNykgKw0KICBnZW9tX3Ntb290aChtZXRob2QgPSAiZ2FtIiwgZm9ybXVsYSA9IHkgfiBwb2x5KHgsIDIpKSArIA0KICBnZW9tX3Ntb290aChtZXRob2QgPSBsbSwgc2UgPSBGQUxTRSwgbGluZXR5cGUgPSAiZGFzaGVkIikNCg0KYGBgDQoNCg0KIyMgRXMgdml0YWN1cmEgdW4gb3V0bGllcj8NCmBgYHtyfQ0KZGYgJT4lIA0KICBmaWx0ZXIoTmFtZSA9PSAiVml0YWN1cmEg4oCTIExhcyBDb25kZXMiKSAlPiUgDQogIGdncGxvdChhZXMoeCA9IE1DRCwgeSA9IGBDYXJpZXMgcHJldmFsZW5jZWAsIGxhYmVsID0gTmFtZSkpICsNCiAgZ2VvbV9wb2ludCgpICsgDQogIHRoZW1lX21pbmltYWwoKSArIA0KICBnZW9tX3RleHQodmp1c3QgPSAxLjUsIG51ZGdlX3ggPSAwLjAwNywgc2l6ZSA9IDIuNykNCiAgIyBnZW9tX3Ntb290aCgpICMgYWdyZWdvIGFyZWEgZGUgcmVncmVzaW9uDQpgYGANCg0KDQpgYGB7cn0NCmRmICU+JSANCiAgZmlsdGVyKE5hbWUgIT0gIlZpdGFjdXJhIOKAkyBMYXMgQ29uZGVzIikgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBNQ0QsIHkgPSBgQ2FyaWVzIHByZXZhbGVuY2VgLCBsYWJlbCA9IE5hbWUpKSArDQogIGdlb21fcG9pbnQoKSArIA0KICB0aGVtZV9taW5pbWFsKCkgKyANCiAgZ2VvbV90ZXh0KHZqdXN0ID0gMS41LCBudWRnZV94ID0gMC4wMDcsIHNpemUgPSAyLjcpICsNCiAgZ2VvbV9zbW9vdGgoKSAjIGFncmVnbyBhcmVhIGRlIHJlZ3Jlc2lvbg0KYGBgDQpBcGFyZW50ZW1lbnRlIHNpDQoNCmVsaW1pbm8gTGFtcGEsIE1haXB1IHkgVml0YWN1cmENCg0KYGBge3J9DQpkZiAlPiUgDQogIGZpbHRlcihOYW1lICE9IGMoIlZpdGFjdXJhIOKAkyBMYXMgQ29uZGVzIiwgIkxhbXBhIikpICU+JQ0KICBmaWx0ZXIoTmFtZSAhPSAiTWFpcHUiKSAlPiUgDQogIGdncGxvdChhZXMoeCA9IE1DRCwgeSA9IGBDYXJpZXMgcHJldmFsZW5jZWAsIGxhYmVsID0gTmFtZSkpICsNCiAgZ2VvbV9wb2ludCgpICsgDQogIHRoZW1lX21pbmltYWwoKSArIA0KICBnZW9tX3RleHQodmp1c3QgPSAxLjUsIG51ZGdlX3ggPSAwLjAwNywgc2l6ZSA9IDIuNykgKw0KICBnZW9tX3Ntb290aCgpICMgYWdyZWdvIGFyZWEgZGUgcmVncmVzaW9uDQpgYGANCg0KDQojIEJhcnBsb3QNCmBgYHtyfQ0KZGYgJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBOYW1lLCB5ID0gYENhcmllcyBwcmV2YWxlbmNlYCkpICsNCiAgZ2VvbV9jb2woKQ0KYGBgDQpUb2RvcyBsb3MgZ3LDoWZpY29zIGRlIGJhcnJhIHRpZW5lbiB1biBvcmRlbg0KYGBge3J9DQpkZiAlPiUgDQogIGdncGxvdChhZXMoeCA9IGZjdF9yZW9yZGVyKE5h
bWUsIGBDYXJpZXMgcHJldmFsZW5jZWApLCB5ID0gYENhcmllcyBwcmV2YWxlbmNlYCkpICsNCiAgZ2VvbV9jb2woKSArDQogIGNvb3JkX2ZsaXAoKSArIA0KICBsYWJzKHkgPSAiUHJldmFsZW5jaWEgZGUgY2FyaWVzIiwgeCA9ICJDb211bmEiKQ0KYGBgDQojIyBwcm9tZWRpbw0KDQpgYGB7cn0NCnByb21lZGlvX2NhcmllcyA8LSBtZWFuKGRmJGBDYXJpZXMgcHJldmFsZW5jZWApDQpzZF9jYXJpZXMgPC0gc2QoZGYkYENhcmllcyBwcmV2YWxlbmNlYCkNCmBgYA0KDQpgYGB7cn0NCmRmICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gZmN0X3Jlb3JkZXIoTmFtZSwgYENhcmllcyBwcmV2YWxlbmNlYCksIHkgPSBgQ2FyaWVzIHByZXZhbGVuY2VgKSkgKw0KICBnZW9tX2NvbCgpICsNCiAgY29vcmRfZmxpcCgpICsgDQogIGdlb21faGxpbmUoYWVzKHlpbnRlcmNlcHQgPSBwcm9tZWRpb19jYXJpZXMsIGNvbG91cj0iUHJvbWVkaW8iKSkgKyANCiAgZ2VvbV9obGluZShhZXMoeWludGVyY2VwdCA9IHByb21lZGlvX2NhcmllcyArIHNkX2NhcmllcywgIGNvbG91cj0iU0QiKSkgKw0KICBnZW9tX2hsaW5lKGFlcyh5aW50ZXJjZXB0ID0gcHJvbWVkaW9fY2FyaWVzIC0gc2RfY2FyaWVzLCAgY29sb3VyPSJTRCIpKSArDQogIGxhYnMoeSA9ICJQcmV2YWxlbmNpYSBkZSBjYXJpZXMiLCB4ID0gIkNvbXVuYSIsIGNvbG91ciA9ICJQcm9tZWRpbyB5IFNEIikNCmBgYA0KDQpEZSBoZWNobyBwYXJlY2UgcXVlIExhbXBhLCBNYWlwdSB5IFZpdGFjdXJhIHNvbiBvdXRsaWVycw0KDQojIFJlZ3Jlc2lvbg0KYGBge3J9DQpsaWJyYXJ5KGJyb29tKQ0KYGBgDQoNCiMjIHRvZGFzIGxhcyBjb211bmFzDQpgYGB7cn0NCmRmUmVnIDwtIGRmICU+JSANCiAgZG8ocmVnID0gbG0oYENhcmllcyBwcmV2YWxlbmNlYCB+IE1DRCwgZGF0YT0uKSkNCkFOT1ZBcmVnIDwtICBkZlJlZyAlPiUgDQogIHJvd3dpc2UgJT4lIA0KICBkbyhhbm92YSguJHJlZykpIA0KQU5PVkFyZWcgPC0gIGFzLmRhdGEuZnJhbWUoQU5PVkFyZWcpDQpBTk9WQXJlZw0KYGBgDQoNCmBgYHtyfQ0KdGlkeShkZlJlZyxyZWcpICANCmBgYA0KDQpgYGB7cn0NCmdsYW5jZShkZlJlZyxyZWcpDQpgYGANCiMjIFJlZ3Jlc2lvbiBzaW4gVml0YWN1cmENCsK/UXXDqSBwYXNhIHNpIGVsaW1pbm8gVml0YWN1cmE/DQoNCmBgYHtyfQ0KZGZSZWdfc2luVml0YWN1cmEgPC0gZGYgJT4lDQogIGZpbHRlcihOYW1lICE9ICJWaXRhY3VyYSDigJMgTGFzIENvbmRlcyIpICU+JSANCiAgZG8ocmVnX3NpblZpdGFjdXJhID0gbG0oYENhcmllcyBwcmV2YWxlbmNlYCB+IE1DRCwgZGF0YT0uKSkNCkFOT1ZBcmVnX3NpblZpdGFjdXJhIDwtICBkZlJlZ19zaW5WaXRhY3VyYSAlPiUgDQogIHJvd3dpc2UgJT4lIA0KICBkbyhhbm92YSguJHJlZykpIA0KQU5PVkFyZWdfc2luVml0YWN1cmEgPC0gIGFzLmRhdGEuZnJhbWUoQU5PVkFyZWdfc2luVml0YWN1cmEpDQpBTk9WQXJlZ19zaW5WaXRhY3VyYQ0KYGBgDQpgYGB7cn0NCnRpZHko
ZGZSZWdfc2luVml0YWN1cmEscmVnX3NpblZpdGFjdXJhKSANCmBgYA0KYGBge3J9DQpnbGFuY2UoZGZSZWdfc2luVml0YWN1cmEscmVnX3NpblZpdGFjdXJhKQ0KYGBgDQoNCg==