library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
library(srvyr)
##
## Attaching package: 'srvyr'
##
## The following object is masked from 'package:stats':
##
## filter
options(survey.lonely.psu="certainty")
set.seed(42)
options(scipen=999)
if(curl::has_internet() ) {
googlesheets4::gs4_auth("passi.solar@gmail.com")
# ss3<-googlesheets4::gs4_create("ENS_IMC_p2")
ss3<-googlesheets4::gs4_get("1ExkpYGVNOYSEKaReYSNpiBHi6YCKtHAGNlKuRjHpKE4")
# googlesheets4::gs4_browse(ss3)
# ss3<-googlesheets4::gs4_get("1ihVVBow7LHxvGoRCSElL8G32emKPklZxbcpe68amERc")
# ss3_r <- googlesheets4::gs4_create("minsal_rcv_report")
# ss3_r<-googlesheets4::gs4_get("1IilgbYUgSlBUkwTNSSM066bunBndgqdQBtCoOeQOnVM")
# googlesheets4::sheet_write(desc_ldw, ss = ss3, sheet = "desc_ldw")
# googlesheets4::gs4_browse(ss3)
}
times<-read.csv("times.csv")
d<-data.frame(trabajo="ENS_IMC_p2",
day=as.character(Sys.Date()),
time=as.character(Sys.time()),
type="fin")
d<-bind_rows(times,d)
write.csv(d,"times.csv", row.names = FALSE)
dict<-read.csv("ENS2003_2010_2017_book.csv")
dirx<-"/home/alvaro-passi/MEGA/ENS/Bases/Originales"
list_files_ENS<-list.files(dirx, recursive = T)
df0<-rio::import("df0_imc.Rds", trust=T)
#
# - **ENS 2003:** 20-25 años.
# - **ENS 2010:** 27-32 años.
# - **ENS 2017:** 34-39 años.
survey_design_o<-df0 %>%
mutate(ENS=as.factor(ENS))%>%
subset(!is.na(fexp) & fexp>0)%>%
as_survey_design(id=Conglomerado_, weight = fexp, strata=strata_, nest=TRUE)
options(survey.lonely.psu="certainty")
table(survey_design_o$variables$ENS)
##
## 2003 2010 2017
## 3452 4884 5520
survey_design<-survey_design_o %>%
subset(!is.na(IMC))
#
# survey_design<-survey_design %>%
# subset(Edad>=18 & Edad<=Edad_max) %>%
# # subset(Edad>=18 & Edad<=Edad_max)%>%
# subset(talla>100)
survey_design_s<- survey_design %>%
subset(
(ENS==2003 & Edad>=20 & Edad<=25) |
(ENS==2010 & Edad>=27 & Edad<=32) |
(ENS==2017 & Edad>=34 & Edad<=39)
)
tertiles <- c(0.3333, 0.6667)
# Calculate tertiles
tertile_results <- survey_design_s %>%
group_by(ENS) %>%
summarise(
tertile_values = survey_quantile(
IMC,
quantiles = tertiles,
na.rm = TRUE,
vartype = "se"
)
)
tertile_results
## # A tibble: 3 × 5
## ENS tertile_values_q3333 tertile_values_q6667 tertile_values_q3333_se
## <fct> <dbl> <dbl> <dbl>
## 1 2003 22.0 24.3 0.213
## 2 2010 24.0 27.4 0.540
## 3 2017 26.5 30.1 0.412
## # ℹ 1 more variable: tertile_values_q6667_se <dbl>
names(survey_design_s$variables)
## [1] "IDbaseENS" "Sexo" "Edad" "Zona"
## [5] "NEDU" "Region" "fexp" "Conglomerado_"
## [9] "strata_" "peso" "talla" "IMC"
## [13] "ENS" "Edad_cat" "TOTAL"
survey_design_s$variables<-left_join(survey_design_s$variables,tertile_results, by="ENS")
names(survey_design_s$variables)
## [1] "IDbaseENS" "Sexo"
## [3] "Edad" "Zona"
## [5] "NEDU" "Region"
## [7] "fexp" "Conglomerado_"
## [9] "strata_" "peso"
## [11] "talla" "IMC"
## [13] "ENS" "Edad_cat"
## [15] "TOTAL" "tertile_values_q3333"
## [17] "tertile_values_q6667" "tertile_values_q3333_se"
## [19] "tertile_values_q6667_se"
survey_design_s$variables$IMC_tertile<-NA
survey_design_s$variables$IMC_tertile[survey_design_s$variables$IMC <= survey_design_s$variables$tertile_values_q3333]<-"t1"
survey_design_s$variables$IMC_tertile[
survey_design_s$variables$IMC > survey_design_s$variables$tertile_values_q3333 & survey_design_s$variables$IMC <= survey_design_s$variables$tertile_values_q6667
]<-"t2"
survey_design_s$variables$IMC_tertile[survey_design_s$variables$IMC > survey_design_s$variables$tertile_values_q6667]<-"t3"
table(survey_design_s$variables$IMC_tertile)
##
## t1 t2 t3
## 354 348 492
survey_design_s %>%
group_by(ENS,IMC_tertile)%>%
summarise(sum(fexp),
n())
## # A tibble: 9 × 4
## # Groups: ENS [3]
## ENS IMC_tertile `sum(fexp)` `n()`
## <fct> <chr> <dbl> <int>
## 1 2003 t1 525361. 91
## 2 2003 t2 517523. 89
## 3 2003 t3 512718. 120
## 4 2010 t1 390059. 98
## 5 2010 t2 381964. 128
## 6 2010 t3 385659. 195
## 7 2017 t1 577260. 165
## 8 2017 t2 583562. 131
## 9 2017 t3 568881. 177
#
# survey_design_s$variables %>%
# subset(is.na(IMC_tertile)) %>%
# select(ENS,IMC,IMC_tertile)
t1<-survey_design_s %>%
group_by(ENS,IMC_tertile)%>%
summarise(mean=mean(IMC),
median=median(IMC))
t1 %>%
ggplot(aes(x=ENS, y=mean, colour = IMC_tertile ))+
geom_point()

bmi change
t1w<-t1 %>%
pivot_wider(names_from = ENS, values_from = c(mean, median))
#
t1w$mean_d_2010_2003<-t1w$mean_2010-t1w$mean_2003
t1w$mean_d_2017_2003<-t1w$mean_2017-t1w$mean_2003
t1w$mean_d_2017_2010<-t1w$mean_2017-t1w$mean_2010
t1w$median_d_2010_2003<-t1w$median_2010-t1w$median_2003
t1w$median_d_2017_2003<-t1w$median_2017-t1w$median_2003
t1w$median_d_2017_2010<-t1w$median_2017-t1w$median_2010
t1wl<-t1w %>%
pivot_longer(!IMC_tertile, names_to = "var", values_to = "value")
t1wl$var_type<-NA
t1wl$var_type[grepl("mean",t1wl$var)]<-"mean"
t1wl$var_type[grepl("median",t1wl$var)]<-"median"
t1wl$var_type[grepl("mean_d",t1wl$var)]<-"mean_d"
t1wl$var_type[grepl("median_d",t1wl$var)]<-"median_d"
t1wl$var_time<-NA
t1wl$var_time[grepl("2003",t1wl$var)]<-"2003"
t1wl$var_time[grepl("2010",t1wl$var)]<-"2010"
t1wl$var_time[grepl("2017",t1wl$var)]<-"2017"
t1wl$var_time[grepl("2010_2003",t1wl$var)]<-"2010-2003"
t1wl$var_time[grepl("2017_2003",t1wl$var)]<-"2017-2003"
t1wl$var_time[grepl("2017_2010",t1wl$var)]<-"2017-2010"
t1wld<-t1wl %>%
subset(grepl("-",var_time))
t1wld %>%
ggplot(aes(x=var_time, y=value, colour = IMC_tertile ))+
geom_point()+
facet_grid(rows = vars(var_type))

survey_design_s$variables %>%
ggplot(aes(x=Edad, y=IMC, colour = ENS, shape = IMC_tertile))+
geom_point()
