rm(list = ls())
library(here)
## here() starts at D:/NIH_Floor study/R-training
library(readr)
library(readxl)
library(haven)
library(labelled)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(broom)
library(ggplot2)
stat_6mo <- read_dta(here("data", "data_6mo.dta"))
stat_6mo_factored <- as.data.frame(lapply(stat_6mo, to_factor))
excel_6mo <- read_excel(here("data", "data_6mo.xlsx"))
colnames_6mo <- c("dataid", "round", "breastfeed", "how_breastfeed", "formula", "solid_food",
"sit_up")
csv_6mo <- read_csv(here("data", "data_6mo.csv"))
## Rows: 203 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): dataid, q11_010, q11_020, q11_030, q11_60, q11_140, q11_240
## dbl (8): round, child_num, q11_011, q11_021, q11_031, q11_61, q11_141, q11_241
## date (1): q1_2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stat_birth_survey <- read_dta(here("data", "birth_survey.dta"))
stat_birth_factored <- as.data.frame(lapply(stat_birth_survey, to_factor))
stat_anthro <- read_dta(here("data", "anthro_data.dta"))
stat_anthro_factored <- as.data.frame(lapply(stat_anthro, to_factor))
?left_join
## starting httpd help server ... done
data_full_join <- full_join(stat_6mo_factored, stat_anthro)
## Joining with `by = join_by(dataid)`
data_full_join2 <- full_join(stat_birth_survey, stat_anthro)
## Joining with `by = join_by(dataid, child_id)`
birth_survey <- rename(stat_birth_survey, "birth_survey_status"=brith_survey_status)
merge_birth_anthr <- left_join(stat_anthro, stat_birth_survey,
by = c("dataid", "child_id"))
stat_birth_survey %>% head(6)
## # A tibble: 6 × 8
## dataid block child_id brith_survey_status reason_birth_survey_not…¹ DOB
## <chr> <chr> <chr> <dbl> <dbl+lbl> <date>
## 1 01016 01 t1 1 NA 2024-01-16
## 2 01017 01 t1 1 NA 2023-12-27
## 3 01018 01 t1 1 NA 2024-01-25
## 4 01020 01 t1 1 NA 2023-10-10
## 5 01021 01 t1 1 NA 2024-03-09
## 6 01022 01 t1 1 NA 2023-11-15
## # ℹ abbreviated name: ¹​reason_birth_survey_not_done
## # ℹ 2 more variables: q1_3 <dbl+lbl>, q1_4 <dbl+lbl>
stat_birth_survey %>% tail(6)
## # A tibble: 6 × 8
## dataid block child_id brith_survey_status reason_birth_survey_not…¹ DOB
## <chr> <chr> <chr> <dbl> <dbl+lbl> <date>
## 1 81775 81 t1 1 NA 2024-09-22
## 2 81785 81 t1 1 NA 2024-09-15
## 3 81789 81 t1 1 NA 2024-08-22
## 4 81790 81 t1 0 1 [The mother and child… 2024-08-20
## 5 81792 81 t1 1 NA 2024-11-16
## 6 81795 81 t1 1 NA 2024-09-05
## # ℹ abbreviated name: ¹​reason_birth_survey_not_done
## # ℹ 2 more variables: q1_3 <dbl+lbl>, q1_4 <dbl+lbl>
stat_birth_survey %>% dim()
## [1] 709 8
stat_birth_survey %>% nrow()
## [1] 709
dob_block_1 <- stat_birth_survey %>%
filter(block == "01") %>%
arrange(DOB) %>%
pull(DOB)
dob_block_1
## [1] "2023-10-10" "2023-10-23" "2023-11-15" "2023-12-27" "2024-01-14"
## [6] "2024-01-16" "2024-01-25" "2024-03-02" "2024-03-09" "2024-03-19"
breastfeed_6mo <- stat_6mo_factored %>%
filter(q11_010 == "Yes") %>%
nrow()
bf_6mo <- stat_6mo %>%
filter(q11_010 == 1) %>%
nrow()
bf_6mo
## [1] 203
bf_formula <- stat_6mo_factored %>%
filter(q11_010 =="Yes") %>%
filter(q11_010 =="Breastfeeding with formula milk") %>%
nrow()
bf_formula
## [1] 0
mean_head_circ <- merge_birth_anthr %>%
group_by(q1_3) %>%
summarise(count = n(),
mean_x = mean(head_circumference),
median_x = median(head_circumference),
sd_x = sd(head_circumference))
mean_head_circ
## # A tibble: 2 × 5
## q1_3 count mean_x median_x sd_x
## <dbl+lbl> <int> <dbl> <dbl> <dbl>
## 1 1 [Male] 82 42.3 42 1.12
## 2 2 [Female] 59 41.6 41 0.985
weight_child <- stat_anthro %>%
mutate(Child_weight_kg = baby_weight/1000)
weight_child
## # A tibble: 141 × 7
## dataid child_id date baby_weight head_circumference baby_length
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 01016 t1 2024-09-07 12:37:… 6700 41 68
## 2 01017 t1 2024-08-19 09:47:… 6712 40 68
## 3 01018 t1 2024-09-17 09:10:… 6780 41 69
## 4 01020 t1 2024-06-03 16:28:… 7033 42 69
## 5 01022 t1 2024-09-18 09:10:… 6082 41 67
## 6 01035 t1 2024-06-02 16:21:… 9933 45 73
## 7 01036 t1 2024-09-05 14:33:… 7410 41 67
## 8 02013 t1 2024-06-04 16:30:… 7213 44 68
## 9 02015 t1 2024-09-08 13:07:… 6838 41 66
## 10 02019 t1 2024-06-03 04:48:… 7900 44 68
## # ℹ 131 more rows
## # ℹ 1 more variable: Child_weight_kg <dbl>
stat_6mo <- stat_6mo %>%
mutate(twin_hh = if_else(child_num == 1, "single",
"tiwn"))
stat_6mo <- stat_6mo %>%
mutate(child_num = case_when(
child_num == 1 ~"single",
child_num == 2 ~"twins",
child_num == 3 ~"triplets"))
wide_data_by_sex <- mean_head_circ %>%
pivot_wider(names_from = q1_3,
values_from = mean_x)
wide_data_by_sex
## # A tibble: 2 × 5
## count median_x sd_x `1` `2`
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 82 42 1.12 42.3 NA
## 2 59 41 0.985 NA 41.6
q11_01_long <- stat_6mo %>%
select(dataid, child_num, q11_010, q11_011) %>%
pivot_longer(cols = c("q11_010", "q11_011"),
names_prefix = "q11_01",
names_to = "child_id",
values_to = "q11_01")
q11_01_long <- q11_01_long %>%
mutate(child_id = ifelse(child_id == 0, "t1", "t2"))
x <- c(25, 75)
y <- c(14, 50)
df <- data.frame(x, y)
matrix <- as.matrix(df)
martix <- matrix(x, y)
chisq.test(matrix)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: matrix
## X-squared = 0.07319, df = 1, p-value = 0.7867
bs_hc_male <- stat_birth_survey %>%
filter(q1_3 == 1) %>% pull(q1_4)
bs_hc_female <- stat_birth_survey %>%
filter(q1_3 == 2) %>% pull(q1_4)
t.test(bs_hc_male, bs_hc_female)
##
## Welch Two Sample t-test
##
## data: bs_hc_male and bs_hc_female
## t = -0.11905, df = 701.53, p-value = 0.9053
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03740469 0.03312792
## sample estimates:
## mean of x mean of y
## 0.9383378 0.9404762
logit_mod <- glm(q1_4 ~ q1_3,
family = binomial(link = "logit"),
data = stat_birth_survey)
summary(logit_mod)
##
## Call:
## glm(formula = q1_4 ~ q1_3, family = binomial(link = "logit"),
## data = stat_birth_survey)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.68487 0.48837 5.498 3.85e-08 ***
## q1_3 0.03757 0.31543 0.119 0.905
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 324.37 on 708 degrees of freedom
## Residual deviance: 324.35 on 707 degrees of freedom
## AIC: 328.35
##
## Number of Fisher Scoring iterations: 5
csvwashb <- read_csv(here("data", "washb_subset.csv"))
## Rows: 12438 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): tr, season
## dbl (1): diar7d
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wash_subset <- csvwashb %>%
mutate(tr= factor(tr, levels = c ("Control", #reference
"Water",
"Sanitation",
"Handwashing",
"Combined WSH")))
tr_mod <- glm(diar7d ~ tr, family = binomial(link="log"), data =wash_subset )
summary(tr_mod)
##
## Call:
## glm(formula = diar7d ~ tr, family = binomial(link = "log"), data = wash_subset)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.86755 0.06919 -41.444 < 2e-16 ***
## trWater -0.12261 0.12385 -0.990 0.322186
## trSanitation -0.52672 0.14644 -3.597 0.000322 ***
## trHandwashing -0.49691 0.14356 -3.461 0.000537 ***
## trCombined WSH -0.44799 0.10944 -4.094 4.25e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4444.1 on 12437 degrees of freedom
## Residual deviance: 4415.8 on 12433 degrees of freedom
## AIC: 4425.8
##
## Number of Fisher Scoring iterations: 6
hist(stat_anthro$baby_weight,
main="Histtogram of baby",
xlab = "Baby weight")
# ggpolt
ggplot(stat_anthro,
aes(x= baby_weight, y= baby_length))+
geom_point() +
xlab("Baby weight in gm") +
ylab("Baby lenght in cm") +
ggtitle("Six month Baby weight & Length") +
theme_minimal()
ggplot(merge_birth_anthr,
aes(x= baby_weight, y= baby_length,
group = factor(q1_3), color = factor(q1_3))) +
geom_point() +
xlab("Baby weight in gm") +
ylab("Baby lenght in cm") +
ggtitle("Six month Baby weight & Length")+
theme_minimal()
ggplot(merge_birth_anthr,
aes(x= baby_weight, y= baby_length,
group = factor(q1_3, levels = c(1,2)), color = factor(q1_3))) +
geom_point(size = 3, shape = 1) +
scale_color_manual(values = c("blue", "red"),
labels = c("Male", "Female")) +
xlab("Baby weight in gm") +
ylab("Baby lenght in cm") +
ggtitle("Six month Baby weight & Length")+
theme_minimal()
stat_birth_factored <- stat_birth_survey %>%
mutate(sex = factor(q1_3,
levels = c(1,2),
labels = c("Male", "Female")))
merge_birth_anthr <- left_join(stat_anthro, stat_birth_factored, by = c("dataid", "child_id"))
ggplot(merge_birth_anthr,
aes(x= sex, y= baby_weight,
group = sex,
colour = sex)) +
geom_boxplot() +
scale_color_manual(values = c("blue", "red"),
labels = c("Male", "Female")) +
xlab("Sex") +
ylab("Baby weight in gm") +
ggtitle("Six month Baby weight by sex")+
theme_minimal()
library(ggplot2)
ggplot2::ggplot(merge_birth_anthr,
aes(x=baby_weight,
y=baby_length,
colour = sex)) +
geom_point(size =2) +
facet_wrap(~ sex) +
xlab("Baby weight in gm") +
ylab("Sex") +
ggtitle("Six month Baby weight by sex") +
theme_minimal()
model3 <- tidy(tr_mod)
model4 <- model3 %>%
filter(term != "(Intercept)") %>%
mutate(prev_ratio = exp(estimate),
lower_ci = exp(estimate - 1.96*std.error),
upper_ci = exp(estimate + 1.96*std.error))
ggplot(model4, aes(x = term,
y = prev_ratio,
group = term,
colour = term)) +
geom_point(stat = "identity", color ="blue", fill = "#999999") +
geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci,
width = 0.2),
position = position_dodge(0.5)
) +
xlab("Diarrhea Prevalence Ratio (95% CI)") +
ylab("Treatment")
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.