# Load the study data from the CSV export into a data frame.
df <- read.csv("~/Dropbox/VOS Study/Diabetes Incidence/Diabetes Incidence Data.csv")

# Numeric outcome indicator: 1 where v2.diab1 equals "Diabetes", 0 for any
# other recorded value. Rows where v2.diab1 is NA stay NA, because the
# comparison itself yields NA and ifelse() passes it through.
df$diab <- ifelse(df$v2.diab1 == "Diabetes", 1, 0)
# Linear model of wbbmd on age and pcfat, fitted to the rows with sex == "F"
# only; the coefficient table is printed via summary().
# `data = .` hands the filtered rows from the pipe to lm(), which takes the
# formula (not the data) as its first argument.
df %>%
  filter(sex == "F") %>%
  lm(formula = wbbmd ~ age + pcfat, data = .) %>%
  summary()
##
## Call:
## lm(formula = wbbmd ~ age + pcfat, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31164 -0.06267 -0.00546 0.05976 0.31990
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.3478426 0.0260886 51.664 <2e-16 ***
## age -0.0065742 0.0002759 -23.827 <2e-16 ***
## pcfat -0.0004123 0.0005818 -0.709 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08938 on 1282 degrees of freedom
## Multiple R-squared: 0.319, Adjusted R-squared: 0.318
## F-statistic: 300.3 on 2 and 1282 DF, p-value: < 2.2e-16
# Scatter plot of wbbmc against age for the rows with sex == "F", with the
# default geom_smooth() trend layered on top of the points.
# NOTE(review): the regression in this script models wbbmd, while this plot
# uses wbbmc -- confirm that the different column is intentional.
df %>%
  filter(sex == "F") %>%
  ggplot(mapping = aes(x = age, y = wbbmc)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Frequency table of v2.diab1. The NA reporting columns and the heading block
# are switched off, leaving only counts, percentages and cumulative
# percentages per category.
# freq() is presumably summarytools::freq -- the library() call is not
# visible in this section; confirm.
# Commented-out template call kept from the original for reference:
# freq(iris$Species, plain.ascii = FALSE, style = "rmarkdown")
freq(
  x = df$v2.diab1,
  report.nas = FALSE,
  headings = FALSE
)
##
## Freq % % Cum.
## ------------------ ------ -------- --------
## Diabetes 80 4.33 4.33
## Normal 1144 61.94 66.27
## Pre-Diabetes 623 33.73 100.00
## Total 1847 100.00 100.00
# Cross-tabulation of sex (rows) by v2.diab1 (columns); prop = "r" requests
# row proportions, so each percentage is relative to its own sex total.
# ctable() is presumably summarytools::ctable -- the library() call is not
# visible in this section; confirm.
ctable(
  x = df$sex,
  y = df$v2.diab1,
  prop = "r"
)
## Cross-Tabulation, Row Proportions
## sex * v2.diab1
## Data Frame: df
##
## ------- ---------- ----------- -------------- -------------- ---------------
## v2.diab1 Diabetes Normal Pre-Diabetes Total
## sex
## F 53 (4.1%) 810 (63.0%) 422 (32.8%) 1285 (100.0%)
## M 27 (4.8%) 334 (59.4%) 201 (35.8%) 562 (100.0%)
## Total 80 (4.3%) 1144 (61.9%) 623 (33.7%) 1847 (100.0%)
## ------- ---------- ----------- -------------- -------------- ---------------