# By Stanford Chihuri
# Setup: packages and data ----
# Install only the packages that are missing, and name a CRAN mirror
# explicitly. Calling install.packages() without `repos` in a
# non-interactive session fails with "trying to use CRAN without setting a
# mirror", and reinstalling on every run is unnecessary.
required_pkgs <- c("foreign", "sas7bdat", "ggplot2")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "https://cloud.r-project.org")
}

library(foreign)
library(sas7bdat)
library(ggplot2)

# Read the merged SAS data set into the data frame `folate`, which every
# later section of this script uses. Fail early with a clear message if the
# file is not where this machine-specific path says it is.
folate_path <- "/Users/stanfordchihuri/Downloads/merged6.sas7bdat"
if (!file.exists(folate_path)) {
  stop("SAS data file not found: ", folate_path, call. = FALSE)
}
folate <- read.sas7bdat(folate_path, debug = FALSE)
# Distribution plots (ggplot2 quick plots) ----
# Histogram of RIDRETH1 with bars filled by the AboveUL_DR11 indicator.
# The original call ended with a dangling `x=` (an empty argument passed as
# `x`); it has been removed so RIDRETH1 is the x variable.
qplot(
  RIDRETH1,
  data = folate,
  fill = as.factor(AboveUL_DR11),
  binwidth = 0.2
)

# Histograms of TFDANDSUP1_DFE, one panel per RIDRETH1 value:
# first stacked in rows (x axis limited to 0-2000), then side by side.
qplot(
  TFDANDSUP1_DFE,
  data = folate,
  facets = RIDRETH1 ~ .,
  binwidth = 2,
  xlim = c(0, 2000)
)
qplot(TFDANDSUP1_DFE, data = folate, facets = . ~ RIDRETH1, binwidth = 4)

# Density of TFDANDSUP1_DFE, one curve per DR1LANG value; values outside
# the 0-2000 x range are dropped, which produces the warnings recorded below.
qplot(
  TFDANDSUP1_DFE,
  data = folate,
  geom = "density",
  color = as.factor(DR1LANG),
  xlim = c(0, 2000)
)
## Warning: Removed 60 rows containing non-finite values (stat_density).
## Warning: Removed 14 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning: Removed 120 rows containing non-finite values (stat_density).
# Folate intake vs. DMDYRSUS, with linear fits ----
legend_title <- "race/ethn"

# The original call passed an empty `color=`, which failed with
# "argument is missing, with no default", and never used `legend_title`.
# NOTE(review): colouring by RIDRETH1 is inferred from the "race/ethn"
# legend title defined just above -- confirm this is the intended grouping.
# `binwidth` was dropped: it is not a parameter of the point or smooth geoms.
qplot(
  DMDYRSUS, TFDANDSUP1_DFE,
  data = folate,
  color = as.factor(RIDRETH1),
  geom = c("point", "smooth"),
  method = "lm",
  xlim = c(1, 5),
  ylim = c(0, 2000),
  xlab = "Length of time in the US (yrs)",
  ylab = "Folate from food and supplements (DFE)"
) + labs(colour = legend_title)

# Same relationship, one panel per RIAGENDR value. The base-graphics
# arguments `cex.lab` / `cex.main` and the unused `binwidth` were removed;
# none of them is a parameter of the geoms used here.
qplot(
  DMDYRSUS, TFDANDSUP1_DFE,
  data = folate,
  facets = . ~ RIAGENDR,
  geom = c("point", "smooth"),
  method = "lm",
  xlim = c(1, 5),
  ylim = c(0, 2000),
  xlab = "Length of time in the US (yrs)",
  ylab = "Folate from food and supplements (DFE)",
  main = "Total folate intake by length of time in the US and gender"
)
## Warning: Removed 1444 rows containing missing values (stat_smooth).
## Warning: Removed 1379 rows containing missing values (stat_smooth).
## Warning: Removed 1444 rows containing missing values (geom_point).
## Warning: Removed 1379 rows containing missing values (geom_point).
# 3D scatterplot of total folate against age and BMI z-score ----
# Install scatterplot3d only when it is missing, from an explicit CRAN
# mirror (install.packages() without `repos` fails non-interactively).
if (!requireNamespace("scatterplot3d", quietly = TRUE)) {
  install.packages("scatterplot3d", repos = "https://cloud.r-project.org")
}
# library() takes the package name only; the original also passed column
# names as extra arguments, which raised "object 'TFDANDSUP1_DFE' not found"
# and left scatterplot3d() unavailable.
library(scatterplot3d)

# The original bound TFDANDSUP1_DFE to y and BMIzscore to z while labelling
# y as "BMIzscore" and z as "total folate". The variables are now assigned
# to the axes their labels (and the title) describe.
scatterplot3d(
  x = folate$RIDAGEEX,
  y = folate$BMIzscore,
  z = folate$TFDANDSUP1_DFE,
  main = "3d scatterplot of total folate vs. age and BMIzscores",
  xlab = "age(mo)",
  ylab = "BMIzscore",
  zlab = "total folate",
  angle = 45,
  las = 1,
  cex.main = 0.9,
  cex.axis = 0.5,
  cex.lab = 0.8,
  font.main = 1
)
# Boxplots of TFDANDSUP1_DFE grouped by RIAGENDR and then by RIDRETH1,
# with the y axis clipped to 0-2000 and horizontal axis labels (las = 1).
boxplot(
  TFDANDSUP1_DFE ~ RIAGENDR,
  data = folate,
  ylim = c(0, 2000),
  xlab = "gender",
  ylab = "total folate",
  las = 1
)
boxplot(
  TFDANDSUP1_DFE ~ RIDRETH1,
  data = folate,
  ylim = c(0, 2000),
  xlab = "race/ethn",
  ylab = "total folate",
  las = 1
)
# Cross-tabulation of BelowEAR_DR11 (rows) by RIDRETH1 (columns) ----
table1 <- table(folate$BelowEAR_DR11, folate$RIDRETH1)
table1
##
## 1 2 3 4 5
## 0 414 183 554 312 122
## 1 116 37 98 65 16

# Shrink text for the two plots below; par() returns the previous settings
# so they can be put back afterwards.
op <- par(cex = 0.6)
barplot(
  table1,
  beside = TRUE,
  main = "Folate intake <EAR by Race/ethn",
  xlab = "Race/ethn",
  ylab = "freq Below EAR",
  las = 1,
  col = c(5, 4),
  legend.text = c("Above EAR", "Below EAR")
)
mosaicplot(table1)
# Restore the saved graphics settings. The original saved `op` but never
# restored it, so cex = 0.6 leaked into every later base-graphics plot.
par(op)
# Age distribution ----
# Density-scaled histogram of the age-category codes, with a frame drawn
# around the plot region.
hist(
  folate$agecat,
  probability = TRUE,
  main = "Histogram of age categories",
  xlab = "Age categories",
  col = 13,
  las = 1,
  cex.axis = 0.5,
  cex.lab = 0.8,
  cex.main = 0.9
)
box()

# Counts per age category.
table(folate$agecat)
##
## 1 2 3 4 5
## 516 430 602 934 509

# The original built x <- c(folate$agecat, folate$RIDAGEEX), pooling the
# 1-5 category codes with RIDAGEEX into a single vector, so the summary and
# normal Q-Q plot described a meaningless mixture of two scales. Only
# RIDAGEEX is summarised now; the previously recorded summary output no
# longer applies and was removed.
# NOTE(review): RIDAGEEX is labelled "age(mo)" in the 3D scatterplot in this
# script -- confirm it is the variable intended here.
x <- folate$RIDAGEEX
summary(x)
qqnorm(x)
# Linear models and residual diagnostics ----
# NOTE(review): RIDRETH1 (and RIAGENDR / DR1LANG in the second model) enter
# as numeric codes, i.e. as linear trends across the code values. If they
# are nominal categories they should probably be wrapped in factor() --
# confirm against the codebook.
t.lm <- lm(
  TFDANDSUP1_DFE ~ RIDRETH1 + agecat + incpovratio + BMIz,
  data = folate
)

# Residuals against fitted values. The original plotted the residuals
# against their row index while labelling the axis "Fitted Model", and
# stored them in a variable named `resid`, shadowing stats::resid().
t_lm_resid <- residuals(t.lm)
plot(
  fitted(t.lm), t_lm_resid,
  col = 4,
  xlab = "Fitted Model",
  ylab = "Residuals",
  main = "Residuals vs fitted model",
  las = 1
)
# Horizontal reference line at zero. The original passed `cex.font`, which
# is not a graphical parameter and triggered a warning.
abline(h = 0)

# Second model: check the residual distribution for normality.
m1 <- lm(
  TFDANDSUP1_DFE ~ BMIz + agecat + incpovratio + RIAGENDR + DR1LANG,
  data = folate
)
plot(density(resid(m1)))
qqnorm(resid(m1), col = 4) # quantile-normal plot of the residuals
qqline(resid(m1))
# Show the first six rows of the data set (output recorded below).
head(folate, n = 6L)
## SEQN WTDRD1 WTDR2D DR1DRSTZ DR1LANG DR1TKCAL DR1TFOLA DR1TFA
## 1 51625 52953.303 127261.658 1 1 1829 386 184
## 2 51626 29699.653 43756.077 1 1 1852 1215 1028
## 3 51627 25592.859 45085.324 1 1 3752 583 353
## 4 51632 NaN NaN 5 NaN NaN NaN NaN
## 5 51634 5983.712 5051.341 1 3 2557 296 157
## 6 51638 18101.569 14060.098 1 1 1818 665 481
## DR1TFF DR1TFDFE DS1DS DS1TFA DS1TFDFE BMXBMI DR2DRSTZ DR2LANG DR2TKCAL
## 1 200 518 2 0 NaN 15.30 1 1 1864
## 2 187 1935 2 0 NaN 22.00 1 1 2183
## 3 230 831 1 NaN NaN 18.22 1 1 2426
## 4 NaN NaN NaN NaN NaN 13.21 5 NaN NaN
## 5 138 407 2 0 NaN 21.68 1 2 1878
## 6 185 1002 2 0 NaN 16.82 1 1 1964
## DR2TFOLA DR2TFA DR2TFF DR2TFDFE DS2DS DS2TFA DS2TFDFE RIAGENDR RIDAGEEX
## 1 134 36 98 158 2 0 NaN 1 50
## 2 542 408 134 826 2 0 NaN 1 202
## 3 272 74 198 324 1 NaN NaN 1 132
## 4 NaN NaN NaN NaN NaN NaN NaN 1 126
## 5 271 95 177 336 2 0 NaN 1 121
## 6 411 296 116 618 2 0 NaN 1 116
## RIDRETH1 DMDBORN2 DMDCITZN DMDYRSUS DMDEDUC3 DMDSCHOL DMDHHSIZ DMDFMSIZ
## 1 5 1 1 NaN NaN NaN 4 4
## 2 4 1 1 NaN 8 1 4 4
## 3 4 1 1 NaN 4 1 6 6
## 4 2 1 1 NaN 3 1 5 5
## 5 1 1 1 NaN 5 1 4 4
## 6 3 1 1 NaN 3 1 7 7
## INDHHIN2 INDFMIN2 INDFMPIR DMDHREDU SIALANG SIAPROXY FIALANG FIAPROXY
## 1 5 5 1.07 5 1 1 1 2
## 2 8 8 2.27 1 1 2 1 2
## 3 5 5 0.81 3 1 1 1 2
## 4 10 10 2.68 5 1 1 1 2
## 5 5 5 0.93 1 2 1 1 2
## 6 14 14 1.84 3 1 1 1 2
## FIAINTRP MIALANG MIAPROXY MIAINTRP AIALANG WTINT2YR WTMEC2YR SDMVPSU
## 1 2 NaN NaN NaN NaN 53901.104 56995.035 2
## 2 2 1 2 2 1 13953.078 14509.279 1
## 3 2 1 2 2 NaN 11664.899 12041.635 2
## 4 2 1 2 2 NaN 8056.943 8175.946 2
## 5 2 1 2 2 NaN 9805.508 10232.612 1
## 6 2 1 2 2 NaN 29727.784 30213.356 1
## SDMVSTRA LBDRBF LBXRBFSI LBDFOL LBXFOLSI agecat TFADANDSUPP1
## 1 79 472.4 1070 18.8 42.6 2 184
## 2 84 158.5 359 14.2 32.2 5 1028
## 3 86 450.3 1020 15.1 34.3 4 353
## 4 88 640.2 1450 26.8 60.7 4 NaN
## 5 86 507.7 1150 15.1 34.1 4 157
## 6 88 534.2 1210 31.3 71.0 4 481
## TFADANDSUPP2 TFDANDSUP1_DFE TFDANDSUP2_DFE MDRTKCAL MDRTFA MDSTFA
## 1 36 518 158 1846.5 110.0 0
## 2 408 1935 826 2017.5 718.0 0
## 3 74 831 324 3089.0 213.5 NaN
## 4 NaN NaN NaN NaN NaN NaN
## 5 95 407 336 2217.5 126.0 0
## 6 296 1002 618 1891.0 388.5 0
## MTFADANDSUPP MTFDANDSUP_DFE MDRTFF rbcfolate serumfolate DQL_ENGLISH
## 1 110.0 338.0 149.0 472.4 18.8 1
## 2 718.0 1380.5 160.5 158.5 14.2 1
## 3 213.5 577.5 214.0 450.3 15.1 1
## 4 NaN NaN NaN 640.2 26.8 NaN
## 5 126.0 371.5 157.5 507.7 15.1 2
## 6 388.5 810.0 150.5 534.2 31.3 1
## DQL_SPANISH DR1 BelowEAR_DR1 AboveUL_DR1 AboveUL_DR2 AboveUL_DR1AND2
## 1 1 1 0 0 0 0
## 2 1 0 0 1 0 0
## 3 1 1 0 0 0 0
## 4 NaN 0 NaN NaN NaN NaN
## 5 1 1 0 0 0 0
## 6 1 1 0 0 0 0
## BelowEAR_DR2 BelowEAR_DR1AND2 BMIzscore BMIz AboveBelowDR1
## 1 1 0 -0.8135215 2 1
## 2 0 0 0.5329886 3 2
## 3 0 0 -0.2266843 2 1
## 4 NaN NaN -1.2335523 2 NaN
## 5 0 0 0.4686777 3 1
## 6 0 0 -0.5080446 2 1
## householdincome householdincome2 incpovratio oldkids AboveUL_DR11
## 1 2 2 2 NaN 0
## 2 4 2 3 1 1
## 3 2 2 1 NaN 0
## 4 5 2 3 NaN NaN
## 5 2 2 1 NaN 0
## 6 5 2 2 NaN 0
## BelowEAR_DR11
## 1 NaN
## 2 0
## 3 0
## 4 NaN
## 5 0
## 6 0