# Descriptive analysis for Depression/life style factors paper
# by Kevin Linares: Jan 2015
library(foreign)
setwd("~/Dropbox/My documents/projects/UP Amigos/depression paper/working data")
DepData = read.dta("2008 2009 data for model.dta")
colnames(DepData) = tolower(colnames(DepData))
attach(DepData)
#####################################################################
### basic summary descriptives of BMI and WC for 2008 & 2009 ####
#####################################################################
# our sample, N=339
sex.t1 = table(sex)
sex.t2 = table(sex_t2)
cbind(sex.t1, sex.t2)
## sex.t1 sex.t2
## Male 128 129
## Female 211 210
# participant clave 163528 selected female in T1 & male in T2
# Histogram for gender at T1 & T2
par(mfrow=c(2,1), mar=c(4,4,2,1))
barplot(sex.t1, col=rep(c("light blue", 'light pink')),
main="Number of males and females in 2008")
barplot(sex.t2, col=rep(c("light blue", 'light pink')),
main="Number of males and females in 2009")

# Age for T1 & T2 descriptives
par(mfrow=c(1,1), mar=c(4,4,3,2))
age_stats = summary(age)
age_stats_T2 = summary(calc_age_t2)
cbind(age_stats, age_stats_T2)
## age_stats age_stats_T2
## Min. 16.00 17.00
## 1st Qu. 17.00 18.00
## Median 18.00 18.00
## Mean 17.75 18.32
## 3rd Qu. 18.00 19.00
## Max. 20.00 20.00
boxplot(cbind(age_stats, age_stats_T2), names=c("Age 2008", "Age 2009"),
col="light salmon", ylab='Ages', main='Age for 2008, 2009 (in years)',
cex.main=1.50, ylim=c(16, 20))

# boxplots of Age by gender
par(mfrow=c(2,1), mar=c(4,4,3,2))
boxplot(age~sex, data=DepData, col=rep(c("light blue", 'light pink')),
xlab='2008', ylab='Ages')
title(main='Age by gender in 2008 & 2009', cex.main=1.75)
boxplot(calc_age_t2 ~ sex_t2, data=DepData, col=rep(c("light blue", 'light pink')),
xlab='2009', ylab='Ages')

# frequency of males & females by age in 2008
par(mfrow=c(2,1), mar=c(4,4,3,1))
freq_age_gender_2008 = table(age, sex)
freq_age_gender_2008
## sex
## age Male Female
## 16 4 13
## 17 45 78
## 18 57 79
## 19 17 38
## 20 5 3
plot(freq_age_gender_2008, col=rep(c("light blue", 'light pink')), xlab='Age in 2008',
ylab='Sex', main='Frequency of age group by gender in 2008')
freq_age_gender_2009 = table(calc_age_t2, sex_t2)
freq_age_gender_2009
## sex_t2
## calc_age_t2 Male Female
## 17 21 40
## 18 50 93
## 19 41 60
## 20 17 17
plot(freq_age_gender_2009, col=rep(c("light blue", 'light pink')),
xlab='Age in 2009', ylab='Sex', main='Frequency of age group by gender in 2009')

##########################
### Family Variables ###
##########################
# marital status in 2008 & 2009
marital = table(marital_status, useNA = "ifany")
marital_t2 = table(marital_status_t2, useNA = "ifany")
cbind(marital, marital_t2)
## marital marital_t2
## Single 267 330
## Married 0 3
## Cohabitation 0 2
## Divorced 0 1
## <NA> 72 3
# family income in 2008 & 2009
# (2008 metric was transformed into 2009 metric using preexisting labels)
table(sort(income), exclude=NULL)
##
## 500 1000 2000 2500 3000 3200 3400 3500 3950 4000 4200 4500
## 1 1 4 5 10 1 1 5 1 15 1 2
## 4600 4800 5000 5500 5600 6000 6200 6500 6600 7000 7500 8000
## 1 1 28 1 1 13 2 4 1 11 2 17
## 8500 9000 9300 9500 9600 10000 12000 13000 13400 14000 15000 16000
## 3 5 1 1 1 45 11 3 1 4 25 1
## 17000 18000 19000 20000 22000 23000 24000 25000 28000 30000 32000 35000
## 1 5 2 18 3 1 2 6 1 6 1 1
## 40000 50000 58000 60000 80000 <NA>
## 2 2 1 1 1 0
summary(income_t2)
## Less than $10,000 $10,000 - $14,999 $15,000 - $19,999 $20,000 - $24,999
## 143 74 43 21
## $25,000 - $49,999 More than $50,000 NA's
## 18 2 38
income.new=c(income)
names(income.new) = c("Less than $10,000", "$10,000 - $14,999", "$15,000 - $19,999",
"$20,000 - $24,999","$25,000 - $49,999")
income.new[income >= 0] = "Less than $10,000"
income.new[income <= 9999] = "Less than $10,000"
income.new[income >= 10000 & income <= 14999] = "$10,000 - $14,999"
income.new[income >= 15000 & income <= 19999] = "$15,000 - $19,999"
income.new[income >= 20000 & income <= 24999] = "$20,000 - $24,999"
income.new[income >= 25000 & income <= 49999] = "$25,000 - $49,999"
income.new[income >= 50000] = "More than $50,000"
income.new=factor(income.new, levels=c("Less than $10,000", "$10,000 - $14,999",
"$15,000 - $19,999",
"$20,000 - $24,999","$25,000 - $49,999",
"More than $50,000"))
summary(income.new)
## Less than $10,000 $10,000 - $14,999 $15,000 - $19,999 $20,000 - $24,999
## 140 64 34 24
## $25,000 - $49,999 More than $50,000 NA's
## 17 5 55
cbind(table(income.new, useNA = "ifany"), table(income_t2, useNA = "ifany"))
## [,1] [,2]
## Less than $10,000 140 143
## $10,000 - $14,999 64 74
## $15,000 - $19,999 34 43
## $20,000 - $24,999 24 21
## $25,000 - $49,999 17 18
## More than $50,000 5 2
## <NA> 55 38
# for any analysis use income_t2 from 2009 dataset, 2008 is questionable
### Parental Education in 2008 & 2009 ###
cbind(table(education_father, useNA = "ifany"), table(education_father_t2,
useNA = "ifany"),
table(education_mother, useNA = "ifany"), table(education_mother_t2,
useNA = "ifany"))
## [,1] [,2] [,3] [,4]
## none 144 4 112 8
## grade school 59 39 74 53
## middle school 66 69 81 75
## high school 0 44 1 48
## technical/career school 47 48 62 71
## 4 years of college education 0 101 0 77
## masters or doctorate 0 17 0 4
## do not know 0 11 0 0
## <NA> 23 6 9 3
cbind(table(work_father_t2), table(work_mother_t2))
## [,1] [,2]
## worked 267 175
## did not work, but has work 7 7
## sought work 3 5
## was a student 2 1
## was dedicated to housework 2 126
## was retired or a pensioner 17 15
## was/is permanently unable to work due to disability 2 0
## do no know or prefer to not respond 33 6
#################################################
##### descriptives on variables of interest #####
#################################################
par(mfrow=c(1,1), mar=c(4,5,4,2))
pa_counts <- table(pa_times_week)
barplot(pa_counts, density=15, xlab="Frequency", ylab="PA Counts",
main="Physical Activity Distribution for 1 week",
horiz=TRUE, col=c("darkseagreen1"))

pa_counts
## pa_times_week
## 0 1 2 3 4 5 6 7
## 144 66 19 33 17 25 12 4
# PA counts by sex in 2008
pa_counts_sex <- table(pa_times_week, sex)
barplot(pa_counts_sex, density=20, xlab="Sex", ylab="PA Counts",
main="Physical Activity Distribution for 1 week", col=cm.colors(8),
legend = rownames(pa_counts_sex), beside=TRUE)

pa_counts_sex
## sex
## pa_times_week Male Female
## 0 46 98
## 1 29 37
## 2 5 14
## 3 10 23
## 4 9 8
## 5 10 15
## 6 8 4
## 7 2 2
table(pa_duration)
## pa_duration
## 0 1 2 3 5 15 18 20 25 30 40 45 50 60 75 90 100 120
## 144 2 4 2 2 8 1 15 2 35 12 6 11 36 1 5 1 32
## 150 180 360 500
## 1 3 1 1
hist(pa_duration, density=25, breaks=20, xlab="minutes", ylab="PA Counts",
main="Physical Activity Distribution for 1 week", col=c("cadetblue2"))

# TV, Internet, & videogame
table(tv_watching_hours)
## tv_watching_hours
## 0 0.5 1 2 3 4 5 6 7 8 9 10
## 19 4 76 116 60 31 16 3 3 4 1 1
table(videogame_hours)
## videogame_hours
## 0 0.5 1 2 3 4 5 6 8
## 47 4 116 84 36 25 9 8 1
tv_vidgame = table(tv_watching_hours, videogame_hours)
tv_vidgame
## videogame_hours
## tv_watching_hours 0 0.5 1 2 3 4 5 6 8
## 0 10 1 4 2 2 0 0 0 0
## 0.5 0 0 0 2 0 1 0 0 0
## 1 12 2 29 21 4 3 2 2 1
## 2 9 1 51 26 16 10 2 0 0
## 3 9 0 15 18 7 6 3 2 0
## 4 2 0 12 6 4 2 1 3 0
## 5 2 0 5 3 2 3 0 0 0
## 6 1 0 0 2 0 0 0 0 0
## 7 1 0 0 1 1 0 0 0 0
## 8 0 0 0 3 0 0 0 1 0
## 9 0 0 0 0 0 0 1 0 0
## 10 1 0 0 0 0 0 0 0 0
par(mfrow=c(2,1), mar=c(4,5,4,2))
hist(tv_watching_hours, density=20, breaks=10, xlab="Hours", ylab="Frequency",
main="Frequency for hours spent watching TV",
col=c("cadetblue4"))
hist(tv_vidgame, density=20, breaks=10, xlab="Hours", ylab="Frequency",
main="Frequency for hours spent playing videogames",
col=c("cadetblue4"))

par(mfrow=c(1,1), mar=c(4,5,4,2))
# internet & videogame variables 2009 data
table(internet2_t2)
## internet2_t2
## Daily Two to three times per week
## 127 115
## Weekly Monthly
## 52 25
## Less than once a month
## 19
names_int_t2 = c("Daily", "2-3 times per week", "Weekly", "Monthly", "< once a month")
barplot(table(internet2_t2), names=names_int_t2, density=20, col="seagreen3",
xlab="Often internet use", ylab="Frequency", cex.names=.75,
main="Barplot for how often Internet is used")

## By sex
par(mfrow=c(2,1), mar=c(4,5,4,2))
internet2_t2_counts = table(sex_t2, internet2_t2)
internet_t2_relfreq = internet2_t2_counts/sum(internet2_t2_counts)
barplot(internet2_t2_counts, names=names_int_t2, density=15,
col=c("seagreen2", "royalblue1"), xlab="Often internet use",
ylab="Frequency", cex.names=.75,
main="Barplot for how often Internet is used by sex")
legend('topright', rownames(internet2_t2_counts), density=15,
fill=c("seagreen2", "royalblue1"))
barplot(internet_t2_relfreq, names=names_int_t2, density=15,
col=c("seagreen2", "royalblue1"), xlab="Often internet use",
ylab="Relative Frequency", cex.names=.75,
main="Relative Frequency Boxplot for how often Internet is used")

par(mfrow=c(1,2), mar=c(4,4,3,2))
count_tv_week_t2 = table(tv_week_t2)
count_tv_week_t2
## tv_week_t2
## 0 min 15 min 30 min 1 hr
## 8 14 22 99
## 2 hrs 3 hrs 4 hrs More than 5 hrs
## 94 52 31 18
lbls_tv_week_t2 = names(count_tv_week_t2)
pct_tv_week_t2 = round(count_tv_week_t2/sum(count_tv_week_t2)*100)
lbls_tv_week_t2 = paste(lbls_tv_week_t2, pct_tv_week_t2)
lbls_tv_week_t2 = paste(lbls_tv_week_t2,"%",sep="")
pie(count_tv_week_t2, labels = lbls_tv_week_t2, col=cm.colors(8),
main="Pie chart for how often
television is watched during the week")
count_tv_weekend_t2 = table(tv_weekend_t2)
count_tv_weekend_t2
## tv_weekend_t2
## 0 min 15 min 30 min 1 hr
## 13 11 21 39
## 2 hrs 3 hrs 4 hrs More than 5 hrs
## 81 86 51 34
lbls_tv_weekend_t2 = names(count_tv_weekend_t2)
pct_tv_weekend_t2 = round(count_tv_weekend_t2/sum(count_tv_weekend_t2)*100)
lbls_tv_weekend_t2 = paste(lbls_tv_weekend_t2, pct_tv_weekend_t2)
lbls_tv_weekend_t2 = paste(lbls_tv_weekend_t2,"%",sep="")
pie(count_tv_weekend_t2, labels = lbls_tv_weekend_t2, col=cm.colors(8),
main="Pie chart for how often
television is watched on weekends")

count_game_week_t2 = table(game_week_t2)
count_game_week_t2
## game_week_t2
## 0 min 15 min 30 min 1 hr
## 53 23 45 80
## 2 hrs 3 hrs 4 hrs More than 5 hrs
## 60 47 22 9
lbls_game_week_t2 = names(count_game_week_t2)
pct_game_week_t2 = round(count_game_week_t2/sum(count_game_week_t2)*100)
lbls_game_week_t2 = paste(lbls_game_week_t2, pct_game_week_t2)
lbls_game_week_t2 = paste(lbls_game_week_t2,"%",sep="")
pie(count_game_week_t2, labels = lbls_game_week_t2, col=topo.colors(8),
main="Pie chart for how often
videogames are played during the week")
count_game_weekend_t2 = table(game_weekend_t2)
count_game_weekend_t2
## game_weekend_t2
## 0 min 15 min 30 min 1 hr
## 85 23 44 59
## 2 hrs 3 hrs 4 hrs More than 5 hrs
## 53 36 18 17
lbls_game_weekend_t2 = names(count_game_weekend_t2)
pct_game_weekend_t2 = round(count_game_weekend_t2/sum(count_game_weekend_t2)*100)
lbls_game_weekend_t2 = paste(lbls_game_weekend_t2, pct_game_weekend_t2)
lbls_game_weekend_t2 = paste(lbls_game_weekend_t2,"%",sep="")
pie(count_game_weekend_t2, labels = lbls_game_weekend_t2, col=topo.colors(8),
main="Pie chart for how often
videogames are played on weekends")

## contingency tables for watching TV during meals & if tv is visible
# tv1_t2 = is there a tele present from where they usually eat
# tv2_t2 = do they watch the tele when they eat
table(tv1_t2, useNA = "ifany")
## tv1_t2
## yes no <NA>
## 197 137 5
table(tv2_t2, useNA = "ifany")
## tv2_t2
## yes no <NA>
## 153 179 7
freq_tv1_t2 = table(tv1_t2)
freq_tv2_t2 = table(tv2_t2)
table(is.na(tv1_t2))
##
## FALSE TRUE
## 334 5
table(is.na(tv2_t2))
##
## FALSE TRUE
## 332 7
relfreq_tv1_t2 = freq_tv1_t2/334 # 334 total valid responses
relfreq_tv2_t2 = freq_tv2_t2/334 # 334 total valid responses
cbind(freq_tv1_t2, relfreq_tv1_t2 ,freq_tv2_t2, relfreq_tv2_t2)
## freq_tv1_t2 relfreq_tv1_t2 freq_tv2_t2 relfreq_tv2_t2
## yes 197 0.5898204 153 0.4580838
## no 137 0.4101796 179 0.5359281
## conditional distribution
library(gmodels)
joint = CrossTable(tv1_t2, tv2_t2, prop.chisq=TRUE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 332
##
##
## | tv2_t2
## tv1_t2 | yes | no | Row Total |
## -------------|-----------|-----------|-----------|
## yes | 141 | 55 | 196 |
## | 28.430 | 24.300 | |
## | 0.719 | 0.281 | 0.590 |
## | 0.922 | 0.307 | |
## | 0.425 | 0.166 | |
## -------------|-----------|-----------|-----------|
## no | 12 | 124 | 136 |
## | 40.972 | 35.021 | |
## | 0.088 | 0.912 | 0.410 |
## | 0.078 | 0.693 | |
## | 0.036 | 0.373 | |
## -------------|-----------|-----------|-----------|
## Column Total | 153 | 179 | 332 |
## | 0.461 | 0.539 | |
## -------------|-----------|-----------|-----------|
##
##
joint
## $t
## y
## x yes no
## yes 141 55
## no 12 124
##
## $prop.row
## y
## x yes no
## yes 0.71938776 0.28061224
## no 0.08823529 0.91176471
##
## $prop.col
## y
## x yes no
## yes 0.92156863 0.30726257
## no 0.07843137 0.69273743
##
## $prop.tbl
## y
## x yes no
## yes 0.42469880 0.16566265
## no 0.03614458 0.37349398
## joint distribution plot
par(mfrow=c(1,1), mar=c(2,3,2,1))
joint_cnts = joint$t
barplot(joint_cnts, beside=FALSE, density=20, col=cm.colors(2), ylab='Frequency',
xlab='TV visible from where they eat',
main='Television watching during meals: stacked Bar plot')
legend("bottomleft", c('TV during meals', 'No TV during meals'),
density=20, cex=.75, pch=10, col=cm.colors(2))

# Most people that have a tele visible (141) from where they eat,
## tend to watch tele during meals
## conditional distribution plot
WatchTV_given_TVvis = joint$prop.col
barplot(WatchTV_given_TVvis, beside=FALSE, density=20, col=cm.colors(2),
ylab='Proportion of participants that watch TV during meals',
xlab='TV visible from where they eat',
main='Proportion of TV watching while eating given TV visible')
legend("bottomleft", c('TV during meals', 'No TV during meals'), density=20,
cex=.75, pch=10, col=cm.colors(2))

# The proportion attributed too for watching tv during meals is .92%
## given that a tv is visible from where they eat.
# The proportion attributed too NOT watching tv during meals is .69%
## given that a tv is NOT visible from where they eat
# some extra variables of interest #
table(frs_t2, useNA = "ifany")
## frs_t2
## 1 2 3 4 5 6 7 <NA>
## 4 40 103 116 58 12 5 1
table(self_health_t2, useNA = "ifany")
## self_health_t2
## A B C D E F
## 65 213 58 1 2 0
table(fastfood_t2, useNA = "ifany")
## fastfood_t2
## Never 1-2 times 3-4 times 5-6 times 7 times <NA>
## 135 165 20 9 1 9
hist(frs_t2, density=20, breaks=7, col=terrain.colors(7),
main='Histogram for figure scale')

barplot(table(self_health_t2), density=25, col=cm.colors(6),
main='Barplot for self health scale')

barplot(table(fastfood_t2), density=25, col=cm.colors(5),
main='Barplot for fast food consumption')

# weight status variables (BMI, waist circumference)
# saves descriptives of bmi and wc at T1 & T2 as summary_T1/T2
summary_T1 = summary(cbind(bmi, wc))
summary_T2 = summary(cbind(bmi_t2, wc_t2))
cbind(summary_T1, summary_T2)
## bmi wc bmi_t2 wc_t2
## "Min. :15.05 " "Min. : 61.00 " "Min. :16.44 " "Min. : 58.50 "
## "1st Qu.:20.24 " "1st Qu.: 74.00 " "1st Qu.:20.53 " "1st Qu.: 71.00 "
## "Median :22.55 " "Median : 81.00 " "Median :22.60 " "Median : 77.00 "
## "Mean :23.24 " "Mean : 82.83 " "Mean :23.54 " "Mean : 79.16 "
## "3rd Qu.:25.68 " "3rd Qu.: 89.00 " "3rd Qu.:25.59 " "3rd Qu.: 85.00 "
## "Max. :41.91 " "Max. :125.00 " "Max. :40.33 " "Max. :132.00 "
## NA "NA's :2 " NA "NA's :4 "
par(mfrow=c(4,1), mar=c(4,5,2,1))
hist(subset(bmi, sex=="Male"), col="dark blue", density=20, breaks=20,
prob=TRUE, xlab='BMI',
main="Histogram for BMI among males in 2008 with density curve")
lines(density(bmi),col = "red")
hist(subset(bmi, sex=="Female"), col="dark green", density=20, breaks=20,
prob=TRUE, xlab='BMI',
main="Histogram for BMI among females in 2008 with density curve")
lines(density(bmi),col = "red")
hist(subset(bmi_t2, sex_t2=="Male"), col="dark blue", density=20, breaks=20,
prob=TRUE, xlab='BMI',
main="Histogram for BMI among males in 2009 with density curve")
lines(density(bmi_t2),col = "red")
hist(subset(bmi_t2, sex_t2=="Female"), col="dark green", density=20, breaks=20,
prob=TRUE, xlab='BMI',
main="Histogram for BMI among females in 2009 with density curve")
lines(density(bmi_t2),col = "red")

# scatter plots of BMI & WC, BMI & BMI_t2, wc & wc_t2, and BMI_t2 & WC_t2 by gender
par(mfrow=c(1,1), mar=c(5,5,4,1))
# BMI x WC
with(DepData, plot(bmi, wc, pch=18, col=rep(c("blue", "darkgreen")),
xlab='BMI in 2008', ylab='Waist circumference (in Centimeters)'))
title(main='Scatterplot of BMI & Waist Circumference (2008)
BMI regression line set to 25 (>25 = overweight/obese)', cex.main = 1.25)
legend('topleft', c("BMI", "Waist Circumference"), pch=c(18),
col=c("blue", "darkgreen"), cex=1.00)
abline(lsfit(bmi, wc))
## Warning in lsfit(bmi, wc): 2 missing values deleted
abline(v=25.0, lwd=2, lty=2, col='red')

# BMI x BMI_2
with(DepData, plot(bmi, bmi_t2, pch=18, col=rep(c("blue", "darkgreen")),
xlab='BMI in 2008', ylab='BMI in 2009'))
title(main='Scatterplot of BMI in 2008 & 2009
BMI (2008) regression line set to 25 (>25 = overweight/obese)',
cex.main = 1.25)
legend('topleft', c("BMI 2008", "BMI 2008"), pch=c(18),
col=c("blue", "darkgreen"), cex=1.00)
abline(lsfit(bmi, bmi_t2))
abline(v=25.0, lwd=2, lty=2, col='red')

# wc & wc_t2
with(DepData, plot(wc, wc_t2, pch=18, col=rep(c("blue", "darkgreen")),
xlab='Waist Circumference in 2008',
ylab='Waist Circumference in 2009'))
title(main='Scatterplot of Waist Circumference (in Centimeters) 2008 & 2009',
cex.main = 1.25)
legend('topleft', c("Waist Circumference 2008", "Waist Circumference 2009"),
pch=c(18), col=c("blue", "darkgreen"), cex=1.00)
abline(lsfit(wc, wc_t2))
## Warning in lsfit(wc, wc_t2): 6 missing values deleted

# BMI_t2 & WC_t2
with(DepData, plot(bmi_t2, wc_t2, pch=18, col=rep(c("blue", "darkgreen")),
xlab='BMI in 2009', ylab='Waist circumference (in Centimeters)'))
title(main='Scatterplot of BMI & Waist Circumference (2009)
BMI regression line set to 25 (>25 = overweight/obese)', cex.main = 1.25)
legend('topleft', c("BMI", "Waist Circumference"), pch=c(18),
col=c("blue", "darkgreen"), cex=1.00)
abline(lsfit(bmi_t2, wc_t2))
## Warning in lsfit(bmi_t2, wc_t2): 4 missing values deleted
abline(v=25.0, lwd=2, lty=2, col='red')
# BMI x WC scatterplots by gender
library(lattice)

par(mfrow=c(1,1), mar=c(4,5,4,2))
panel.function = function(x, y) {
panel.xyplot(x, y)
panel.abline(lm(y ~ x))
}
# BMI x WC by sex
xyplot(wc ~ bmi | sex, data=DepData, origin=0, panel = panel.function,
xlab='BMI in 2008', ylab='Waist circumference (in centimeters)',
main='Scatterplot for BMI & Waist Circumference (2008) across sex',
cex.main = 2.0)

# BMI x BMI_T2 by sex
xyplot(bmi_t2 ~ bmi | sex, data=DepData, origin=0, panel = panel.function,
xlab='BMI in 2008', ylab='BMI in 2009',
main='Scatterplot for BMI in 2008 & 2009 across sex', cex.main = 2.0)

# WC & WC_t2 by sex
xyplot(wc_t2 ~ wc | sex, data=DepData, origin=0, panel = panel.function,
xlab='Waist Circumference (centimeters) in 2008',
ylab='Waist Circumference (centimeters) in 2009',
main='Scatterplot for Waist Circumference in 2008 & 2009 across sex',
cex.main = 2.0)

# WC_t2 x BMI_T2 by sex
xyplot(wc_t2 ~ bmi_t2 | sex, data=DepData, origin=0, panel = panel.function,
xlab='BMI in 2009', ylab='Waist Circumference (centimeters) 2009',
main='Scatterplot for BMI & Waist Circumference in (2009) across sex',
cex.main = 2.0)

library(ggplot2)
qplot(bmi_t2, data=DepData, geom="density", fill=internet2_t2, alpha=I(.5),
main="Distribution of BMI", xlab="BMI",
ylab="Density")

#######################################################
######## Some analysis on BMI & Depression ############
#######################################################
# Difference between BMI 2008 & BMI 2009
# paired t-test for bmi 2008-2009
t.test(bmi, bmi_t2, paired=TRUE)
##
## Paired t-test
##
## data: bmi and bmi_t2
## t = -3.3494, df = 338, p-value = 0.0009013
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4775823 -0.1241834
## sample estimates:
## mean of the differences
## -0.3008829
#
# sig. differences in BMI suggesst further examining the difference as a seperate variables
BMI_Dif = bmi- bmi_t2
summary(BMI_Dif)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.3590 -1.0530 -0.2081 -0.3009 0.5312 5.7020
plot(sort(BMI_Dif), ylab="Difference in BMI", xlab="cases",
main="Sorted scatter plot for BMI difference between 2008 & 2009")

qplot(BMI_Dif, data=DepData, geom="density", fill=sex, alpha=I(.5),
main="Distribution of BMI", xlab="BMI Difference",
ylab="Density")

qplot(sex, BMI_Dif, data=DepData, geom=c("boxplot"), fill=sex, alpha=I(.5),
main="Boxplot of BMI difference by Sex", xlab="",
ylab="BMI difference")

# 1 - way ANOVE for BMI difference across gender
BMI_sex_dif_fit = aov(BMI_Dif ~ sex, data=DepData)
summary(BMI_sex_dif_fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 1.0 1.003 0.366 0.546
## Residuals 337 923.6 2.741
# CES-D data
ces_d = data.frame(ces = rpois(20, 34))
tabList <- lapply(ces_d, table)
cbind(table("cesd_1", "cesd_1_t2", "cesd_2", "cesd_2_t2", "cesd_3",
"cesd_3_t2", "cesd_4", "cesd_4_t2", "cesd_5", "cesd_5_t2",
"cesd_6", "cesd_6_t2", "cesd_7", "cesd_7_t2", "cesd_8",
"cesd_8_t2", "cesd_9", "cesd_9_t2", "cesd_10", "cesd_10_t2"))
## [,1]
## [1,] 1