2015-01-29
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Loading required package: sandwich
## The Commander GUI is launched only in interactive sessions
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
> # Read the SPSS system file into a data frame; value labels are requested
> # via use.value.labels, with no cap on the number of labels (Inf).
> Dataset <- read.spss(
+   "/home/lil-theta/Dropbox/My documents/projects/UP Amigos/Data 2009/ces-d.sav",
+   use.value.labels = TRUE, max.value.labels = Inf, to.data.frame = TRUE
+ )
Warning in read.spss("/home/lil-theta/Dropbox/My documents/projects/UP
Amigos/Data 2009/ces-d.sav", : /home/lil-theta/Dropbox/My
documents/projects/UP Amigos/Data 2009/ces-d.sav: Unrecognized record type
7, subtype 18 encountered in system file
re-encoding from CP1252
> colnames(Dataset) <- tolower(colnames(Dataset))
> library(relimp, pos=17)
> local({
+   # One-way frequency table of sex: raw counts first, then percentages
+   # rounded to two decimals.
+   sex_counts <- with(Dataset, table(sex))
+   cat("\ncounts:\n")
+   print(sex_counts)
+   cat("\npercentages:\n")
+   sex_pct <- round(100 * sex_counts / sum(sex_counts), 2)
+   print(sex_pct)
+ })
counts:
sex
Female Male
3509 2728
percentages:
sex
Female Male
56.26 43.74
> with(Dataset, barplot(table(sex), xlab="sex", ylab="Frequency",
+ main="Barplot for sex"))

> with(Dataset, tapply(calc_age, list(sex), mean, na.rm=TRUE))
Female Male
17.57167 17.66935
> Boxplot(calc_age~sex, data=Dataset, id.method="y",
+ main="Boxplot of age by sex")

> library(abind, pos=18)
> library(e1071, pos=19)
Attaching package: 'e1071'
The following object is masked _by_ 'package:Hmisc':
impute
> # Counts and percentages of father's education; print() returns the rounded
> # percentages, which cbind() then lists again as a single column.
> cbind(local({
+   father_counts <- with(Dataset, table(education_father))
+   cat("\ncounts:\n")
+   print(father_counts)
+   cat("\npercentages:\n")
+   father_pct <- round(100 * father_counts / sum(father_counts), 2)
+   print(father_pct)
+ }))
counts:
education_father
none grade school
107 695
middle school high school
1123 907
technical/career school 4 years of college education
673 1917
masters or doctorate
455
percentages:
education_father
none grade school
1.82 11.83
middle school high school
19.11 15.43
technical/career school 4 years of college education
11.45 32.62
masters or doctorate
7.74
[,1]
none 1.82
grade school 11.83
middle school 19.11
high school 15.43
technical/career school 11.45
4 years of college education 32.62
masters or doctorate 7.74
> # Counts and percentages of mother's education; print() returns the rounded
> # percentages, which cbind() then lists again as a single column.
> cbind(local({
+   mother_counts <- with(Dataset, table(education_mother))
+   cat("\ncounts:\n")
+   print(mother_counts)
+   cat("\npercentages:\n")
+   mother_pct <- round(100 * mother_counts / sum(mother_counts), 2)
+   print(mother_pct)
+ }))
counts:
education_mother
none grade school
114 866
middle school high school
1316 805
technical/career school 4 years of college education
1147 1618
masters or doctorate
231
percentages:
education_mother
none grade school
1.87 14.20
middle school high school
21.58 13.20
technical/career school 4 years of college education
18.81 26.54
masters or doctorate
3.79
[,1]
none 1.87
grade school 14.20
middle school 21.58
high school 13.20
technical/career school 18.81
4 years of college education 26.54
masters or doctorate 3.79
> library(colorspace, pos=20)
> with(Dataset,cbind(table(income)))
[,1]
Less than $10,000 2633
$10,000 - $14,999 1312
$15,000 - $19,999 656
$20,000 - $24,999 432
$25,000 - $49,999 342
More than $50,000 91
> with(Dataset,cbind(table(oftenint)))
[,1]
Never 18
Less than once a month 176
Monthly 194
Weekly 668
Two to three times per week 2285
Daily 2876
> # Counts per duration category, one column per item, in this order:
> # game_week, game_weekend, tv_week, tv_weekend.
> with(
+   Dataset,
+   cbind(
+     table(game_week),
+     table(game_weekend),
+     table(tv_week),
+     table(tv_weekend)
+   )
+ )
[,1] [,2] [,3] [,4]
0 min 743 1181 147 204
15 min 290 295 218 163
30 min 610 631 517 358
1 hr 1574 1129 1447 820
2 hrs 1462 1135 1866 1287
3 hrs 792 806 1102 1402
4 hrs 423 527 578 1186
> # Frequency barplots for the four screen-time items; every title follows
> # the same "Barplot for time spent ..." wording.
> with(Dataset, barplot(table(game_week),
+   xlab = "game_week", ylab = "Frequency",
+   main = "Barplot for time spent on video games during week"
+ ))

> with(Dataset, barplot(table(game_weekend),
+   xlab = "game_weekend", ylab = "Frequency",
+   main = "Barplot for time spent on video games during weekend"
+ ))

> with(Dataset, barplot(table(tv_week),
+   xlab = "tv_week", ylab = "Frequency",
+   main = "Barplot for time spent watching TV during week"
+ ))

> with(Dataset, barplot(table(tv_weekend),
+   xlab = "tv_weekend", ylab = "Frequency",
+   main = "Barplot for time spent watching TV during weekend"
+ ))

> local({
+   # Cross-tabulate weekday against weekend gaming time, then run a
+   # chi-squared test on the resulting table.
+   # The table keeps the name .Table because the test report echoes it.
+   .Table <- xtabs(~ game_week + game_weekend, data = Dataset)
+   cat("\nFrequency table:\n")
+   print(.Table)
+   print(chisq.test(.Table, correct = TRUE))
+ })
Frequency table:
game_weekend
game_week 0 min 15 min 30 min 1 hr 2 hrs 3 hrs 4 hrs
0 min 539 35 49 61 36 14 5
15 min 90 89 59 28 16 4 2
30 min 111 55 162 198 59 13 2
1 hr 258 60 216 429 401 152 36
2 hrs 116 36 95 266 389 338 163
3 hrs 40 13 32 94 139 183 186
4 hrs 17 5 15 28 67 64 100
Pearson's Chi-squared test
data: .Table
X-squared = 3332.7, df = 36, p-value < 2.2e-16
> local({
+   # Cross-tabulate weekday against weekend TV time, then run a chi-squared
+   # test on the resulting table.
+   # The table keeps the name .Table because the test report echoes it.
+   # NOTE(review): this run warns that the chi-squared approximation may be
+   # incorrect; presumably some expected counts are small -- confirm.
+   .Table <- xtabs(~ tv_week + tv_weekend, data = Dataset)
+   cat("\nFrequency table:\n")
+   print(.Table)
+   print(chisq.test(.Table, correct = TRUE))
+ })
Frequency table:
tv_weekend
tv_week 0 min 15 min 30 min 1 hr 2 hrs 3 hrs 4 hrs
0 min 55 16 20 14 18 14 4
15 min 24 48 65 31 35 9 1
30 min 28 33 78 153 153 55 12
1 hr 47 32 116 224 459 400 138
2 hrs 34 22 60 255 324 530 494
3 hrs 8 8 11 102 181 205 365
4 hrs 5 2 4 29 90 128 107
Warning in chisq.test(.Table, correct = TRUE): Chi-squared approximation
may be incorrect
Pearson's Chi-squared test
data: .Table
X-squared = 1995.164, df = 36, p-value < 2.2e-16
> numSummary(Dataset[,"bmi"], statistics=c("mean", "sd", "IQR", "quantiles"),
+ quantiles=c(0,.25,.5,.75,1))
mean sd IQR 0% 25% 50% 75% 100%
23.47008 4.449855 5.287516 10.96827 20.3494 22.60026 25.63692 47.43922
n
6237
> numSummary(Dataset[,"wc"], statistics=c("mean", "sd", "IQR", "quantiles"),
+ quantiles=c(0,.25,.5,.75,1))
mean sd IQR 0% 25% 50% 75% 100% n NA
78.66426 11.38685 13 50 71 77 84 160 6147 90
> with(Dataset, tapply(bmi, list(sex), mean, na.rm=TRUE))
Female Male
23.31652 23.66760
> with(Dataset, tapply(bmi, list(sex), sd, na.rm=TRUE))
Female Male
4.385796 4.524070
> Boxplot( ~ bmi, data=Dataset, id.method="y")

[1] "3424" "3021" "851" "4821" "656" "1930" "4913" "1866" "3514" "5648"
[11] "5694"
> densityPlot( ~ bmi, data=Dataset, bw="SJ", adjust=1, kernel="gaussian")

> densityPlot(bmi~sex, data=Dataset, bw="SJ", adjust=1, kernel="gaussian")

> scatterplot(wc~bmi | sex, reg.line=lm, smooth=TRUE, spread=TRUE,
+ id.method='mahal', id.n = 2, boxplots='xy', span=0.5,
+ by.groups=TRUE,
+ data=Dataset)

656 1847 2456 3955
395 740 1438 1638
> with(Dataset, tapply(bmi, sex, var, na.rm=TRUE))
Female Male
19.23521 20.46721
> bartlett.test(bmi ~ sex, data=Dataset)
Bartlett test of homogeneity of variances
data: bmi by sex
Bartlett's K-squared = 2.9634, df = 1, p-value = 0.08517
> AnovaModel.2 <- aov(bmi ~ sex, data=Dataset)
> summary(AnovaModel.2)
Df Sum Sq Mean Sq F value Pr(>F)
sex 1 189 189.17 9.567 0.00199 **
Residuals 6235 123291 19.77
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> with(Dataset, numSummary(bmi, groups=sex, statistics=c("mean", "sd")))
mean sd data:n
Female 23.31652 4.385796 3509
Male 23.66760 4.524070 2728
> # Response counts for the ten CES-D items, one unnamed column per item
> # (cesd_1 ... cesd_10); unname() keeps the columns labelled [,1] ... [,10].
> do.call(cbind, unname(lapply(Dataset[paste0("cesd_", 1:10)], table)))
[,1] [,2] [,3] [,4] [,5]
rarely or none of the time (less than 1 day) 4086 643 3915 3350 4066
some or a little of the time (1-2 days) 1600 1307 1375 1801 1286
occasionally or moderate amount of time 3-4 days) 412 2346 612 682 564
all of the time (5-7 days) 125 1916 263 338 269
[,6] [,7] [,8] [,9]
rarely or none of the time (less than 1 day) 3737 3520 4078 4906
some or a little of the time (1-2 days) 1389 1504 1458 823
occasionally or moderate amount of time 3-4 days) 693 697 453 303
all of the time (5-7 days) 350 452 197 174
[,10]
rarely or none of the time (less than 1 day) 3448
some or a little of the time (1-2 days) 1886
occasionally or moderate amount of time 3-4 days) 654
all of the time (5-7 days) 229
> # Bar chart of the response counts for cesd_1 ... cesd_10, coloured with
> # a ten-colour topo.colors palette.
> barchart(
+   with(Dataset, cbind(
+     table(cesd_1), table(cesd_2), table(cesd_3), table(cesd_4), table(cesd_5),
+     table(cesd_6), table(cesd_7), table(cesd_8), table(cesd_9), table(cesd_10)
+   )),
+   col = topo.colors(10)
+ )
