Homework 1

Author: Montserrat Gonzalez
E-mail: montse@andrew
Date: Sep-05-2013

Exercise 1

library(MASS)
library(ggplot2)
library(gridExtra)
## Loading required package: grid
data(survey)
# Smoking frequency, plotted twice: once with the factor levels as loaded and
# once with the levels put in an explicit Never -> Heavy order.

# Build the "default" panel before survey$Smoke is releveled below.
default <- ggplot(survey, aes(factor(Smoke), fill = factor(Smoke))) +
    geom_bar() +
    labs(title = "Smoking in College Students - Default", x = "", y = "Count",
        fill = "Smoking level") +
    theme(plot.title = element_text(size = rel(2))) +
    scale_fill_brewer(palette = "Set1")

# Fix the level order explicitly so the bars run from "Never" to "Heavy".
survey$Smoke <- factor(survey$Smoke, levels = c("Never", "Occas", "Regul", "Heavy"))

# y = "Count" is set here as well so both panels carry the same axis label.
ordered <- ggplot(survey, aes(factor(Smoke), fill = factor(Smoke))) +
    geom_bar() +
    labs(title = "Smoking in College Students - Ordered", x = "", y = "Count",
        fill = "Smoking level") +
    theme(plot.title = element_text(size = rel(2))) +
    scale_fill_brewer(palette = "Set2")

# Show the two versions side by side for comparison.
grid.arrange(default, ordered, ncol = 2)

plot of chunk unnamed-chunk-2

# Exercise frequency, plotted twice as horizontal bars: once with the factor
# levels as loaded and once with the levels in an explicit None -> Freq order.

# Build the "default" panel before survey$Exer is releveled below.
default <- ggplot(survey, aes(factor(Exer), fill = factor(Exer))) +
    geom_bar() +
    labs(title = "Exercising in College Students - Default", x = "", y = "Count",
        fill = "Exercise level") +
    theme(plot.title = element_text(size = rel(2))) +
    scale_fill_brewer(palette = "Set3") +
    coord_flip()

# Fix the level order explicitly so the bars run from "None" to "Freq".
survey$Exer <- factor(survey$Exer, levels = c("None", "Some", "Freq"))

# "Set4" is not a ColorBrewer palette (it triggered an "Unknown palette"
# warning and a silent fallback); "Dark2" is a valid qualitative palette.
# y = "Count" is set here as well so both panels carry the same axis label.
ordered <- ggplot(survey, aes(factor(Exer), fill = factor(Exer))) +
    geom_bar() +
    labs(title = "Exercising in College Students - Ordered", x = "", y = "Count",
        fill = "Exercise level") +
    theme(plot.title = element_text(size = rel(2))) +
    scale_fill_brewer(palette = "Dark2") +
    coord_flip()

# Show the two versions side by side for comparison.
grid.arrange(default, ordered, ncol = 2)

plot of chunk unnamed-chunk-3

Describe how your graphs display the distributions of how often the students smoke and exercise. What effect does ordering the categories have on the graphic? What information do you gain (if any)? Which graphic do you prefer? Why?

The graphs effectively display the distributions of how often students smoke and exercise, mainly because there is a small number of categories in each case. Ordering affects how quickly you can see the trend in the data: when the categories are arbitrarily ordered, you have to go through each category (in increasing or decreasing order) and make the comparisons mentally, whereas with a sorted graph you know immediately whether the trend is positive or negative. I definitely prefer the ordered graph.

Exercise 2

# Exercise 2: stacked bar charts for three pairs of categorical variables in
# the birthwt data set (race/smoke, smoke/ptl, ptl/race).
data(birthwt)

# Replace the coded values with readable labels; factor() applies `labels` to
# the sorted unique values of each column, in order.
birthwt$race <- factor(birthwt$race, labels = c("white", "black", "other"))
birthwt$smoke <- factor(birthwt$smoke, labels = c("no", "yes"))

# Race on the x axis, bars split by smoking status.
raceplot <- ggplot(birthwt, aes(factor(race), fill = factor(smoke))) +
    geom_bar() +
    labs(title = "Race in Newborns", x = "Race", y = "Count", fill = "Smoking level") +
    scale_fill_brewer(palette = "Set1") +
    theme(plot.title = element_text(size = rel(2)))

# Smoking status on the x axis, bars split by the ptl value.
smokeplot <- ggplot(birthwt, aes(factor(smoke), fill = factor(ptl))) +
    geom_bar() +
    labs(title = "Smoking Habits", x = "Smoking level", y = "Count", fill = "PTL") +
    scale_fill_brewer(palette = "Set2") +
    theme(plot.title = element_text(size = rel(2)))

# ptl on the x axis, bars split by race. The legend is titled "Race" because
# the fill aesthetic is race (it was previously mislabeled "Smoking level").
ptlplot <- ggplot(birthwt, aes(factor(ptl), fill = factor(race))) +
    geom_bar() +
    labs(title = "Premature Labors", x = "PTL", y = "Count", fill = "Race") +
    scale_fill_brewer(palette = "Set3") +
    theme(plot.title = element_text(size = rel(2)))

# Lay the three charts out in a single row.
grid.arrange(raceplot, smokeplot, ptlplot, ncol = 3)

plot of chunk unnamed-chunk-4

# Mosaic plots for three pairs of birthwt variables, drawn in one row.
# par() returns the previous settings; they are restored at the end so the
# 1 x 3 layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 3))

# With shade = TRUE the tiles are shaded by mosaicplot itself and a `color`
# argument is ignored, so none is passed.
racesmoke <- table(birthwt[, c("race", "smoke")])
mosaicplot(racesmoke, shade = TRUE, main = "Race vs Smoking moms")

smokeptl <- table(birthwt[, c("ptl", "smoke")])
mosaicplot(smokeptl, shade = TRUE, main = "PTL vs Smoking moms")

ptlrace <- table(birthwt[, c("race", "ptl")])
mosaicplot(ptlrace, shade = TRUE, main = "PTL vs Race")

par(old_par)

plot of chunk unnamed-chunk-5

Exercise 3

library(epicalc)
## Loading required package: foreign Loading required package: survival
## Loading required package: splines Loading required package: nnet
# Exercise 3: prepare the minn38 data for plotting.
data(minn38)

# Sum the frequency column `f` over every combination of phs, fol and sex,
# collapsing any other columns of minn38.
# The formula is passed positionally: the formula method's first argument was
# renamed from `formula` to `x` in newer versions of R, so naming it
# `formula =` is not portable across R versions.
hsdata <- aggregate(f ~ phs + fol + sex, data = minn38, FUN = sum)

# expand() comes from epicalc; it is called with `f` as the count column and
# retain.freq = FALSE. The plots built from exphsdata count rows, so this is
# presumably one row per student with the frequency column dropped --
# NOTE(review): confirm against the epicalc documentation.
exphsdata <- expand(hsdata, index.var = "f", retain.freq = FALSE)

# Post-high-school status (phs) as stacked bars, split by the fol column.
floplot <- ggplot(exphsdata, aes(phs, fill = factor(fol))) +
    geom_bar() +
    scale_fill_brewer(palette = "Set2") +
    theme(plot.title = element_text(size = rel(1))) +
    labs(
        title = "Post high school status vs Father Occupational Level",
        x = "Post High School Status",
        y = "Count",
        fill = "Father Occupational Level"
    )

# Post-high-school status as stacked bars, split by sex.
sexplot <- ggplot(exphsdata, aes(phs, fill = factor(sex))) +
    geom_bar() +
    scale_fill_brewer(palette = "Set3") +
    theme(plot.title = element_text(size = rel(1))) +
    labs(
        title = "Post High School Status vs Sex",
        x = "Post High School Status",
        y = "Count",
        fill = "Sex"
    )

# All three variables at once: fol on x, phs on y, points coloured by sex.
# geom_jitter() spreads the points so overlapping rows remain visible.
allplot <- ggplot(exphsdata, aes(x = fol, y = phs, colour = factor(sex))) +
    geom_jitter() +
    theme(plot.title = element_text(size = rel(1))) +
    labs(
        title = "Post High School Status vs Father Occupational Level vs Sex",
        x = "Father Occupational Level",
        y = "Post High School Status",
        colour = "Sex"
    )

# Lay the three charts out in a single row.
grid.arrange(floplot, sexplot, allplot, ncol = 3)

plot of chunk unnamed-chunk-6


# Two shaded mosaic plots in one row, both with post-high-school status on
# the vertical axis.
par(mfrow = c(1, 2))

# Horizontal axis: fol.
mosaicplot(
    fol ~ phs,
    data = exphsdata,
    shade = TRUE,
    main = "Post high school status vs Father Occupational Level",
    xlab = "Father Occupational Level",
    ylab = "Post High School Status"
)

# Horizontal axis: sex.
mosaicplot(
    sex ~ phs,
    data = exphsdata,
    shade = TRUE,
    main = "Post High School Status vs Sex",
    xlab = "Sex",
    ylab = "Post High School Status"
)

plot of chunk unnamed-chunk-6