Hi all! Here is my code for Workshop 3.

Download GSS2012.csv here

Question 1:

# Read GSS2012.csv from the working directory and show the structure of the
# resulting data frame (every column is numeric-coded at this point).
gss <- read.csv(file = "GSS2012.csv")
str(object = gss)
## 'data.frame':    4820 obs. of  9 variables:
##  $ marital: int  5 5 1 1 4 2 1 4 1 5 ...
##  $ age    : int  22 21 42 49 70 50 35 24 28 28 ...
##  $ sex    : int  1 1 1 2 2 2 2 2 2 2 ...
##  $ race   : int  1 1 3 1 2 1 1 3 2 1 ...
##  $ hompop : int  3 5 4 3 1 2 4 4 2 1 ...
##  $ happy  : int  1 1 2 1 1 1 2 2 3 2 ...
##  $ health : int  2 1 2 0 0 4 0 3 3 2 ...
##  $ satjob : int  1 1 3 0 0 0 1 2 2 2 ...
##  $ fincome: num  178712 178712 91920 107240 42130 ...
# 1a.
# recode all "0" to NA
# (this applies to every column, so a genuine zero in a numeric column such
# as fincome would also be turned into NA)
gss[gss == 0] <- NA
# count the NAs in each column: is.na() on a data frame returns a logical
# matrix, and colSums() adds up the TRUEs column by column
colSums(is.na(gss))
## marital     age     sex    race  hompop   happy  health  satjob fincome 
##       2      51       0       0       1      14    1672    1266     446
# overall proportion of cells that are NA
mean(is.na(gss)) # not required in question
## [1] 0.07957584
# 1b.
# Turn the integer-coded categorical columns into labelled factors.
# Values that are not listed in `levels` (including the NAs created in 1a)
# stay NA in the resulting factor.
# from example
gss$sex <- factor(gss$sex, levels = 1:2, labels = c("Male", "Female"))

# factor
gss$marital <- factor(
  gss$marital,
  levels = 1:5,
  labels = c("Married", "Widowed", "Divorced", "Separated", "Never Married")
)
gss$race <- factor(gss$race, levels = 1:3,
                   labels = c("White", "Black", "Other"))

gss$happy <- factor(
  gss$happy,
  levels = 1:3,
  labels = c("Very happy", "Pretty happy", "Not too happy")
)
gss$health <- factor(gss$health, levels = 1:4,
                     labels = c("Excellent", "Good", "Fair", "Poor"))
# label spelling corrected: "Very satisfied" (was "Very satified")
gss$satjob <- factor(
  gss$satjob,
  levels = 1:4,
  labels = c("Very satisfied", "Moderately satisfied",
             "A little dissatisfied", "Very dissatisfied")
)
str(gss)
## 'data.frame':    4820 obs. of  9 variables:
##  $ marital: Factor w/ 5 levels "Married","Widowed",..: 5 5 1 1 4 2 1 4 1 5 ...
##  $ age    : int  22 21 42 49 70 50 35 24 28 28 ...
##  $ sex    : Factor w/ 2 levels "Male","Female": 1 1 1 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 3 levels "White","Black",..: 1 1 3 1 2 1 1 3 2 1 ...
##  $ hompop : int  3 5 4 3 1 2 4 4 2 1 ...
##  $ happy  : Factor w/ 3 levels "Very happy","Pretty happy",..: 1 1 2 1 1 1 2 2 3 2 ...
##  $ health : Factor w/ 4 levels "Excellent","Good",..: 2 1 2 NA NA 4 NA 3 3 2 ...
##  $ satjob : Factor w/ 4 levels "Very satisfied",..: 1 1 3 NA NA NA 1 2 2 2 ...
##  $ fincome: num  178712 178712 91920 107240 42130 ...
# 1c.
# Per-capita income: family income divided by household size. hompop values
# of 0 were recoded to NA in 1a, so this never divides by zero; rows with a
# missing fincome or hompop get an NA pcincome.
gss$pcincome <- gss$fincome / gss$hompop
str(gss)
## 'data.frame':    4820 obs. of  10 variables:
##  $ marital : Factor w/ 5 levels "Married","Widowed",..: 5 5 1 1 4 2 1 4 1 5 ...
##  $ age     : int  22 21 42 49 70 50 35 24 28 28 ...
##  $ sex     : Factor w/ 2 levels "Male","Female": 1 1 1 2 2 2 2 2 2 2 ...
##  $ race    : Factor w/ 3 levels "White","Black",..: 1 1 3 1 2 1 1 3 2 1 ...
##  $ hompop  : int  3 5 4 3 1 2 4 4 2 1 ...
##  $ happy   : Factor w/ 3 levels "Very happy","Pretty happy",..: 1 1 2 1 1 1 2 2 3 2 ...
##  $ health  : Factor w/ 4 levels "Excellent","Good",..: 2 1 2 NA NA 4 NA 3 3 2 ...
##  $ satjob  : Factor w/ 4 levels "Very satisfied",..: 1 1 3 NA NA NA 1 2 2 2 ...
##  $ fincome : num  178712 178712 91920 107240 42130 ...
##  $ pcincome: num  59571 35742 22980 35747 42130 ...

Question 2

# 2a.
# Per-column summary: level counts for the factors, five-number summary plus
# mean for the numeric columns, and the NA count of each column.
summary(gss)
##           marital          age            sex          race     
##  Married      :2255   Min.   :18.00   Male  :2132   White:3700  
##  Widowed      : 404   1st Qu.:35.00   Female:2688   Black: 722  
##  Divorced     : 798   Median :49.00                 Other: 398  
##  Separated    : 159   Mean   :49.59                             
##  Never Married:1202   3rd Qu.:62.00                             
##  NA's         :   2   Max.   :89.00                             
##                       NA's   :51                                
##      hompop                 happy            health    
##  Min.   : 1.000   Very happy   :1391   Excellent: 794  
##  1st Qu.: 1.000   Pretty happy :2756   Good     :1475  
##  Median : 2.000   Not too happy: 659   Fair     : 694  
##  Mean   : 2.588   NA's         :  14   Poor     : 185  
##  3rd Qu.: 3.000                        NA's     :1672  
##  Max.   :10.000                                        
##  NA's   :1                                             
##                    satjob        fincome          pcincome        
##  Very satisfied       :1761   Min.   :   383   Min.   :    54.71  
##  Moderately satisfied :1311   1st Qu.: 18193   1st Qu.:  8138.75  
##  A little dissatisfied: 340   Median : 34470   Median : 16277.50  
##  Very dissatisfied    : 142   Mean   : 49894   Mean   : 23109.12  
##  NA's                 :1266   3rd Qu.: 63195   3rd Qu.: 30640.00  
##                               Max.   :178712   Max.   :178712.46  
##                               NA's   :446      NA's   :446
# 2b.
# Histogram of family income.
hist(gss$fincome)

# 2c.
# Family income against happiness, then against race. Both x variables are
# factors, so plot() draws one box per factor level.
with(gss, plot(happy, fincome,
               xlab = "happy", ylab = "fincome", main = "fincome vs happy"))

with(gss, plot(race, fincome,
               xlab = "race", ylab = "fincome", main = "fincome vs race"))

# 2d.
# Split respondents into two age groups. subset() drops rows whose condition
# evaluates to NA, so respondents with a missing age end up in neither group.
# group 1, 30 <= age < 50
sub1gss <- subset(gss, age >= 30 & age < 50)
# group 2, age < 30 or age >= 50
sub2gss <- subset(gss, age < 30 | age >= 50)

# Plot fincome against the factor column `var` of `data`; `age_label` is
# appended to the title to identify the age group being shown.
plot_fincome_by <- function(data, var, age_label) {
  plot(data[[var]], data$fincome,
       xlab = var, ylab = "fincome",
       main = paste0("fincome vs ", var, ", ", age_label), cex.axis = 0.8)
}

# 2 x 2 grid filled column by column: group 1 in the left column, group 2 in
# the right column. par() returns the previous setting, which is kept so the
# layout can be restored afterwards instead of assuming it was 1 x 1.
old_par <- par(mfcol = c(2, 2))
plot_fincome_by(sub1gss, "happy", "30 =< age <50")
plot_fincome_by(sub1gss, "race", "30 =< age <50")
plot_fincome_by(sub2gss, "happy", "age <30 or age >= 50")
plot_fincome_by(sub2gss, "race", "age <30 or age >= 50")

par(old_par)
# 2e.
# Bar chart of respondent counts by age, with each bar split by happiness.
# NOTE(review): na.omit() removes every row that has an NA in ANY column
# (health, satjob, fincome, ...), not just in the plotted ones, so all three
# charts below use the complete cases only - confirm this is intended.
library(ggplot2)
gssNoNA <- na.omit(gss)
g0 <- ggplot(data = gssNoNA, mapping = aes(x = age))
g0 + geom_bar(mapping = aes(fill = happy))

# 2f.
# Same chart, bars split by self-rated health.
g0 + geom_bar(mapping = aes(fill = health))

# 2g.
# Same chart, bars split by job satisfaction.
g0 + geom_bar(mapping = aes(fill = satjob))

Return to contents page