##    PUBID_1997        SEX             RACE            AGE       
##  Min.   :   1   Min.   :1.000   Min.   :1.000   Min.   :26.00  
##  1st Qu.:2249   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:28.00  
##  Median :4502   Median :1.000   Median :4.000   Median :29.00  
##  Mean   :4504   Mean   :1.488   Mean   :2.788   Mean   :28.79  
##  3rd Qu.:6758   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:30.00  
##  Max.   :9022   Max.   :2.000   Max.   :4.000   Max.   :32.00  
##                                                 NA's   :1561   
##    TEEN_HOURS     ADULT_Hours   
##  Min.   :    0   Min.   :    0  
##  1st Qu.: 1255   1st Qu.: 8190  
##  Median : 2741   Median :16396  
##  Mean   : 3105   Mean   :15595  
##  3rd Qu.: 4470   3rd Qu.:22418  
##  Max.   :18829   Max.   :63722  
##  NA's   :707     NA's   :1596
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Loading required package: magrittr
## Loading required package: ggvis

Question 1
NLS DATASET

## Observations: 8984
## Variables:
## $ PUBID_1997  (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ SEX         (int) 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2,...
## $ RACE        (int) 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2,...
## $ AGE         (int) 29, 29, 28, 30, 29, 29, 28, 30, 29, NA, 29, 29, 26...
## $ TEEN_HOURS  (int) 5831, NA, 6489, 3292, 680, NA, 1650, 2082, 864, 0,...
## $ ADULT_Hours (int) NA, 29712, NA, 23390, 28056, 18379, NA, 18419, 192...
##    PUBID_1997        SEX             RACE            AGE       
##  Min.   :   1   Min.   :1.000   Min.   :1.000   Min.   :26.00  
##  1st Qu.:2249   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:28.00  
##  Median :4502   Median :1.000   Median :4.000   Median :29.00  
##  Mean   :4504   Mean   :1.488   Mean   :2.788   Mean   :28.79  
##  3rd Qu.:6758   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:30.00  
##  Max.   :9022   Max.   :2.000   Max.   :4.000   Max.   :32.00  
##                                                 NA's   :1561   
##    TEEN_HOURS     ADULT_Hours   
##  Min.   :    0   Min.   :    0  
##  1st Qu.: 1255   1st Qu.: 8190  
##  Median : 2741   Median :16396  
##  Mean   : 3105   Mean   :15595  
##  3rd Qu.: 4470   3rd Qu.:22418  
##  Max.   :18829   Max.   :63722  
##  NA's   :707     NA's   :1596

Question 2
NLS DATASET: Filtered by people who were age 30 during interviews in 2011 with N=1,546

## Source: local data frame [1,546 x 6]
## 
##    PUBID_1997 SEX RACE AGE TEEN_HOURS ADULT_Hours
## 1           4   2    2  30       3292       23390
## 2           8   2    4  30       2082       18419
## 3          26   1    1  30        760           0
## 4          27   1    1  30       1592       11060
## 5          32   2    4  30       4611       16981
## 6          33   2    4  30       1862       26551
## 7          38   2    4  30          0          NA
## 8          55   2    2  30       1368       16934
## 9          59   2    1  30       5648          NA
## 10         68   1    1  30       1316          NA
## ..        ... ...  ... ...        ...         ...
##    PUBID_1997        SEX             RACE            AGE    
##  Min.   :   4   Min.   :1.000   Min.   :1.000   Min.   :30  
##  1st Qu.:2528   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:30  
##  Median :4798   Median :2.000   Median :3.000   Median :30  
##  Mean   :4712   Mean   :1.504   Mean   :2.732   Mean   :30  
##  3rd Qu.:7014   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:30  
##  Max.   :9009   Max.   :2.000   Max.   :4.000   Max.   :30  
##                                                             
##    TEEN_HOURS     ADULT_Hours   
##  Min.   :    0   Min.   :    0  
##  1st Qu.: 1423   1st Qu.:13189  
##  Median : 2925   Median :20847  
##  Mean   : 3252   Mean   :19444  
##  3rd Qu.: 4510   3rd Qu.:25961  
##  Max.   :14334   Max.   :63722  
##  NA's   :130     NA's   :334
## Observations: 1546
## Variables:
## $ PUBID_1997  (int) 4, 8, 26, 27, 32, 33, 38, 55, 59, 68, 69, 78, 80, ...
## $ SEX         (int) 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2,...
## $ RACE        (int) 2, 4, 1, 1, 4, 4, 4, 2, 1, 1, 1, 4, 1, 2, 4, 4, 1,...
## $ AGE         (int) 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30...
## $ TEEN_HOURS  (int) 3292, 2082, 760, 1592, 4611, 1862, 0, 1368, 5648, ...
## $ ADULT_Hours (int) 23390, 18419, 0, 11060, 16981, 26551, NA, 16934, N...

Question 3
Test the null hypothesis that there is no difference by sex between the mean cumulative hours worked from age 14 through age 19.

Hypothesis (null) There is no difference by sex between the mean cumulative work hours of ages 14 through age 19.

Hypothesis (alternative) There is a difference by sex between the mean cumulative work hours of ages 14 through age 19.

A t-test will be used to analyze the data at an alpha level of .05

# Histogram of TEEN_HOURS for rows with a non-negative SEX code and strictly
# positive TEEN_HOURS (rows where either condition is NA are dropped by
# filter()). The bin width is left for ggvis to guess.
thirty1 %>%
  filter(SEX >= 0 & TEEN_HOURS > 0) %>%
  ggvis(~TEEN_HOURS) %>%
  layer_histograms()
## Guessing width = 500 # range / 29

Analysis of the histogram of cumulative work hours from age 14-19 shows outliers that impact the mean cumulative hours for the t-test analysis. Therefore, the dataset was trimmed for the t-test analysis.

# Per-sex summary of teen work hours on the trimmed sample: keeps rows with
# 0 < TEEN_HOURS < 11000 (rows where a condition is NA are dropped by
# filter()), then reports the group size and the group means for each SEX value.
thirty1 %>%
  filter(SEX >= 0, TEEN_HOURS > 0, TEEN_HOURS < 11000) %>%
  # Group on SEX alone: group_by() has no na.rm argument, so `na.rm = TRUE`
  # was being added as a constant grouping column named `na.rm`.
  group_by(SEX) %>%
  summarize(
    n = n(),
    mean_sex = mean(SEX, na.rm = TRUE),
    mean_teenhrs = mean(TEEN_HOURS, na.rm = TRUE)
  )
## Source: local data frame [2 x 5]
## Groups: SEX
## 
##   SEX na.rm   n mean_sex mean_teenhrs
## 1   1  TRUE 671        1     3521.080
## 2   2  TRUE 680        2     3094.515
# Two-sample t-test of TEEN_HOURS between the two SEX groups, with equal
# variances assumed (var.equal = TRUE).
# NOTE(review): this is run on all of thirty1, not on the trimmed subset
# (0 < TEEN_HOURS < 11000) used by the preceding summary -- confirm intended.
t.test(
  TEEN_HOURS ~ SEX,
  data = thirty1,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  TEEN_HOURS by SEX
## t = 4.4273, df = 1414, p-value = 1.028e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  306.6484 794.5792
## sample estimates:
## mean in group 1 mean in group 2 
##        3528.906        2978.293

The null hypothesis is rejected at the specified .05 level, t=4.43, p <.05, 95% CI[306.65, 794.58].


Question 4
Test the null hypothesis that there is no difference by sex between the mean cumulative hours worked from age 20 and older.

Hypothesis (null) There is no difference by sex between the mean cumulative work hours from age 20 and older.

Hypothesis (alternative) There is a difference by sex between the mean cumulative work hours from age 20 and older.

A t-test will be used to analyze the data at an alpha level of .05

# Histogram of ADULT_Hours for rows with a non-negative SEX code and strictly
# positive ADULT_Hours (rows where either condition is NA are dropped by
# filter()). The bin width is left for ggvis to guess.
thirty1 %>%
  filter(SEX >= 0 & ADULT_Hours > 0) %>%
  ggvis(~ADULT_Hours) %>%
  layer_histograms()
## Guessing width = 2000 # range / 32

Analysis of the histogram of cumulative work hours from age 20 and older shows outliers that impact the mean cumulative hours for the t-test analysis. Therefore, the dataset was trimmed for the t-test analysis.

# Per-sex summary of adult work hours on the trimmed sample: keeps rows with
# 0 < ADULT_Hours < 45000 (rows where a condition is NA are dropped by
# filter()), then reports the group size and the group means for each SEX value.
thirty1 %>%
  filter(SEX >= 0, ADULT_Hours > 0, ADULT_Hours < 45000) %>%
  # Group on SEX alone: group_by() has no na.rm argument, so `na.rm = TRUE`
  # was being added as a constant grouping column named `na.rm`.
  group_by(SEX) %>%
  summarize(
    n = n(),
    mean_sex = mean(SEX, na.rm = TRUE),
    mean_adulthrs = mean(ADULT_Hours, na.rm = TRUE)
  )
## Source: local data frame [2 x 5]
## Groups: SEX
## 
##   SEX na.rm   n mean_sex mean_adulthrs
## 1   1  TRUE 581        1      20966.57
## 2   2  TRUE 609        2      18431.95
# Two-sample t-test of ADULT_Hours between the two SEX groups, with equal
# variances assumed (var.equal = TRUE).
# NOTE(review): this is run on all of thirty1, not on the trimmed subset
# (0 < ADULT_Hours < 45000) used by the preceding summary -- confirm intended.
t.test(
  ADULT_Hours ~ SEX,
  data = thirty1,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  ADULT_Hours by SEX
## t = 4.6742, df = 1210, p-value = 3.283e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1404.148 3435.568
## sample estimates:
## mean in group 1 mean in group 2 
##        20671.98        18252.12

The null hypothesis is rejected at the specified .05 level, t=4.67, p <.05, 95% CI[1404.15, 3435.57].


Question 5
Test the null hypothesis that there is no difference by race/ethnicity between the mean cumulative hours worked from age 14 through age 19. In this analysis — and in the analysis for item (6) — code race/ethnicity as “1” if race/ethnicity is “Non-Black, Non-Hispanic” and “0” otherwise.

Hypothesis (null) There is no difference by race/ethnicity between the mean cumulative work hours of ages 14 through age 19.

Hypothesis (alternative) There is a difference by race/ethnicity between the mean cumulative work hours of ages 14 through age 19.

A t-test will be used to analyze the data at an alpha level of .05. First, the variable RACE was recoded:

# Binary recode of RACE into RACE1: 0 when RACE is 1, 2 or 3, and 1 for any
# other value. A missing RACE yields a missing RACE1, because the comparisons
# evaluate to NA.
race_code <- thirty1$RACE
thirty1$RACE1 <- ifelse(
  race_code == 1 | race_code == 2 | race_code == 3,
  0,
  1
)
rm(race_code)
# Summary of teen work hours by recoded race on the trimmed sample: keeps rows
# with 0 < TEEN_HOURS < 11000 (rows where a condition is NA are dropped by
# filter()), then reports the group size and the group means for each RACE1
# value.
thirty1 %>%
  filter(RACE1 >= 0, TEEN_HOURS > 0, TEEN_HOURS < 11000) %>%
  # Group on RACE1 alone: group_by() has no na.rm argument, so `na.rm = TRUE`
  # was being added as a constant grouping column named `na.rm`.
  group_by(RACE1) %>%
  summarize(
    n = n(),
    mean_RACE = mean(RACE1, na.rm = TRUE),
    mean_teenhrs = mean(TEEN_HOURS, na.rm = TRUE)
  )
## Source: local data frame [2 x 5]
## Groups: RACE1
## 
##   RACE1 na.rm   n mean_RACE mean_teenhrs
## 1     0  TRUE 659         0     2980.275
## 2     1  TRUE 692         1     3616.928
# Two-sample t-test of TEEN_HOURS between the two RACE1 groups, with equal
# variances assumed (var.equal = TRUE).
# NOTE(review): this is run on all of thirty1, not on the trimmed subset
# (0 < TEEN_HOURS < 11000) used by the preceding summary -- confirm intended.
t.test(
  TEEN_HOURS ~ RACE1,
  data = thirty1,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  TEEN_HOURS by RACE1
## t = -6.3596, df = 1414, p-value = 2.725e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1027.4393  -543.0211
## sample estimates:
## mean in group 0 mean in group 1 
##        2860.372        3645.603

The null hypothesis is rejected at the specified .05 level, t=-6.36, p <.05, 95% CI[-1027.44, -543.02].


Question 6
Test the null hypothesis that there is no difference by race/ethnicity between the mean cumulative hours worked from age 20 and older.

Hypothesis (null) There is no difference by race/ethnicity between the mean cumulative work hours from age 20 and older.

Hypothesis (alternative) There is a difference by race/ethnicity between the mean cumulative work hours from age 20 and older.

A t-test will be used to analyze the data at an alpha level of .05

# Summary of adult work hours by recoded race on the trimmed sample: keeps rows
# with 0 < ADULT_Hours < 45000 (rows where a condition is NA are dropped by
# filter()), then reports the group size and the group means for each RACE1
# value.
thirty1 %>%
  filter(RACE1 >= 0, ADULT_Hours > 0, ADULT_Hours < 45000) %>%
  # Group on RACE1 alone: group_by() has no na.rm argument, so `na.rm = TRUE`
  # was being added as a constant grouping column named `na.rm`.
  group_by(RACE1) %>%
  summarize(
    n = n(),
    mean_RACE = mean(RACE1, na.rm = TRUE),
    mean_adulthrs = mean(ADULT_Hours, na.rm = TRUE)
  )
## Source: local data frame [2 x 5]
## Groups: RACE1
## 
##   RACE1 na.rm   n mean_RACE mean_adulthrs
## 1     0  TRUE 579         0      18764.36
## 2     1  TRUE 611         1      20527.11
# Two-sample t-test of ADULT_Hours between the two RACE1 groups, with equal
# variances assumed (var.equal = TRUE).
# NOTE(review): this is run on all of thirty1, not on the trimmed subset
# (0 < ADULT_Hours < 45000) used by the preceding summary -- confirm intended.
t.test(
  ADULT_Hours ~ RACE1,
  data = thirty1,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  ADULT_Hours by RACE1
## t = -4.239, df = 1210, p-value = 2.416e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3215.729 -1180.859
## sample estimates:
## mean in group 0 mean in group 1 
##        18321.36        20519.65

The null hypothesis is rejected at the specified .05 level, t=-4.24, p <.05, 95% CI[-3215.73, -1180.86].