TAS Descriptive statistics

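We will be going through the descriptive statistics of the TAS data for three sets of waves: 2005 & 2009, 2009 & 2015, and 2005 & 2009 & 2015.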

Step 1: Loading Packages

library(tidyverse)
library(readxl)
library(ggplot2)

Step 2: Import the data

# Read the long-format TAS workbook into a tibble.
TAS_data_long_format_age <- read_excel(
  path = "TAS_data_long_format_age.xlsx"
)

Step 3: Preview the data

# Inspect the full table, then print its first six rows.
TAS_data_long_format_age %>% view()
TAS_data_long_format_age %>% head()
## # A tibble: 6 × 42
##     TAS TAS05 TAS09 TAS15 `1968 Interview Number` `Person Number` Gender
##   <dbl> <dbl> <dbl> <dbl>                   <dbl>           <dbl>  <dbl>
## 1     2     1     1    NA                       4             180      2
## 2     2     1     1    NA                       5              32      2
## 3     2     1     1    NA                       6              34      1
## 4     2     1     1    NA                      14              30      1
## 5     1     1    NA    NA                      18              38      2
## 6     2     1     1    NA                      47              34      2
## # ℹ 35 more variables: `Individual is sample` <dbl>, `Year ID Number` <dbl>,
## #   `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## #   `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## #   C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## #   E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## #   G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## #   G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>, …

2005 & 2009

Step 5: Demographics (2005 & 2009)

Filter the data (2005 & 2009)

# Build the 2005 & 2009 panel: rows from the 2005/2009 waves for participants
# flagged in both TAS05 and TAS09, with implausible ages repaired from the
# participant's other wave and participants with an implausible age gap removed.
Long_format_2005_2009 <- TAS_data_long_format_age %>%
  # Waves before 2010 only, and only participants flagged in both waves.
  filter(year < 2010, TAS05 == 1, TAS09 == 1) %>%
  # Merge the two identifier columns into a single participant key.
  unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>%
  # Numeric wave code. The 2015 branch cannot match after the year filter;
  # it is kept so the coding is identical in every panel of this report.
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  group_by(TAS_ID) %>%
  # 2027 and 2023 are placeholder "ages" (presumably produced upstream when
  # the graduation year is missing -- TODO confirm). Replace each with the
  # participant's age in the other wave shifted by the 4-year wave gap.
  # first() yields NA instead of a zero-length vector when the other wave is
  # absent, so an incomplete participant is dropped by the filters below
  # rather than aborting the whole pipeline with a size error.
  mutate(
    Age_18_graduate = case_when(
      Age_18_graduate == 2027 ~ first(Age_18_graduate[year == 2005]) + 4,
      Age_18_graduate == 2023 ~ first(Age_18_graduate[year == 2009]) - 4,
      TRUE ~ Age_18_graduate
    )
  ) %>%
  ungroup() %>%
  # Remove rows whose age is still a placeholder (or NA).
  filter(Age_18_graduate < 100) %>%
  group_by(TAS_ID) %>%
  # Age gap between the two waves; NA when either wave is missing.
  mutate(
    age_difference =
      first(Age_18_graduate[year == 2009]) - first(Age_18_graduate[year == 2005])
  ) %>%
  # Keep participants whose ages differ by 3 to 5 years across the waves.
  filter(age_difference < 6, age_difference > 2) %>%
  ungroup()
view(Long_format_2005_2009)
# Show every column of the first rows; no hard-coded column count to maintain.
knitr::kable(head(Long_format_2005_2009))
TAS TAS05 TAS09 TAS15 TAS_ID Gender Individual is sample Year ID Number Sequence Number Relationship to Head Release Number B5A B5D B6C C2D C2E C2F D2D3_month D2D3_year E1_1st_mention E1_2nd_mention E1_3rd_mention E3 G1 G2_month G2_year G10 G11 G30A G41A G41B G41C G41H G41P H1 L7_1st_mention L7_2nd_mention L7_3rd_mention Age_17_graduate Age_18_graduate year year_new age_difference
2 1 1 NA 5_32 2 2 624 3 30 5 5 5 5 7 7 7 0 0 1 7 0 0 1 5 2002 1 1 7 7 6 6 7 5 2 1 0 0 20 21 2005 -1 4
2 1 1 NA 6_34 1 2 1202 51 30 5 2 2 6 1 1 1 0 0 7 0 0 5 1 5 2002 1 1 0 7 5 7 5 3 1 1 0 0 20 21 2005 -1 4
2 1 1 NA 14_30 1 2 736 51 30 5 4 4 4 2 1 1 0 0 2 0 0 0 1 6 2003 1 5 6 5 6 6 5 5 2 1 0 0 19 20 2005 -1 4
2 1 1 NA 47_34 2 2 2516 3 30 5 4 5 6 4 5 2 0 0 1 0 0 0 1 5 2005 5 0 6 3 6 4 7 4 1 1 0 0 17 18 2005 -1 4
2 1 1 NA 53_35 2 2 1392 3 33 5 4 5 5 3 1 1 0 0 1 0 0 0 1 6 2002 1 1 7 6 7 7 7 5 1 1 0 0 20 21 2005 -1 4
2 1 1 NA 53_36 2 2 1616 3 30 5 4 5 7 4 1 1 0 0 6 0 0 1 1 6 2005 1 5 7 7 7 5 7 6 2 1 0 0 17 18 2005 -1 4

Demographics

Year

Long_format_2005_2009 %>%
  count(year)  # rows per wave
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   542
## 2  2009   542

Age

Long_format_2005_2009 %>%
  count(Age_18_graduate)  # both waves pooled
## # A tibble: 12 × 2
##    Age_18_graduate     n
##              <dbl> <int>
##  1              14     1
##  2              17     7
##  3              18   166
##  4              19   150
##  5              20   141
##  6              21    83
##  7              22   167
##  8              23   150
##  9              24   140
## 10              25    76
## 11              26     2
## 12              27     1

Age by year (2005)

Long_format_2005_2009 %>%
  filter(year == 2005) %>%  # 2005 wave only
  count(Age_18_graduate)
## # A tibble: 8 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              14     1
## 2              17     7
## 3              18   165
## 4              19   150
## 5              20   141
## 6              21    76
## 7              22     1
## 8              23     1

Age by year (2009)

Long_format_2005_2009 %>%
  filter(year == 2009) %>%  # 2009 wave only
  count(Age_18_graduate)
## # A tibble: 8 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              18     1
## 2              21     7
## 3              22   166
## 4              23   149
## 5              24   140
## 6              25    76
## 7              26     2
## 8              27     1

Gender 1 = Male 2 = Female

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(Gender)
## # A tibble: 4 × 3
## # Groups:   year [2]
##    year Gender     n
##   <dbl>  <dbl> <int>
## 1  2005      1   245
## 2  2005      2   297
## 3  2009      1   245
## 4  2009      2   297

G2: Year graduated high school

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G2_year)
## # A tibble: 15 × 3
## # Groups:   year [2]
##     year G2_year     n
##    <dbl>   <dbl> <int>
##  1  2005       0    18
##  2  2005    2000     1
##  3  2005    2001     1
##  4  2005    2002    76
##  5  2005    2003   139
##  6  2005    2004   148
##  7  2005    2005   159
##  8  2009       0     6
##  9  2009    2001     2
## 10  2009    2002    76
## 11  2009    2003   140
## 12  2009    2004   146
## 13  2009    2005   164
## 14  2009    2006     7
## 15  2009    2009     1
Long_format_2005_2009 %>%
  filter(G2_year != 0) %>%            # drop rows whose graduation year is coded 0
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%   # keep participants still present in both waves
  ungroup() %>%
  group_by(year) %>%
  count(G2_year) %>%
  ungroup()
## # A tibble: 10 × 3
##     year G2_year     n
##    <dbl>   <dbl> <int>
##  1  2005    2001     1
##  2  2005    2002    76
##  3  2005    2003   139
##  4  2005    2004   145
##  5  2005    2005   157
##  6  2009    2001     2
##  7  2009    2002    76
##  8  2009    2003   138
##  9  2009    2004   144
## 10  2009    2005   158
Long_format_2005_2009 %>%
  filter(G2_year != 0) %>%            # drop rows whose graduation year is coded 0
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%   # keep participants still present in both waves
  ungroup() %>%
  count(year)                         # remaining rows per wave
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   518
## 2  2009   518
# Mean graduation year per wave, using participants with a non-zero
# G2_year in both waves.
mean_year_graduation <- Long_format_2005_2009 %>%
  filter(G2_year != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarize(mean_graduation = mean(G2_year, na.rm = TRUE)) %>%
  ungroup()
mean_year_graduation
## # A tibble: 2 × 2
##    year mean_graduation
##   <dbl>           <dbl>
## 1  2005           2004.
## 2  2009           2004.
# Standard deviation of graduation year per wave, using participants with a
# non-zero G2_year in both waves.
sd_year_graduation <- Long_format_2005_2009 %>%
  filter(G2_year != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarize(sd_graduation = sd(G2_year, na.rm = TRUE)) %>%
  ungroup()
sd_year_graduation
## # A tibble: 2 × 2
##    year sd_graduation
##   <dbl>         <dbl>
## 1  2005          1.05
## 2  2009          1.06

L7: Race – 1st mention 1 = White 2 = Black, African-American, or Negro 3 = American Indian or Alaska Native 4 = Asian 5 = Native Hawaiian or Pacific Islander 7 = Some other race 8 = DK 9 = NA; refused

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_1st_mention)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year L7_1st_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              1   293
##  2  2005              2   208
##  3  2005              3     3
##  4  2005              4     6
##  5  2005              5     3
##  6  2005              7     6
##  7  2005              8     2
##  8  2005              9    21
##  9  2009              1   306
## 10  2009              2   206
## 11  2009              3     5
## 12  2009              4     7
## 13  2009              7    17
## 14  2009              9     1

L7: Race – 2nd mention

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_2nd_mention)
## # A tibble: 12 × 3
## # Groups:   year [2]
##     year L7_2nd_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              0   529
##  2  2005              1     1
##  3  2005              2     4
##  4  2005              3     4
##  5  2005              5     2
##  6  2005              7     2
##  7  2009              0   514
##  8  2009              1     1
##  9  2009              2     6
## 10  2009              3    17
## 11  2009              4     1
## 12  2009              7     3

L7: Race – 3rd mention

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_3rd_mention)
## # A tibble: 7 × 3
## # Groups:   year [2]
##    year L7_3rd_mention     n
##   <dbl>          <dbl> <int>
## 1  2005              0   540
## 2  2005              3     1
## 3  2005              5     1
## 4  2009              0   536
## 5  2009              3     3
## 6  2009              5     1
## 7  2009              7     2

D2D3: Widowed – year

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(D2D3_year)
## # A tibble: 5 × 3
## # Groups:   year [2]
##    year D2D3_year     n
##   <dbl>     <dbl> <int>
## 1  2005         0   542
## 2  2009         0   538
## 3  2009      2006     2
## 4  2009      2007     1
## 5  2009      2009     1

E1: Employment status – 1st mention 1 = Working now, including military 2 = Only temporarily laid off; sick or maternity leave 3 = Looking for work, unemployed 4 = Retired 5 = Disabled, permanently or temporarily 6 = Keeping house 7 = Student 8 = Other

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_1st_mention)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year E1_1st_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              1   260
##  2  2005              2     3
##  3  2005              3    50
##  4  2005              5     1
##  5  2005              6    10
##  6  2005              7   213
##  7  2005              8     5
##  8  2009              1   376
##  9  2009              2     1
## 10  2009              3    77
## 11  2009              5     2
## 12  2009              6    20
## 13  2009              7    65
## 14  2009              8     1

E1: Employment status – 2nd mention

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_2nd_mention)
## # A tibble: 13 × 3
## # Groups:   year [2]
##     year E1_2nd_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              0   399
##  2  2005              1    27
##  3  2005              3     8
##  4  2005              5     1
##  5  2005              6     2
##  6  2005              7   105
##  7  2009              0   405
##  8  2009              1    23
##  9  2009              3    10
## 10  2009              5     2
## 11  2009              6    10
## 12  2009              7    91
## 13  2009              8     1

E1: Employment status – 3rd mention

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_3rd_mention)
## # A tibble: 7 × 3
## # Groups:   year [2]
##    year E1_3rd_mention     n
##   <dbl>          <dbl> <int>
## 1  2005              0   538
## 2  2005              1     1
## 3  2005              3     1
## 4  2005              6     1
## 5  2005              7     1
## 6  2009              0   541
## 7  2009              1     1

E3: Work for money 1 = Yes 5 = No 8 = DK 9 = NA; refused

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E3)
## # A tibble: 7 × 3
## # Groups:   year [2]
##    year    E3     n
##   <dbl> <dbl> <int>
## 1  2005     0   290
## 2  2005     1    71
## 3  2005     5   180
## 4  2005     9     1
## 5  2009     0   401
## 6  2009     1    20
## 7  2009     5   121

G1: Education status 1 = Graduated from high school 2 = Got a GED 3 = Neither

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G1)
## # A tibble: 7 × 3
## # Groups:   year [2]
##    year    G1     n
##   <dbl> <dbl> <int>
## 1  2005     1   526
## 2  2005     2     1
## 3  2005     3    13
## 4  2005     9     2
## 5  2009     1   537
## 6  2009     2     4
## 7  2009     3     1

G10: Attended College 1 = Yes 5 = No

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G10)
## # A tibble: 7 × 3
## # Groups:   year [2]
##    year   G10     n
##   <dbl> <dbl> <int>
## 1  2005     0    14
## 2  2005     1   414
## 3  2005     5   113
## 4  2005     9     1
## 5  2009     0     1
## 6  2009     1   443
## 7  2009     5    98

G11: Attending College 1 = Yes 5 = No

Long_format_2005_2009 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G11)
## # A tibble: 6 × 3
## # Groups:   year [2]
##    year   G11     n
##   <dbl> <dbl> <int>
## 1  2005     0   128
## 2  2005     1   347
## 3  2005     5    67
## 4  2009     0    99
## 5  2009     1   166
## 6  2009     5   277

2009 & 2015

Step 6: Demographics(2009 & 2015)

Filter the data (2009 & 2015)

# Build the 2009 & 2015 panel: rows from the 2009/2015 waves for participants
# flagged in both TAS09 and TAS15, with implausible ages repaired from the
# participant's other wave and participants with an implausible age gap removed.
Long_format_2009_2015 <- TAS_data_long_format_age %>%
  # Restrict to the two waves of this panel explicitly (as the 2005 & 2009
  # panel does) so a stray 2005 row can never leak into the tables below,
  # and keep only participants flagged in both waves.
  filter(year %in% c(2009, 2015), TAS09 == 1, TAS15 == 1) %>%
  # Merge the two identifier columns into a single participant key.
  unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>%
  # Numeric wave code; the 2005 branch cannot match here and is kept only so
  # the coding is identical in every panel of this report.
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  group_by(TAS_ID) %>%
  # 2033 and 2027 are placeholder "ages" (presumably produced upstream when
  # the graduation year is missing -- TODO confirm). Replace each with the
  # participant's age in the other wave shifted by the 6-year wave gap.
  # first() yields NA instead of a zero-length vector when the other wave is
  # absent, so an incomplete participant is dropped by the filters below
  # rather than aborting the whole pipeline with a size error.
  mutate(
    Age_18_graduate = case_when(
      Age_18_graduate == 2033 ~ first(Age_18_graduate[year == 2009]) + 6,
      Age_18_graduate == 2027 ~ first(Age_18_graduate[year == 2015]) - 6,
      TRUE ~ Age_18_graduate
    )
  ) %>%
  ungroup() %>%
  # Remove rows whose age is still a placeholder (or NA).
  filter(Age_18_graduate < 100) %>%
  group_by(TAS_ID) %>%
  # Age gap between the two waves; NA when either wave is missing.
  mutate(
    age_difference =
      first(Age_18_graduate[year == 2015]) - first(Age_18_graduate[year == 2009])
  ) %>%
  # Keep participants whose ages differ by 5 to 7 years across the waves.
  filter(age_difference < 8, age_difference > 4) %>%
  ungroup()
view(Long_format_2009_2015)
# Show every column of the first rows; no hard-coded column count to maintain.
knitr::kable(head(Long_format_2009_2015))
TAS TAS05 TAS09 TAS15 TAS_ID Gender Individual is sample Year ID Number Sequence Number Relationship to Head Release Number B5A B5D B6C C2D C2E C2F D2D3_month D2D3_year E1_1st_mention E1_2nd_mention E1_3rd_mention E3 G1 G2_month G2_year G10 G11 G30A G41A G41B G41C G41H G41P H1 L7_1st_mention L7_2nd_mention L7_3rd_mention Age_17_graduate Age_18_graduate year year_new age_difference
2 NA 1 1 4_39 2 2 13 3 60 3 4 5 4 6 7 5 0 0 1 0 0 0 1 5 2008 1 5 5 6 5 2 7 6 2 1 0 0 18 19 2009 0 6
2 NA 1 1 7_40 2 2 3836 2 22 3 2 2 7 7 3 4 0 0 6 0 0 5 1 6 2007 5 0 5 5 2 5 6 5 3 1 0 0 19 20 2009 0 6
2 NA 1 1 7_41 1 2 576 2 30 3 3 4 7 4 5 4 0 0 3 0 0 5 1 5 2009 5 0 7 5 6 5 7 5 2 1 0 0 17 18 2009 0 6
2 NA 1 1 10_34 2 2 3276 3 30 3 4 5 6 4 1 1 0 0 1 0 0 0 1 6 2008 1 5 7 7 5 4 7 5 2 2 0 0 18 19 2009 0 6
2 NA 1 1 14_31 2 2 713 1 10 3 5 5 7 4 4 4 0 0 1 0 0 0 1 6 2005 5 0 6 5 7 6 7 2 4 1 0 0 21 22 2009 0 6
2 NA 1 1 22_30 2 2 907 2 30 3 5 1 4 3 1 1 0 0 1 0 0 0 1 5 2006 1 1 7 6 6 6 6 6 1 2 0 0 20 21 2009 0 6

Demographics

Year

Long_format_2009_2015 %>%
  count(year)  # rows per wave
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   515
## 2  2015   515

Age

Long_format_2009_2015 %>%
  count(Age_18_graduate)  # both waves pooled
## # A tibble: 11 × 2
##    Age_18_graduate     n
##              <dbl> <int>
##  1              15     1
##  2              18   154
##  3              19   134
##  4              20   136
##  5              21    89
##  6              22     2
##  7              24   155
##  8              25   134
##  9              26   136
## 10              27    87
## 11              28     2

Age by year (2009)

Long_format_2009_2015 %>%
  filter(year == 2009) %>%  # 2009 wave only
  count(Age_18_graduate)
## # A tibble: 6 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              15     1
## 2              18   154
## 3              19   134
## 4              20   136
## 5              21    88
## 6              22     2

Age by year (2015)

Long_format_2009_2015 %>%
  filter(year == 2015) %>%  # 2015 wave only
  count(Age_18_graduate)
## # A tibble: 6 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              21     1
## 2              24   155
## 3              25   134
## 4              26   136
## 5              27    87
## 6              28     2

Gender 1 = Male 2 = Female

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(Gender)
## # A tibble: 4 × 3
## # Groups:   year [2]
##    year Gender     n
##   <dbl>  <dbl> <int>
## 1  2009      1   219
## 2  2009      2   296
## 3  2015      1   219
## 4  2015      2   296

G2: Year graduated high school

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G2_year)
## # A tibble: 12 × 3
## # Groups:   year [2]
##     year G2_year     n
##    <dbl>   <dbl> <int>
##  1  2009       0     4
##  2  2009    2005     2
##  3  2009    2006    88
##  4  2009    2007   135
##  5  2009    2008   133
##  6  2009    2009   153
##  7  2015       0   387
##  8  2015    2006    13
##  9  2015    2007    30
## 10  2015    2008    33
## 11  2015    2009    51
## 12  2015    2012     1
Long_format_2009_2015 %>%
  filter(G2_year != 0) %>%            # drop rows whose graduation year is coded 0
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%   # keep participants still present in both waves
  ungroup() %>%
  group_by(year) %>%
  count(G2_year) %>%
  ungroup()
## # A tibble: 8 × 3
##    year G2_year     n
##   <dbl>   <dbl> <int>
## 1  2009    2006    14
## 2  2009    2007    29
## 3  2009    2008    32
## 4  2009    2009    49
## 5  2015    2006    13
## 6  2015    2007    29
## 7  2015    2008    32
## 8  2015    2009    50
Long_format_2009_2015 %>%
  filter(G2_year != 0) %>%            # drop rows whose graduation year is coded 0
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%   # keep participants still present in both waves
  ungroup() %>%
  count(year)                         # remaining rows per wave
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   124
## 2  2015   124
# Mean graduation year per wave, using participants with a non-zero
# G2_year in both waves.
mean_year_graduation <- Long_format_2009_2015 %>%
  filter(G2_year != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarize(mean_graduation = mean(G2_year, na.rm = TRUE)) %>%
  ungroup()
mean_year_graduation
## # A tibble: 2 × 2
##    year mean_graduation
##   <dbl>           <dbl>
## 1  2009           2008.
## 2  2015           2008.
# Standard deviation of graduation year per wave, using participants with a
# non-zero G2_year in both waves.
sd_year_graduation <- Long_format_2009_2015 %>%
  filter(G2_year != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarize(sd_graduation = sd(G2_year, na.rm = TRUE)) %>%
  ungroup()
sd_year_graduation
## # A tibble: 2 × 2
##    year sd_graduation
##   <dbl>         <dbl>
## 1  2009          1.04
## 2  2015          1.03

L7: Race – 1st mention 1 = White 2 = Black, African-American, or Negro 3 = American Indian or Alaska Native 4 = Asian 5 = Native Hawaiian or Pacific Islander 7 = Some other race 8 = DK 9 = NA; refused

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_1st_mention)
## # A tibble: 13 × 3
## # Groups:   year [2]
##     year L7_1st_mention     n
##    <dbl>          <dbl> <int>
##  1  2009              1   261
##  2  2009              2   198
##  3  2009              3     7
##  4  2009              4    10
##  5  2009              7    37
##  6  2009              8     1
##  7  2009              9     1
##  8  2015              1   274
##  9  2015              2   195
## 10  2015              3     8
## 11  2015              4     8
## 12  2015              7    26
## 13  2015              9     4

L7: Race – 2nd mention

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_2nd_mention)
## # A tibble: 12 × 3
## # Groups:   year [2]
##     year L7_2nd_mention     n
##    <dbl>          <dbl> <int>
##  1  2009              0   489
##  2  2009              1     8
##  3  2009              2     3
##  4  2009              3    10
##  5  2009              4     1
##  6  2009              5     2
##  7  2009              7     2
##  8  2015              0   497
##  9  2015              1     4
## 10  2015              2     4
## 11  2015              3     8
## 12  2015              4     2

L7: Race – 3rd mention

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_3rd_mention)
## # A tibble: 5 × 3
## # Groups:   year [2]
##    year L7_3rd_mention     n
##   <dbl>          <dbl> <int>
## 1  2009              0   513
## 2  2009              3     2
## 3  2015              0   513
## 4  2015              1     1
## 5  2015              3     1

D2D3: Widowed – year

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(D2D3_year)
## # A tibble: 6 × 3
## # Groups:   year [2]
##    year D2D3_year     n
##   <dbl>     <dbl> <int>
## 1  2009         0   512
## 2  2009      2009     3
## 3  2015         0   503
## 4  2015      2009     2
## 5  2015      2012     3
## 6  2015      2014     7

E1: Employment status – 1st mention 1 = Working now, including military 2 = Only temporarily laid off; sick or maternity leave 3 = Looking for work, unemployed 4 = Retired 5 = Disabled, permanently or temporarily 6 = Keeping house 7 = Student 8 = Other

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_1st_mention)
## # A tibble: 11 × 3
## # Groups:   year [2]
##     year E1_1st_mention     n
##    <dbl>          <dbl> <int>
##  1  2009              1   242
##  2  2009              3    93
##  3  2009              6    12
##  4  2009              7   167
##  5  2009             99     1
##  6  2015              1   406
##  7  2015              2     2
##  8  2015              3    49
##  9  2015              5     4
## 10  2015              6    22
## 11  2015              7    32

E1: Employment status – 2nd mention

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_2nd_mention)
## # A tibble: 11 × 3
## # Groups:   year [2]
##     year E1_2nd_mention     n
##    <dbl>          <dbl> <int>
##  1  2009              0   301
##  2  2009              1    35
##  3  2009              2     1
##  4  2009              3    28
##  5  2009              6     3
##  6  2009              7   147
##  7  2015              0   445
##  8  2015              1     6
##  9  2015              3     4
## 10  2015              6    16
## 11  2015              7    44

E1: Employment status – 3rd mention

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_3rd_mention)
## # A tibble: 3 × 3
## # Groups:   year [2]
##    year E1_3rd_mention     n
##   <dbl>          <dbl> <int>
## 1  2009              0   514
## 2  2009              6     1
## 3  2015              0   515

E3: Work for money 1 = Yes 5 = No 8 = DK 9 = NA; refused

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(E3)
## # A tibble: 6 × 3
## # Groups:   year [2]
##    year    E3     n
##   <dbl> <dbl> <int>
## 1  2009     0   277
## 2  2009     1    31
## 3  2009     5   207
## 4  2015     0   414
## 5  2015     1     8
## 6  2015     5    93

G1: Education status 1 = Graduated from high school 2 = Got a GED 3 = Neither

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G1)
## # A tibble: 7 × 3
## # Groups:   year [2]
##    year    G1     n
##   <dbl> <dbl> <int>
## 1  2009     1   511
## 2  2009     2     3
## 3  2009     3     1
## 4  2015     0   370
## 5  2015     1   139
## 6  2015     2     5
## 7  2015     3     1

G10: Attended College 1 = Yes 5 = No

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G10)
## # A tibble: 6 × 3
## # Groups:   year [2]
##    year   G10     n
##   <dbl> <dbl> <int>
## 1  2009     0     1
## 2  2009     1   385
## 3  2009     5   129
## 4  2015     0   403
## 5  2015     1    35
## 6  2015     5    77

G11: Attending College 1 = Yes 5 = No

Long_format_2009_2015 %>%
  group_by(year) %>%  # one tabulation per wave
  count(G11)
## # A tibble: 6 × 3
## # Groups:   year [2]
##    year   G11     n
##   <dbl> <dbl> <int>
## 1  2009     0   130
## 2  2009     1   323
## 3  2009     5    62
## 4  2015     0    79
## 5  2015     1    86
## 6  2015     5   350

2005 & 2009 & 2015

Step 7: Demographics (2005 & 2009 & 2015)

Filter the data (2005 & 2009 & 2015)

# Build the three-wave cohort table: rows from the 2005, 2009 and 2015 waves
# where Age_18_graduate is 18 or 19, keyed by a combined participant ID.
Long_format_2005_2009_2015_new <- TAS_data_long_format_age %>%
  # %in% states the wave and age sets directly instead of chained == / | tests.
  filter(
    year %in% c(2005, 2009, 2015),
    Age_18_graduate %in% c(18, 19)
  ) %>%
  # Numeric wave code, computed once (the original repeated this identical
  # mutate after unite(), which only overwrote the column with the same values).
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  # Merge the two identifier columns into a single participant key.
  unite("TAS_ID", c("1968 Interview Number", "Person Number"))
# Show every column of the first rows; no hard-coded column count to maintain.
knitr::kable(head(Long_format_2005_2009_2015_new))
TAS TAS05 TAS09 TAS15 TAS_ID Gender Individual is sample Year ID Number Sequence Number Relationship to Head Release Number B5A B5D B6C C2D C2E C2F D2D3_month D2D3_year E1_1st_mention E1_2nd_mention E1_3rd_mention E3 G1 G2_month G2_year G10 G11 G30A G41A G41B G41C G41H G41P H1 L7_1st_mention L7_2nd_mention L7_3rd_mention Age_17_graduate Age_18_graduate year year_new
1 1 NA NA 18_38 2 2 5647 3 98 5 3 4 3 4 2 2 0 0 3 7 0 5 1 6 2004 1 5 6 5 5 5 7 5 2 1 0 0 18 19 2005 -1
2 1 1 NA 47_34 2 2 2516 3 30 5 4 5 6 4 5 2 0 0 1 0 0 0 1 5 2005 5 0 6 3 6 4 7 4 1 1 0 0 17 18 2005 -1
2 1 1 NA 53_36 2 2 1616 3 30 5 4 5 7 4 1 1 0 0 6 0 0 1 1 6 2005 1 5 7 7 7 5 7 6 2 1 0 0 17 18 2005 -1
2 1 1 NA 79_32 2 2 6520 2 30 5 3 4 6 7 5 3 0 0 1 7 0 0 1 5 2004 1 1 0 7 7 6 7 4 1 1 0 0 18 19 2005 -1
2 1 1 NA 88_35 1 2 3411 2 30 5 2 5 7 3 1 2 0 0 1 0 0 0 1 5 2005 1 1 7 2 6 5 6 7 2 1 0 0 17 18 2005 -1
2 1 1 NA 89_34 2 2 4527 3 30 5 2 4 5 2 3 1 0 0 3 7 0 5 1 5 2005 1 1 7 5 6 4 7 5 1 1 0 0 17 18 2005 -1
count(Long_format_2005_2009_2015_new, year)  # rows per wave
## # A tibble: 3 × 2
##    year     n
##   <dbl> <int>
## 1  2005   348
## 2  2009   367
## 3  2015   254

Demographics

Year

Long_format_2005_2009_2015_new %>%
  count(year)  # rows per wave
## # A tibble: 3 × 2
##    year     n
##   <dbl> <int>
## 1  2005   348
## 2  2009   367
## 3  2015   254

Age

Long_format_2005_2009_2015_new %>%
  count(Age_18_graduate)  # all three waves pooled
## # A tibble: 2 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              18   465
## 2              19   504

Age by year (2005)

Long_format_2005_2009_2015_new %>%
  filter(year == 2005) %>%  # 2005 wave only
  count(Age_18_graduate)
## # A tibble: 2 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              18   179
## 2              19   169

Age by year (2009)

Long_format_2005_2009_2015_new %>%
  filter(year == 2009) %>%  # 2009 wave only
  count(Age_18_graduate)
## # A tibble: 2 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              18   195
## 2              19   172

Age by year (2015)

Long_format_2005_2009_2015_new %>%
  filter(year == 2015) %>%  # 2015 wave only
  count(Age_18_graduate)
## # A tibble: 2 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              18    91
## 2              19   163

Gender 1 = Male 2 = Female

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(Gender)
## # A tibble: 6 × 3
## # Groups:   year [3]
##    year Gender     n
##   <dbl>  <dbl> <int>
## 1  2005      1   169
## 2  2005      2   179
## 3  2009      1   165
## 4  2009      2   202
## 5  2015      1   119
## 6  2015      2   135

G2: Year graduated high school

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(G2_year)
## # A tibble: 6 × 3
## # Groups:   year [3]
##    year G2_year     n
##   <dbl>   <dbl> <int>
## 1  2005    2004   169
## 2  2005    2005   179
## 3  2009    2008   172
## 4  2009    2009   195
## 5  2015    2014   163
## 6  2015    2015    91
# Mean graduation year per wave for the three-wave cohort table.
mean_year_graduation <- Long_format_2005_2009_2015_new %>%
  group_by(year) %>%
  summarize(mean_graduation = mean(G2_year, na.rm = TRUE)) %>%
  ungroup()
mean_year_graduation
## # A tibble: 3 × 2
##    year mean_graduation
##   <dbl>           <dbl>
## 1  2005           2005.
## 2  2009           2009.
## 3  2015           2014.
# Standard deviation of graduation year per wave for the three-wave cohort table.
sd_year_graduation <- Long_format_2005_2009_2015_new %>%
  group_by(year) %>%
  summarize(sd_graduation = sd(G2_year, na.rm = TRUE)) %>%
  ungroup()
sd_year_graduation
## # A tibble: 3 × 2
##    year sd_graduation
##   <dbl>         <dbl>
## 1  2005         0.501
## 2  2009         0.500
## 3  2015         0.480

L7: Race – 1st mention 1 = White 2 = Black, African-American, or Negro 3 = American Indian or Alaska Native 4 = Asian 5 = Native Hawaiian or Pacific Islander 7 = Some other race 8 = DK 9 = NA; refused

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_1st_mention)
## # A tibble: 23 × 3
## # Groups:   year [3]
##     year L7_1st_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              1   192
##  2  2005              2   129
##  3  2005              3     1
##  4  2005              4     5
##  5  2005              5     3
##  6  2005              7     4
##  7  2005              8     1
##  8  2005              9    13
##  9  2009              1   184
## 10  2009              2   139
## # ℹ 13 more rows

L7: Race – 2nd mention

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_2nd_mention)
## # A tibble: 18 × 3
## # Groups:   year [3]
##     year L7_2nd_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              0   341
##  2  2005              2     2
##  3  2005              3     2
##  4  2005              5     2
##  5  2005              7     1
##  6  2009              0   347
##  7  2009              1     8
##  8  2009              2     1
##  9  2009              3     6
## 10  2009              5     2
## 11  2009              7     3
## 12  2015              0   228
## 13  2015              1     4
## 14  2015              2     3
## 15  2015              3    15
## 16  2015              4     1
## 17  2015              5     1
## 18  2015              7     2

L7: Race – 3rd mention

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(L7_3rd_mention)
## # A tibble: 6 × 3
## # Groups:   year [3]
##    year L7_3rd_mention     n
##   <dbl>          <dbl> <int>
## 1  2005              0   348
## 2  2009              0   364
## 3  2009              3     3
## 4  2015              0   251
## 5  2015              1     2
## 6  2015              7     1

D2D3: Widowed – year

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(D2D3_year)
## # A tibble: 3 × 3
## # Groups:   year [3]
##    year D2D3_year     n
##   <dbl>     <dbl> <int>
## 1  2005         0   348
## 2  2009         0   367
## 3  2015         0   254

E1: Employment status – 1st mention 1 = Working now, including military 2 = Only temporarily laid off; sick or maternity leave 3 = Looking for work, unemployed 4 = Retired 5 = Disabled, permanently or temporarily 6 = Keeping house 7 = Student 8 = Other

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_1st_mention)
## # A tibble: 16 × 3
## # Groups:   year [3]
##     year E1_1st_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              1   163
##  2  2005              2     1
##  3  2005              3    38
##  4  2005              6     6
##  5  2005              7   136
##  6  2005              8     4
##  7  2009              1   149
##  8  2009              3    73
##  9  2009              6     5
## 10  2009              7   139
## 11  2009             99     1
## 12  2015              1   133
## 13  2015              2     1
## 14  2015              3    46
## 15  2015              6     7
## 16  2015              7    67

E1: Employment status – 2nd mention

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_2nd_mention)
## # A tibble: 15 × 3
## # Groups:   year [3]
##     year E1_2nd_mention     n
##    <dbl>          <dbl> <int>
##  1  2005              0   261
##  2  2005              1    13
##  3  2005              3     5
##  4  2005              6     3
##  5  2005              7    66
##  6  2009              0   214
##  7  2009              1    19
##  8  2009              3    23
##  9  2009              6     4
## 10  2009              7   107
## 11  2015              0   158
## 12  2015              1    14
## 13  2015              3    10
## 14  2015              6     2
## 15  2015              7    70

E1: Employment status – 3rd mention

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(E1_3rd_mention)
## # A tibble: 6 × 3
## # Groups:   year [3]
##    year E1_3rd_mention     n
##   <dbl>          <dbl> <int>
## 1  2005              0   345
## 2  2005              1     1
## 3  2005              3     1
## 4  2005              7     1
## 5  2009              0   367
## 6  2015              0   254

E3: Work for money 1 = Yes 5 = No 8 = DK 9 = NA; refused

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(E3)
## # A tibble: 10 × 3
## # Groups:   year [3]
##     year    E3     n
##    <dbl> <dbl> <int>
##  1  2005     0   177
##  2  2005     1    45
##  3  2005     5   125
##  4  2005     9     1
##  5  2009     0   168
##  6  2009     1    18
##  7  2009     5   181
##  8  2015     0   149
##  9  2015     1     6
## 10  2015     5    99

G1: Education status 1 = Graduated from high school 2 = Got a GED 3 = Neither

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(G1)
## # A tibble: 3 × 3
## # Groups:   year [3]
##    year    G1     n
##   <dbl> <dbl> <int>
## 1  2005     1   348
## 2  2009     1   367
## 3  2015     1   254

G10: Attended College 1 = Yes 5 = No

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(G10)
## # A tibble: 8 × 3
## # Groups:   year [3]
##    year   G10     n
##   <dbl> <dbl> <int>
## 1  2005     1   255
## 2  2005     5    92
## 3  2005     9     1
## 4  2009     1   265
## 5  2009     5   102
## 6  2015     0     1
## 7  2015     1   173
## 8  2015     5    80

G11: Attending College 1 = Yes 5 = No

Long_format_2005_2009_2015_new %>%
  group_by(year) %>%  # one tabulation per wave
  count(G11)
## # A tibble: 9 × 3
## # Groups:   year [3]
##    year   G11     n
##   <dbl> <dbl> <int>
## 1  2005     0    93
## 2  2005     1   221
## 3  2005     5    34
## 4  2009     0   102
## 5  2009     1   242
## 6  2009     5    23
## 7  2015     0    80
## 8  2015     1   139
## 9  2015     5    35