Lab 9/16

Question 3

library(haven)
library(readr)
# Load the PSID extract (Stata .dta file) into `stata`.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this file exists -- consider a project-relative path.
stata <- read_dta("C:/Users/maman/OneDrive/DEM Fall 2020/DEM 7273/stata_PSID_w1.dta")

# View the data. The spreadsheet viewer needs an interactive session, so it is
# skipped when the script is run non-interactively (Rscript, knitting).
if (interactive()) {
  View(stata)
}

# Histogram of the raw numeric race5 codes (race5 is recoded to a labelled
# factor further down, in section 3.2).
hist(stata$race5)

# Select variables into a new data set.
assignment1 <- subset(
  stata,
  select = c(
    "id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new", "race5"
  )
)
# 3.1
# Variable names in the data set.
colnames(stata)

##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"

# Size of the data set: number of rows, then number of columns.
c(nrow(stata), ncol(stata))

## [1] 131361     13

# 3.2
# Recode the numeric race5 codes (1 through 5) as a labelled factor.
stata[["race5"]] <- factor(
  stata[["race5"]],
  levels = 1:5,
  labels = c("Latino", "Asian", "Black", "Other", "White")
)

# Bar plot of the share of observations in each race5 category.
barplot(prop.table(table(stata[["race5"]])))

# Bar plot of the number of observations in each race5 category.
barplot(table(stata[["race5"]]))

# 3.3
# Mean and median of adjwlth2 (labelled "Wealth (including home equity) in
# 1000s of yr 2000"), ignoring missing values.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the result.

mean(stata$adjwlth2, na.rm = TRUE)

## [1] 187.1656

median(stata$adjwlth2, na.rm = TRUE)

## [1] 32.804

# 3.4
# Range, centre and spread of the age column.
# NOTE(review): the maximum below is 999, which looks like a missing-value
# code rather than a real age -- confirm against the codebook; if so, the
# mean and maximum reported here include those coded values.
min(stata[["age"]])

## [1] 1

max(stata[["age"]])

## [1] 999

median(stata[["age"]])

## [1] 29

mean(stata[["age"]])

## [1] 32.02676

IQR(stata[["age"]])

## [1] 33

# 3.5
# Counts of rnthlp (labelled "1 = received govt rent assistance").

table(stata$rnthlp)

## 
##      0      1 
## 128150   3163

# Rows with rnthlp equal to 1 whose race5 is "Latino".
# Columns are referenced by bare name inside filter() instead of through
# `stata$`, and rnthlp is numeric, so it is compared with the number 1 rather
# than the string "1". Conditions separated by commas are combined with AND.
dplyr::filter(stata, rnthlp == 1, race5 == "Latino")

## # A tibble: 153 x 13
##     year sex     age marpi  educ adjfinc pubhs rnthlp adjwlth1 adjwlth2
##    <dbl> <chr> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl>    <dbl>    <dbl>
##  1  2009 fema~    28     1    11   14.2      0      1  7.99e+0  7.99e+0
##  2  2009 male     25     1    12   27.4      0      1 -1.46e+1 -1.46e+1
##  3  2009 male      4     0    11   14.2      0      1  7.99e+0  7.99e+0
##  4  2009 male      4     0    12   27.4      0      1 -1.46e+1 -1.46e+1
##  5  2009 male      2     0    11   14.2      0      1  7.99e+0  7.99e+0
##  6  2009 fema~     1     0    12   27.4      0      1 -1.46e+1 -1.46e+1
##  7  2009 fema~    28     1    12   27.4      0      1 -1.46e+1 -1.46e+1
##  8  2009 fema~     5     0    12   27.4      0      1 -1.46e+1 -1.46e+1
##  9  2009 male     27     1    11   14.2      0      1  7.99e+0  7.99e+0
## 10  2003 fema~    92     0     0    7.58     0      1  4.78e-3  4.78e-3
## # ... with 143 more rows, and 3 more variables: h_race_ethnic_new <chr>,
## #   id <dbl>, race5 <fct>

# Question 3.6: Geographic variables such as state or county would be helpful, along with employment status and place of birth (foreign or domestic).

## content

head(stata, n = 6) # first six rows

## # A tibble: 6 x 13
##    year sex     age marpi  educ adjfinc pubhs rnthlp adjwlth1 adjwlth2
##   <dbl> <chr> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl>    <dbl>    <dbl>
## 1  2001 male     49     1     9    50.9     0      0    23.0      113.
## 2  2003 male     51     1     9    31.1     0      0     3.83     119.
## 3  2005 male     53     1     9    21.3     0      0     6.55     116.
## 4  2007 male     55     1     9    76.5     0      0    26.3      129.
## 5  2009 male     57     1     9    19.9     0      0    12.1      112.
## 6  2011 male     59     1    10    30.9     0      0     4.82     104.
## # ... with 3 more variables: h_race_ethnic_new <chr>, id <dbl>, race5 <fct>

tail(stata, n = 6) # last six rows

## # A tibble: 6 x 13
##    year sex     age marpi  educ adjfinc pubhs rnthlp adjwlth1 adjwlth2
##   <dbl> <chr> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl>    <dbl>    <dbl>
## 1  2009 male     29     1    12    75.2     0      0   -20.8    -20.8 
## 2  2011 male     31     1    11    65.1     0      0   -18.0    -18.0 
## 3  2009 fema~    22     1    12    30.7     0      0   208.     208.  
## 4  2011 fema~    24     1    12    59.7     0      0     7.66     7.66
## 5  2009 fema~     2     0    12    30.7     0      0   208.     208.  
## 6  2011 fema~     4     0    12    59.7     0      0     7.66     7.66
## # ... with 3 more variables: h_race_ethnic_new <chr>, id <dbl>, race5 <fct>

## size

dim(stata) # number of observations (rows), then number of columns

## [1] 131361     13

dim(stata)[1L] # number of observations

## [1] 131361

length(stata) # number of columns/variables

## [1] 13

## summary

names(stata) # column names

##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"

utils::str(stata) # type, attributes and first values of every column

## tibble [131,361 x 13] (S3: tbl_df/tbl/data.frame)
##  $ year             : num [1:131361] 2001 2003 2005 2007 2009 ...
##   ..- attr(*, "label")= chr "Year"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ sex              : chr [1:131361] "male" "male" "male" "male" ...
##   ..- attr(*, "label")= chr "Sex of respondent"
##   ..- attr(*, "format.stata")= chr "%9s"
##  $ age              : num [1:131361] 49 51 53 55 57 59 47 49 51 53 ...
##   ..- attr(*, "label")= chr "Age of respondent"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ marpi            : num [1:131361] 1 1 1 1 1 1 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Marital pairs indicator"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ educ             : num [1:131361] 9 9 9 9 9 10 12 12 12 12 ...
##   ..- attr(*, "label")= chr "Years completed education"
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ adjfinc          : num [1:131361] 50.9 31.1 21.3 76.5 19.9 ...
##   ..- attr(*, "label")= chr "Family income in prev yr in 1000s of year 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ pubhs            : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "1 = lives in public housing"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ rnthlp           : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "1 = received govt rent assistance"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ adjwlth1         : num [1:131361] 23.05 3.83 6.55 26.29 12.14 ...
##   ..- attr(*, "label")= chr "Wealth (excluding home equity) in 1000s of yr 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ adjwlth2         : num [1:131361] 113 119 116 129 112 ...
##   ..- attr(*, "label")= chr "Wealth (including home equity) in 1000s of yr 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ h_race_ethnic_new: chr [1:131361] "NL White" "NL White" "NL White" "NL White" ...
##   ..- attr(*, "label")= chr "Race/ethnicity updated codes (5/26/14)"
##   ..- attr(*, "format.stata")= chr "%16s"
##  $ id               : num [1:131361] 4003 4003 4003 4003 4003 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ race5            : Factor w/ 5 levels "Latino","Asian",..: 5 5 5 5 5 5 5 5 5 5 ...

base::summary(stata) # per-column summaries (more relevant to small data sets)

##       year          sex                 age             marpi       
##  Min.   :2001   Length:131361      Min.   :  1.00   Min.   :0.0000  
##  1st Qu.:2003   Class :character   1st Qu.: 14.00   1st Qu.:0.0000  
##  Median :2007   Mode  :character   Median : 29.00   Median :0.0000  
##  Mean   :2006                      Mean   : 32.03   Mean   :0.4178  
##  3rd Qu.:2009                      3rd Qu.: 47.00   3rd Qu.:1.0000  
##  Max.   :2011                      Max.   :999.00   Max.   :4.0000  
##                                                     NA's   :28      
##       educ          adjfinc            pubhs             rnthlp       
##  Min.   : 0.00   Min.   :-929.60   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:12.00   1st Qu.:  24.04   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :12.00   Median :  45.18   Median :0.00000   Median :0.00000  
##  Mean   :13.04   Mean   :  60.39   Mean   :0.05301   Mean   :0.02409  
##  3rd Qu.:15.00   3rd Qu.:  75.31   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :20.00   Max.   :5044.84   Max.   :1.00000   Max.   :1.00000  
##  NA's   :2496    NA's   :48        NA's   :34        NA's   :48       
##     adjwlth1           adjwlth2        h_race_ethnic_new        id         
##  Min.   :-2467.18   Min.   :-2304.98   Length:131361      Min.   :   4003  
##  1st Qu.:    0.01   1st Qu.:    1.91   Class :character   1st Qu.:1269033  
##  Median :    9.98   Median :   32.80   Mode  :character   Median :2464171  
##  Mean   :  129.48   Mean   :  187.17                      Mean   :3014466  
##  3rd Qu.:   58.05   3rd Qu.:  143.55                      3rd Qu.:5381175  
##  Max.   :80199.41   Max.   :80303.23                      Max.   :6872185  
##  NA's   :48         NA's   :48                                             
##     race5      
##  Latino: 9893  
##  Asian : 2118  
##  Black :46935  
##  Other : 1134  
##  White :71281  
##                
##

Core dplyr verbs:
1. filter
2. select
3. mutate
4. arrange
5. summarize
6. group_by



## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:


```r
summary(datasets::cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
```

Lab 9/16

Daniel Mamani

9/16/2020

Question 1 and 2

Question 3

Including Plots