#2 Multiply the first two vectors by the z vector and store these in new objects. Print these new vectors.

#3 Data Management Questions

library(haven)
# Read the Stata file stata_PSID_w1.dta (path is relative to the working
# directory).
stata_PSID_w1 <- read_dta("stata_PSID_w1.dta")

# View() opens a spreadsheet-style viewer, so it is only called in an
# interactive session; unattended runs skip it.
if (interactive()) {
  View(stata_PSID_w1)
}

# Select the variables used in the assignment into a new data set.
assignment1 <- subset(
  stata_PSID_w1,
  select = c(
    "id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new", "race5"
  )
)

# Variable names of the full data set.
names(stata_PSID_w1)

##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"

# Number of rows and columns of the full data set.
dim(stata_PSID_w1)

## [1] 131361     13

# Mean and median of adjwlth2 with missing values dropped. TRUE is written
# out because T is an ordinary variable that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)

## [1] 187.1656

median(stata_PSID_w1$adjwlth2, na.rm = TRUE)

## [1] 32.804

#3.1 How many variables are there in this data and what are the variable names, and how many observations in the data file?

#There are 13 variables. They are:"year","sex","age","marpi","educ","adjfinc","pubhs","rnthlp","adjwlth1","adjwlth2","h_race_ethnic_new","id", and "race5"
#There are 131361 observations

# Row count and column count, printed together.
c(nrow(stata_PSID_w1), ncol(stata_PSID_w1))

## [1] 131361     13

# Column (variable) names.
colnames(stata_PSID_w1)

##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"

# Number of observations: first element of the dimensions.
dim(stata_PSID_w1)[1L]

## [1] 131361

# Number of variables: second element of the dimensions.
dim(stata_PSID_w1)[2L]

## [1] 13

#3.2 Show the frequency distribution of race/ethnicity variable.

# race5 holds category codes (labelled below), so its frequency distribution
# is shown as counts per category rather than with hist(), whose bins are
# chosen for continuous data.

# Bar chart of the codes before labelling.
barplot(table(assignment1$race5), main = "Frequency Distribution of Race")

#with labels on x-axis
# Convert the codes to a labelled factor only once: re-running factor() on an
# already-converted column would match none of the numeric levels and turn
# every value into NA.
if (!is.factor(assignment1$race5)) {
  assignment1$race5 <- factor(
    assignment1$race5,
    levels = c(1, 2, 3, 4, 5),
    labels = c("Latino", "Asian", "Black", "Other", "White")
  )
}

# Print the count for each category, then plot the same counts.
race_counts <- table(assignment1$race5)
race_counts
barplot(race_counts, main = "Frequency Distribution of Race")

#3.3 What’s the mean and median for adjwlth2(wealth including home equity)?

# Mean of adjwlth2 with missing values dropped. TRUE is written out because T
# is a reassignable variable, not a reserved word.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)

## [1] 187.1656

# Median of adjwlth2 with missing values dropped.
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)

## [1] 32.804

#3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean, and median)

# NOTE(review): the recorded maximum below is 999, which is not a plausible
# age and looks like a missing-value code (confirm against the codebook). It
# distorts the raw maximum and mean; the end of this section repeats the
# statistics with 999 set to missing.

#with one function
summary(stata_PSID_w1$age)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   29.00   32.03   47.00  999.00

#several functions
min(stata_PSID_w1$age)

## [1] 1

max(stata_PSID_w1$age)

## [1] 999

IQR(stata_PSID_w1$age)

## [1] 33

mean(stata_PSID_w1$age)

## [1] 32.02676

median(stata_PSID_w1$age)

## [1] 29

# Same statistics with the suspected 999 code treated as missing.
# which() is used so that any existing NA in age is left untouched.
age_valid <- stata_PSID_w1$age
age_valid[which(age_valid == 999)] <- NA

# summary() gives min, median, mean, and max; IQR() is computed separately.
summary(age_valid)
IQR(age_valid, na.rm = TRUE)

#3.5 How many people in the data received public assistance? How many Latinos received public assistance?

# Count rows with a positive pubhs value. Comparisons that evaluate to NA are
# dropped, matching how subset() discards rows whose condition is NA.
sum(stata_PSID_w1$pubhs > 0, na.rm = TRUE)

## [1] 6961

#6961 people received public assistance


# Same count, restricted to rows whose race5 code is 1.
with(stata_PSID_w1, sum(pubhs > 0 & race5 == 1, na.rm = TRUE))

## [1] 366

#366 Latinos received public assistance

#3.6 Anything you wish to know about individuals’ experiences that are not included in the data set? (Note: unit of analysis is individual here. Open-ended question. E.g., occupation, childhood maltreatment, neighborhood characteristics, etc). List three variables that you wish you had access to.

#Is the individual native born or foreign born?
#What is the primary language of the individual?
#Is the individual a registered voter?

# Lab Session 9/16/20

# Julie A. Gonzalez

# 9/20/2020

#3 Data Management Questions