# 1 Create vectors

x <- c(5, 10, 15, 20, 25, 30)
y <- c(-1, NA, 75, 3, 5, 8)
z <- 5

# 2 Multiply the first two vectors by the z vector and store these in new
# objects. Print these new vectors.

# z has length 1, so it is recycled across every element of x and y.
# The NA in y propagates: the matching element of B is NA as well.
A <- x * z # was x * y, which is not "x multiplied by z"
B <- y * z

print(A)
## [1]  25  50  75 100 125 150
print(B)
## [1]  -5  NA 375  15  25  40

# 3 Data Management Questions

library(haven)

# Read the Stata file from the current working directory.
stata_PSID_w1 <- read_dta("stata_PSID_w1.dta")

# View() opens a data viewer window; only attempt that when a user is present,
# so the script can also be run non-interactively.
if (interactive()) {
  View(stata_PSID_w1)
}

# Select variables into a new data set
assignment1 <- subset(
  x = stata_PSID_w1,
  select = c(
    "id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new", "race5"
  )
)

# Quick look at the full data: column names, dimensions, and the centre of the
# adjwlth2 distribution (NA values are dropped before averaging).
names(stata_PSID_w1)
##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"
dim(stata_PSID_w1)
## [1] 131361     13
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 32.804

# 3.1 How many variables are there in this data and what are the variable
# names, and how many observations in the data file?
#
# Answer: 13 variables -- "year", "sex", "age", "marpi", "educ", "adjfinc",
# "pubhs", "rnthlp", "adjwlth1", "adjwlth2", "h_race_ethnic_new", "id" and
# "race5" -- and 131361 observations.

# Rows and columns in one call
dim(stata_PSID_w1)
## [1] 131361     13
# Variable (column) names
colnames(stata_PSID_w1)
##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"
# Number of observations
NROW(stata_PSID_w1)
## [1] 131361
# Number of variables
NCOL(stata_PSID_w1)
## [1] 13

# 3.2 Show the frequency distribution of race/ethnicity variable.

# First pass: histogram of the raw numeric codes.
hist(assignment1$race5, main = "Frequency Distribution of Race")

# Second pass: attach a text label to each code (1-5) so the x-axis of the
# bar chart shows group names rather than numbers.
assignment1[["race5"]] <- factor(
  assignment1[["race5"]],
  levels = 1:5,
  labels = c("Latino", "Asian", "Black", "Other", "White")
)
barplot(
  table(assignment1[["race5"]]),
  main = "Frequency Distribution of Race"
)

# 3.3 What are the mean and median for adjwlth2 (wealth including home equity)?

# na.rm = TRUE drops missing values first; spelled out in full because the
# shorthand T is an ordinary variable that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 32.804

# 3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean,
# and median)

# The raw age column has a maximum of 999, which cannot be a real age. It is
# treated here as a missing-value placeholder and recoded to NA in a local copy
# (the data frame itself is left untouched).
# NOTE(review): confirm against the codebook that 999 is the only such code.
# The previously recorded output (max 999, mean 32.03, IQR 33) included the
# placeholder; re-run this section to regenerate the figures.
age_clean <- as.numeric(stata_PSID_w1$age)
age_clean[age_clean %in% 999] <- NA

# With one function (summary() reports the number of NA values separately)
summary(age_clean)

# With several functions; na.rm = TRUE is required now that NA values exist
min(age_clean, na.rm = TRUE)
max(age_clean, na.rm = TRUE)
IQR(age_clean, na.rm = TRUE)
mean(age_clean, na.rm = TRUE)
median(age_clean, na.rm = TRUE)

# 3.5 How many people in the data received public assistance? How many Latinos
# received public assistance?

# Count rows whose pubhs value is positive. Rows where the condition is NA are
# not counted.
# NOTE(review): these are row counts; the data has both "id" and "year"
# columns, so if an id can appear in several rows this is not a count of
# distinct people -- confirm.
with(stata_PSID_w1, sum(pubhs > 0, na.rm = TRUE))
## [1] 6961
# 6961 people received public assistance


# Same count restricted to race5 code 1 (Latino)
with(stata_PSID_w1, sum(pubhs > 0 & race5 == 1, na.rm = TRUE))
## [1] 366
# 366 Latinos received public assistance

#3.6 Anything you wish to know about individuals’ experiences that are not included in the data set? (Note: unit of analysis is individual here. Open-ended question. E.g., occupation, childhood maltreatment, neighborhood characteristics, etc). List three variables that you wish you had access to.

#Is the individual native born or foreign born?
#What is the primary language of the individual?
#Is the individual a registered voter?