# 1 Create vectors
x <- c(5, 10, 15, 20, 25, 30)
y <- c(-1, NA, 75, 3, 5, 8)
z <- 5
# 2 Multiply the first two vectors by the z vector and store these in new
# objects. Print these new vectors.
# z has length 1, so it is recycled across every element of x and y.
# A is x * z (not x * y): the task asks for each vector scaled by z.
A <- x * z
# The NA in y propagates to the matching position of B.
B <- y * z
print(A)
## [1] 25 50 75 100 125 150
print(B)
## [1] -5 NA 375 15 25 40
library(haven)
# Read the Stata file into a data frame; read_dta() is provided by haven.
stata_PSID_w1 <- read_dta("stata_PSID_w1.dta")
# View() opens a data viewer, so only call it in an interactive session.
# This keeps non-interactive runs (Rscript, knitting) from failing on it.
if (interactive()) {
  View(stata_PSID_w1)
}
# Select variables into a new data set
assignment1 <- subset(
  x = stata_PSID_w1,
  select = c(
    "id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new", "race5"
  )
)
# Column names of the full data set
names(stata_PSID_w1)
## [1] "year" "sex" "age"
## [4] "marpi" "educ" "adjfinc"
## [7] "pubhs" "rnthlp" "adjwlth1"
## [10] "adjwlth2" "h_race_ethnic_new" "id"
## [13] "race5"
# Rows and columns of the full data set
dim(stata_PSID_w1)
## [1] 131361 13
# na.rm = TRUE drops missing values before averaging; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 32.804
# 3.1 How many variables are there in this data, what are the variable names,
# and how many observations are in the data file?
# Answer: there are 13 variables. They are: "year", "sex", "age", "marpi",
# "educ", "adjfinc", "pubhs", "rnthlp", "adjwlth1", "adjwlth2",
# "h_race_ethnic_new", "id", and "race5".
# Answer: there are 131361 observations.
# Rows and columns in one call
dim(stata_PSID_w1)
## [1] 131361 13
# Variable names
colnames(stata_PSID_w1)
## [1] "year" "sex" "age"
## [4] "marpi" "educ" "adjfinc"
## [7] "pubhs" "rnthlp" "adjwlth1"
## [10] "adjwlth2" "h_race_ethnic_new" "id"
## [13] "race5"
# Number of observations (rows)
dim(stata_PSID_w1)[1L]
## [1] 131361
# Number of variables (columns)
length(stata_PSID_w1)
## [1] 13
# 3.2 Show the frequency distribution of the race/ethnicity variable.
# First pass: histogram of race5 as stored (before it is turned into a factor).
hist(
  assignment1$race5,
  main = "Frequency Distribution of Race"
)
# Second pass, with labels on the x-axis: map codes 1-5 to named factor
# levels, then plot the count of each level.
assignment1$race5 <- factor(
  assignment1$race5,
  levels = 1:5,
  labels = c("Latino", "Asian", "Black", "Other", "White")
)
barplot(
  table(assignment1[["race5"]]),
  main = "Frequency Distribution of Race"
)
# 3.3 What's the mean and median of adjwlth2 (wealth including home equity)?
# na.rm = TRUE drops missing values before the statistic is computed; TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 32.804
# 3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean,
# and median).
# NOTE(review): the maximum below is 999, which looks like a missing-value
# code rather than a real age; if so it inflates the mean. Confirm against the
# codebook before reporting these numbers.
# All at once with a single function
summary(stata_PSID_w1[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.00 29.00 32.03 47.00 999.00
# One statistic per call
with(stata_PSID_w1, min(age))
## [1] 1
with(stata_PSID_w1, max(age))
## [1] 999
with(stata_PSID_w1, IQR(age))
## [1] 33
with(stata_PSID_w1, mean(age))
## [1] 32.02676
with(stata_PSID_w1, median(age))
## [1] 29
# 3.5 How many people in the data received public assistance? How many
# Latinos received public assistance?
# Count rows where pubhs is positive; rows where the condition is NA are not
# counted.
# NOTE(review): this counts rows. If an id can appear in more than one row,
# rows and people differ — confirm the unit of observation.
sum(with(stata_PSID_w1, pubhs > 0), na.rm = TRUE)
## [1] 6961
# 6961 people received public assistance
# Same count restricted to race5 code 1 (Latino)
sum(with(stata_PSID_w1, pubhs > 0 & race5 == 1), na.rm = TRUE)
## [1] 366
# 366 Latinos received public assistance
#3.6 Anything you wish to know about individuals’ experiences that are not included in the data set? (Note: unit of analysis is individual here. Open-ended question. E.g., occupation, childhood maltreatment, neighborhood characteristics, etc). List three variables that you wish you had access to.
#Is the individual native born or foreign born?
#What is the primary language of the individual?
#Is the individual a registered voter?