library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ipumsr)
library(readr)
library(ggplot2)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(haven)

#1 Create the vectors x, y and z, with values shown below in order. Note, ignore the [1], do not include that in the vector.

# Build the three input vectors. y contains one NA, and z is a length-one
# vector that R recycles across every element of x and y when multiplying.
x <- c(5, 10, 15, 20, 25, 30)
y <- c(-1, NA, 75, 3, 5, 8)
z <- c(5)
x
## [1]  5 10 15 20 25 30
y
## [1] -1 NA 75  3  5  8
z
## [1] 5

#2. Multiply the first two vectors by the z vector, and store these in new objects. Print these new vectors.

# Store each product in its own object rather than retyping the printed values
# by hand, so the results stay correct if x, y or z ever change.
x_times_z <- x * z
y_times_z <- y * z
print(x_times_z)
## [1]  25  50  75 100 125 150
print(y_times_z)  # the NA in y carries through to the product
## [1]  -5  NA 375  15  25  40
# A holds both products back to back, derived from the stored results.
A <- c(x_times_z, y_times_z)
print(A)
##  [1]  25  50  75 100 125 150  -5  NA 375  15  25  40

#3. Data Management Questions. Go to Blackboard > Content > Assignments and download stata_PSID_w1.dta onto your computer. Then run the following code to generate a data frame for the questions below.

library(haven)
library(readr)
# NOTE(review): this absolute path only exists on the original author's
# machine; point psid_path at your own copy of stata_PSID_w1.dta.
psid_path <- "C:/Users/codar/OneDrive/Documents/R/Fall2020 - 7273/stata_PSID_w1.dta"
stata_PSID_w1 <- read_dta(psid_path)
# View() opens a data viewer, so only call it when someone is at the console;
# this keeps non-interactive runs (Rscript, knitting) from trying to open one.
if (interactive()) {
  View(stata_PSID_w1)
}
names(stata_PSID_w1)
##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"
# Keep only the six columns named below in a smaller working data frame.
assignment1 <- subset(
  x = stata_PSID_w1,
  select = c("id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new")
)

##3.1 How many variables are there in this data and what are the variable names, and how many observations in the data file?

# Number of variables (columns) in the full data frame.
NCOL(stata_PSID_w1)
## [1] 13
# The variable names themselves.
colnames(stata_PSID_w1)
##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"
# Number of observations (rows).
NROW(stata_PSID_w1)
## [1] 131361
# dim() reports observations and variables together in a single call.
dim(stata_PSID_w1)
## [1] 131361     13

##3.2 Show the frequency distribution of race/ethnicity variable.

# race5 holds category codes, so tabulate it: count() returns one row per code
# with its frequency in column n (missing values, if any, get their own row).
# A histogram would bin the codes as if they were a continuous measurement.
count(stata_PSID_w1, race5)

##3.3 What’s the mean and median for adjwlth2(wealth including home equity)?

# na.rm = TRUE drops missing adjwlth2 values before summarising. TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 32.804
# Same statistics from the assignment1 subset created in #3; the adjwlth2
# column is carried over unchanged, so the results match.
mean(assignment1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(assignment1$adjwlth2, na.rm = TRUE)
## [1] 32.804

##3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean, and median)

# On the raw column max(age) came back as 999, which is not a plausible age.
# NOTE(review): 999 is treated here as a missing-value code - confirm against
# the PSID codebook before reporting these numbers.
age_missing_code <- 999
age_all <- stata_PSID_w1$age
# Keep only real ages: drop NA and the sentinel so it cannot inflate the
# maximum, mean, median and IQR.
age_valid <- age_all[!is.na(age_all) & age_all != age_missing_code]
# How many observations were excluded as missing.
length(age_all) - length(age_valid)
# Five summary statistics for age, computed on valid values only.
# (Pasted outputs from the raw column were removed; re-run to refresh them.)
min(age_valid)
max(age_valid)
IQR(age_valid)
median(age_valid)
mean(age_valid)

##3.5 How many people in the data received public assistance? How many Latinos received public assistance?

# Find the observations that meet the condition by creating a subset.
# pubhs is compared with the number 0: comparing a numeric vector with the
# string "0" makes R coerce the numbers to text and compare them as strings.
# NOTE(review): pubhs is used here as the public-assistance indicator -
# confirm against the codebook.
recpubhs <- subset(x = stata_PSID_w1, pubhs > 0)
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(recpubhs)
}
count(recpubhs)
## # A tibble: 1 x 1
##       n
##   <int>
## 1  6961
# How many Latinos received pubhs.
# NOTE(review): assumes race5 == 1 is the Latino category - confirm against
# the codebook.
nrow(subset(stata_PSID_w1, pubhs > 0 & race5 == 1))
## [1] 366
# Cross-check: counting the rows of the condition directly gives the same
# overall number of pubhs recipients as the recpubhs subset above.
nrow(subset(stata_PSID_w1, pubhs > 0))
## [1] 6961

##3.6 Is there anything you wish to know about individuals’ experiences that is not included in the data set? (Note: the unit of analysis here is the individual. This is an open-ended question; e.g., occupation, childhood maltreatment, neighborhood characteristics, etc.) List three variables that you wish you had access to.

#occupation
#urban/rural (or zip code)
#number of children