library(haven)
# Read the SPSS export into a tibble. Columns that carry SPSS value labels
# show up as <dbl+lbl> in the preview.
baby2.0 <- haven::read_sav(file = "TheaWulff_Dataset 9.17.24Updated.sav")
# Preview the first six rows to confirm the import.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID demo3mA demo46mA demo47bmA demo47a4mA demo47cmA demo40mA demo41mA
## <dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl+lbl> <dbl+lb> <dbl+lb>
## 1 4 25 1 [Hispanic… 5 [White] "" 10 [Whit… 4 [Bach… NA
## 2 6 24 0 [Not Hisp… 5 [White] "" 9 [Whit… 3 [Asso… NA
## 3 8 36 0 [Not Hisp… 6 [Prefe… "" 11 [Self… 5 [Any … 3 [Emp…
## 4 9 31 0 [Not Hisp… 6 [Prefe… "" 11 [Self… 4 [Bach… 3 [Emp…
## 5 10 27 0 [Not Hisp… 2 [Asian] "" 3 [Asia… 5 [Any … 3 [Emp…
## 6 16 27 0 [Not Hisp… 5 [White] "" 9 [Whit… 4 [Bach… 3 [Emp…
## # ℹ 31 more variables: demo42mA <dbl+lbl>, demo44mA <dbl+lbl>, demo52mA <dbl>,
## # sexmB <dbl+lbl>, DERStotmA <dbl>, RetentionC <dbl+lbl>, demo35mC <dbl+lbl>,
## # demo36bmC <dbl+lbl>, demo36amC <chr>, demo36cmC <dbl+lbl>,
## # ConsiderExcluding <dbl>, TsensmC1 <dbl>, TintrmC1 <dbl>, TdetmC1 <dbl>,
## # TprmC1 <dbl>, TdistcC1 <dbl>, RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>,
## # ITSEAintmD1 <dbl>, ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>,
## # ITSEAextTmD1 <dbl>, ITSEAintTmD1 <dbl>, ITSEAdysTmD1 <dbl>, …
# Rename column for readability.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Give the race-code column demo47bmA a descriptive name.
baby2.0 <- rename(baby2.0, MotherRace = demo47bmA)
# Preview the data with the renamed column.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID demo3mA demo46mA MotherRace demo47a4mA demo47cmA demo40mA demo41mA
## <dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl+lbl> <dbl+lb> <dbl+lb>
## 1 4 25 1 [Hispani… 5 [White] "" 10 [Whit… 4 [Bach… NA
## 2 6 24 0 [Not His… 5 [White] "" 9 [Whit… 3 [Asso… NA
## 3 8 36 0 [Not His… 6 [Prefer… "" 11 [Self… 5 [Any … 3 [Emp…
## 4 9 31 0 [Not His… 6 [Prefer… "" 11 [Self… 4 [Bach… 3 [Emp…
## 5 10 27 0 [Not His… 2 [Asian] "" 3 [Asia… 5 [Any … 3 [Emp…
## 6 16 27 0 [Not His… 5 [White] "" 9 [Whit… 4 [Bach… 3 [Emp…
## # ℹ 31 more variables: demo42mA <dbl+lbl>, demo44mA <dbl+lbl>, demo52mA <dbl>,
## # sexmB <dbl+lbl>, DERStotmA <dbl>, RetentionC <dbl+lbl>, demo35mC <dbl+lbl>,
## # demo36bmC <dbl+lbl>, demo36amC <chr>, demo36cmC <dbl+lbl>,
## # ConsiderExcluding <dbl>, TsensmC1 <dbl>, TintrmC1 <dbl>, TdetmC1 <dbl>,
## # TprmC1 <dbl>, TdistcC1 <dbl>, RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>,
## # ITSEAintmD1 <dbl>, ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>,
## # ITSEAextTmD1 <dbl>, ITSEAintTmD1 <dbl>, ITSEAdysTmD1 <dbl>, …
# Count the rows whose MotherRace value is missing.
sum(is.na(baby2.0[["MotherRace"]]))
## [1] 12
# Keep only rows with a recorded MotherRace. baby2.0 itself is overwritten,
# so every later step works on this reduced set of rows.
baby2.0 <- filter(baby2.0, !is.na(MotherRace))
# Share of mothers, in percent, for each MotherRace code.
# Each column is 100 times the proportion of rows equal to the given code.
RacePercentage <- summarise(
  baby2.0,
  AmericanIndian_percentage = 100 * mean(MotherRace == 1),
  Asian_percentage = 100 * mean(MotherRace == 2),
  PacificIslander_percentage = 100 * mean(MotherRace == 3),
  Black_percentage = 100 * mean(MotherRace == 4),
  White_percentage = 100 * mean(MotherRace == 5),
  SelfReport_percentage = 100 * mean(MotherRace == 6),
  MoreThanOneRace_percentage = 100 * mean(MotherRace == 7),
  DeclineToAnswer_percentage = 100 * mean(MotherRace == 99)
)
print(RacePercentage)
## # A tibble: 1 × 8
## AmericanIndian_percentage Asian_percentage PacificIslander_percentage
## <dbl> <dbl> <dbl>
## 1 1.88 6.97 1.88
## # ℹ 5 more variables: Black_percentage <dbl>, White_percentage <dbl>,
## # SelfReport_percentage <dbl>, MoreThanOneRace_percentage <dbl>,
## # DeclineToAnswer_percentage <dbl>
# Summary table of race / ethnicity percentages.
# The percentages are typed in by hand rather than computed here, so they
# must be updated manually whenever the underlying data change.
race <- c(
  "American Indian or Alaskan Native",
  "Asian",
  "Native Hawaiian or Other Pacific Islander",
  "Black or African American",
  "White",
  "Hispanic or Latinx",
  "Prefer to self-report",
  "More than 1 race"
)
percentage <- c(
  1.89, 7.03, 1.89, 2.97,
  76.76, 22.20, 5.14, 4.32
)
RaceTable <- data.frame(
  Race = race,
  Percentage = percentage
)
print(RaceTable)
## Race Percentage
## 1 American Indian or Alaskan Native 1.89
## 2 Asian 7.03
## 3 Native Hawaiian or Other Pacific Islander 1.89
## 4 Black or African American 2.97
## 5 White 76.76
## 6 Hispanic or Latinx 22.20
## 7 Prefer to self-report 5.14
## 8 More than 1 race 4.32
# Average mother age
library(dplyr)
# Give the age column demo3mA a descriptive name.
baby2.0 <- rename(baby2.0, MotherAge = demo3mA)
# Preview the data with the renamed column.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID MotherAge demo46mA MotherRace demo47a4mA demo47cmA demo40mA demo41mA
## <dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl+lbl> <dbl+lb> <dbl+lb>
## 1 4 25 1 [Hispa… 5 [White] "" 10 [Whit… 4 [Bach… NA
## 2 6 24 0 [Not H… 5 [White] "" 9 [Whit… 3 [Asso… NA
## 3 8 36 0 [Not H… 6 [Prefer… "" 11 [Self… 5 [Any … 3 [Emp…
## 4 9 31 0 [Not H… 6 [Prefer… "" 11 [Self… 4 [Bach… 3 [Emp…
## 5 10 27 0 [Not H… 2 [Asian] "" 3 [Asia… 5 [Any … 3 [Emp…
## 6 16 27 0 [Not H… 5 [White] "" 9 [Whit… 4 [Bach… 3 [Emp…
## # ℹ 31 more variables: demo42mA <dbl+lbl>, demo44mA <dbl+lbl>, demo52mA <dbl>,
## # sexmB <dbl+lbl>, DERStotmA <dbl>, RetentionC <dbl+lbl>, demo35mC <dbl+lbl>,
## # demo36bmC <dbl+lbl>, demo36amC <chr>, demo36cmC <dbl+lbl>,
## # ConsiderExcluding <dbl>, TsensmC1 <dbl>, TintrmC1 <dbl>, TdetmC1 <dbl>,
## # TprmC1 <dbl>, TdistcC1 <dbl>, RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>,
## # ITSEAintmD1 <dbl>, ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>,
## # ITSEAextTmD1 <dbl>, ITSEAintTmD1 <dbl>, ITSEAdysTmD1 <dbl>, …
# Mean maternal age over the rows still in baby2.0; missing ages are ignored.
average_MotherAge <- mean(baby2.0[["MotherAge"]], na.rm = TRUE)
print(average_MotherAge)
## [1] 29.38606
# Median household income
library(dplyr)
# Give the labelled income column demo44mA a descriptive name.
baby2.0 <- rename(baby2.0, MedianIncome = demo44mA)
# Preview the data with the renamed column.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID MotherAge demo46mA MotherRace demo47a4mA demo47cmA demo40mA demo41mA
## <dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl+lbl> <dbl+lb> <dbl+lb>
## 1 4 25 1 [Hispa… 5 [White] "" 10 [Whit… 4 [Bach… NA
## 2 6 24 0 [Not H… 5 [White] "" 9 [Whit… 3 [Asso… NA
## 3 8 36 0 [Not H… 6 [Prefer… "" 11 [Self… 5 [Any … 3 [Emp…
## 4 9 31 0 [Not H… 6 [Prefer… "" 11 [Self… 4 [Bach… 3 [Emp…
## 5 10 27 0 [Not H… 2 [Asian] "" 3 [Asia… 5 [Any … 3 [Emp…
## 6 16 27 0 [Not H… 5 [White] "" 9 [Whit… 4 [Bach… 3 [Emp…
## # ℹ 31 more variables: demo42mA <dbl+lbl>, MedianIncome <dbl+lbl>,
## # demo52mA <dbl>, sexmB <dbl+lbl>, DERStotmA <dbl>, RetentionC <dbl+lbl>,
## # demo35mC <dbl+lbl>, demo36bmC <dbl+lbl>, demo36amC <chr>,
## # demo36cmC <dbl+lbl>, ConsiderExcluding <dbl>, TsensmC1 <dbl>,
## # TintrmC1 <dbl>, TdetmC1 <dbl>, TprmC1 <dbl>, TdistcC1 <dbl>,
## # RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>, ITSEAintmD1 <dbl>,
## # ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>, ITSEAextTmD1 <dbl>, …
# Median of the household-income variable. The column is a labelled SPSS
# variable whose values are category codes (income brackets), so the median
# is a bracket code, not a dollar amount.
median_income <- median(baby2.0$MedianIncome, na.rm = TRUE)
print(median_income)
# Translate the code into its SPSS value label so the result is readable.
# match() gives NA when the median falls between two codes (e.g. 7.5); in
# that case no single bracket label applies and NA is printed.
income_labels <- attr(baby2.0$MedianIncome, "labels")
label_index <- match(median_income, income_labels)
median_income_label <- names(income_labels)[label_index]
print(median_income_label)
## [1] 8
## Note: 8 is the SPSS category code of the median response, not a dollar amount.
## Per the SPSS value labels, code 8 corresponds to $50,000-$79,999.
# Maternal education
library(dplyr)
# Give the education column demo40mA a descriptive name.
baby2.0 <- rename(baby2.0, Education = demo40mA)
# Preview the data with the renamed column.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID MotherAge demo46mA MotherRace demo47a4mA demo47cmA Education demo41mA
## <dbl> <dbl> <dbl+lb> <dbl+lbl> <chr> <dbl+lbl> <dbl+lbl> <dbl+lb>
## 1 4 25 1 [Hisp… 5 [White] "" 10 [Whit… 4 [Bache… NA
## 2 6 24 0 [Not … 5 [White] "" 9 [Whit… 3 [Assoc… NA
## 3 8 36 0 [Not … 6 [Prefer… "" 11 [Self… 5 [Any g… 3 [Emp…
## 4 9 31 0 [Not … 6 [Prefer… "" 11 [Self… 4 [Bache… 3 [Emp…
## 5 10 27 0 [Not … 2 [Asian] "" 3 [Asia… 5 [Any g… 3 [Emp…
## 6 16 27 0 [Not … 5 [White] "" 9 [Whit… 4 [Bache… 3 [Emp…
## # ℹ 31 more variables: demo42mA <dbl+lbl>, MedianIncome <dbl+lbl>,
## # demo52mA <dbl>, sexmB <dbl+lbl>, DERStotmA <dbl>, RetentionC <dbl+lbl>,
## # demo35mC <dbl+lbl>, demo36bmC <dbl+lbl>, demo36amC <chr>,
## # demo36cmC <dbl+lbl>, ConsiderExcluding <dbl>, TsensmC1 <dbl>,
## # TintrmC1 <dbl>, TdetmC1 <dbl>, TprmC1 <dbl>, TdistcC1 <dbl>,
## # RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>, ITSEAintmD1 <dbl>,
## # ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>, ITSEAextTmD1 <dbl>, …
# Percentage of mothers whose Education code is 4 or 5, i.e. a bachelor's
# degree or any graduate school.
education_4or5 <- baby2.0$Education
# Exclude missing answers from the denominator. Otherwise a mother with no
# recorded education is silently counted as not holding a degree, which is
# inconsistent with how missing age and race values are handled.
education_known <- education_4or5[!is.na(education_4or5)]
filtered_4or5 <- education_known[education_known %in% c(4, 5)]
# Note: this overwrites the `percentage` vector created for RaceTable.
percentage <- (length(filtered_4or5) / length(education_known)) * 100
print(percentage)
## [1] 54.42359
# Hispanic/Latinx percentage
# Give the Hispanic/Latinx indicator demo46mA a descriptive name.
baby2.0 <- rename(baby2.0, MotherHisp = demo46mA)
# Preview the data with the renamed column.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID MotherAge MotherHisp MotherRace demo47a4mA demo47cmA Education
## <dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl+lbl> <dbl+lbl>
## 1 4 25 1 [Hispanic or L… 5 [White] "" 10 [Whit… 4 [Bache…
## 2 6 24 0 [Not Hispanic … 5 [White] "" 9 [Whit… 3 [Assoc…
## 3 8 36 0 [Not Hispanic … 6 [Prefer… "" 11 [Self… 5 [Any g…
## 4 9 31 0 [Not Hispanic … 6 [Prefer… "" 11 [Self… 4 [Bache…
## 5 10 27 0 [Not Hispanic … 2 [Asian] "" 3 [Asia… 5 [Any g…
## 6 16 27 0 [Not Hispanic … 5 [White] "" 9 [Whit… 4 [Bache…
## # ℹ 32 more variables: demo41mA <dbl+lbl>, demo42mA <dbl+lbl>,
## # MedianIncome <dbl+lbl>, demo52mA <dbl>, sexmB <dbl+lbl>, DERStotmA <dbl>,
## # RetentionC <dbl+lbl>, demo35mC <dbl+lbl>, demo36bmC <dbl+lbl>,
## # demo36amC <chr>, demo36cmC <dbl+lbl>, ConsiderExcluding <dbl>,
## # TsensmC1 <dbl>, TintrmC1 <dbl>, TdetmC1 <dbl>, TprmC1 <dbl>,
## # TdistcC1 <dbl>, RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>,
## # ITSEAintmD1 <dbl>, ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>, …
# Count missing values in MotherHisp, the column that is filtered next.
# The earlier version re-checked MotherRace, whose NAs had already been
# removed, so it could only report 0. The recorded output below predates
# this fix and needs to be re-rendered.
sum(is.na(baby2.0$MotherHisp))
## [1] 0
# Drop rows with no Hispanic/Latinx answer; baby2.0 is overwritten.
baby2.0 <- filter(baby2.0, !is.na(MotherHisp))
# Percentage of the remaining mothers coded 1 (Hispanic or Latinx).
# This replaces the per-race table previously stored in RacePercentage.
RacePercentage <- summarise(
  baby2.0,
  Hispanic_percentage = 100 * mean(MotherHisp == 1)
)
print(RacePercentage)
## # A tibble: 1 × 1
## Hispanic_percentage
## <dbl>
## 1 22.2
# RacePercentage already holds the Hispanic/Latinx percentage computed in the
# previous step, and baby2.0 has not changed since. Show it again rather than
# recomputing the identical summary.
print(RacePercentage)
## # A tibble: 1 × 1
## Hispanic_percentage
## <dbl>
## 1 22.2
# Find the percentage of mothers who are White and NOT Hispanic or Latinx
# Give the combined race-by-ethnicity column demo47cmA a descriptive name.
baby2.0 <- rename(baby2.0, MotherRace2 = demo47cmA)
# Preview the data with the renamed column.
head(baby2.0, n = 6L)
## # A tibble: 6 × 39
## FamilyID MotherAge MotherHisp MotherRace demo47a4mA MotherRace2 Education
## <dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl+lbl> <dbl+lbl>
## 1 4 25 1 [Hispanic or… 5 [White] "" 10 [White … 4 [Bache…
## 2 6 24 0 [Not Hispani… 5 [White] "" 9 [White … 3 [Assoc…
## 3 8 36 0 [Not Hispani… 6 [Prefer… "" 11 [Self-r… 5 [Any g…
## 4 9 31 0 [Not Hispani… 6 [Prefer… "" 11 [Self-r… 4 [Bache…
## 5 10 27 0 [Not Hispani… 2 [Asian] "" 3 [Asian … 5 [Any g…
## 6 16 27 0 [Not Hispani… 5 [White] "" 9 [White … 4 [Bache…
## # ℹ 32 more variables: demo41mA <dbl+lbl>, demo42mA <dbl+lbl>,
## # MedianIncome <dbl+lbl>, demo52mA <dbl>, sexmB <dbl+lbl>, DERStotmA <dbl>,
## # RetentionC <dbl+lbl>, demo35mC <dbl+lbl>, demo36bmC <dbl+lbl>,
## # demo36amC <chr>, demo36cmC <dbl+lbl>, ConsiderExcluding <dbl>,
## # TsensmC1 <dbl>, TintrmC1 <dbl>, TdetmC1 <dbl>, TprmC1 <dbl>,
## # TdistcC1 <dbl>, RetentionD1 <dbl+lbl>, ITSEAextmD1 <dbl>,
## # ITSEAintmD1 <dbl>, ITSEAdysmD1 <dbl>, ITSEAcompmD1 <dbl>, …
# Percentages from the MotherRace2 column (race crossed with Hispanic/Latinx
# status; "HL" = Hispanic or Latinx, "NotHL" = not Hispanic or Latinx).
#
# Fix: every expression previously read MotherRace instead of MotherRace2, so
# the table merely repeated the MotherRace shares under the wrong names and
# codes 8-16 could never match. The recorded output below was produced by
# that earlier version and needs to be re-rendered.
#
# NOTE(review): codes 1-14 alternate NotHL (odd) / HL (even), but 15 and 16
# are labelled HL / NotHL. Confirm against the SPSS value labels that these
# two names are not swapped.
RacePercentage2 <- baby2.0 %>%
  summarise(
    AmericanIndianNotHL_percentage = mean(MotherRace2 == 1) * 100,
    AmericanIndianHL_percentage = mean(MotherRace2 == 2) * 100,
    AsianNotHL_percentage = mean(MotherRace2 == 3) * 100,
    AsianHL_percentage = mean(MotherRace2 == 4) * 100,
    PacificIslanderNotHL_percentage = mean(MotherRace2 == 5) * 100,
    PacificIslanderHL_percentage = mean(MotherRace2 == 6) * 100,
    BlackNotHL_percentage = mean(MotherRace2 == 7) * 100,
    BlackHL_percentage = mean(MotherRace2 == 8) * 100,
    WhiteNotHL_percentage = mean(MotherRace2 == 9) * 100,
    WhiteHL_percentage = mean(MotherRace2 == 10) * 100,
    SelfReportNotHL_percentage = mean(MotherRace2 == 11) * 100,
    SelfReportHL_percentage = mean(MotherRace2 == 12) * 100,
    MoreThanOneRaceNotHL_percentage = mean(MotherRace2 == 13) * 100,
    MoreThanOneRaceHL_percentage = mean(MotherRace2 == 14) * 100,
    NoRaceSelectedHL_percentage = mean(MotherRace2 == 15) * 100,
    NoRaceSelectedNotHL_percentage = mean(MotherRace2 == 16) * 100,
    DeclineToAnswer_percentage = mean(MotherRace2 == 99) * 100
  )
print(RacePercentage2)
## # A tibble: 1 × 17
## AmericanIndianNotHL_percentage AmericanIndianHL_percen…¹ AsianNotHL_percentage
## <dbl> <dbl> <dbl>
## 1 1.89 7.03 1.89
## # ℹ abbreviated name: ¹AmericanIndianHL_percentage
## # ℹ 14 more variables: AsianHL_percentage <dbl>,
## # PacificIslanderNotHL_percentage <dbl>, PacificIslanderHL_percentage <dbl>,
## # BlackNotHL_percentage <dbl>, BlackHL_percentage <dbl>,
## # WhiteNotHL_percentage <dbl>, WhiteHL_percentage <dbl>,
## # SelfReportNotHL_percentage <dbl>, SelfReportHL_percentage <dbl>,
## # MoreThanOneRaceNotHL_percentage <dbl>, …
# Percentage of mothers coded 9 in MotherRace2 (White, not Hispanic or
# Latinx).
mother_race_9 <- baby2.0$MotherRace2
# Subsetting with `x[x == 9]` keeps an NA entry for every missing value, so
# missing answers were counted as matches. Drop them first so only real 9s
# are counted and the denominator is the number of recorded answers.
race2_known <- mother_race_9[!is.na(mother_race_9)]
filtered_9 <- race2_known[race2_known == 9]
percentage_9 <- (length(filtered_9) / length(race2_known)) * 100
print(percentage_9)
## [1] 59.18919