# Attach each package with its own library() call: library()'s second
# positional argument is `help`, not another package, so the original
# `library(ggplot2, tidyr)` never attached tidyr.
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the extract and stop immediately if any column used by the
# analysis is absent from it.
data <- read.csv("usa_00002.csv")
required_columns <- c("BPL", "ANCESTR1", "EDUC", "SEX", "AGE")
missing_columns <- required_columns[!required_columns %in% colnames(data)]
if (length(missing_columns) > 0) {
  stop(paste(
    "The following required columns are missing:",
    toString(missing_columns)
  ))
}
# Convert variables to appropriate types
# Coerce each analysis column to numeric ahead of the numeric
# comparisons applied to them later in the script.
for (column_name in c("BPL", "ANCESTR1", "EDUC", "SEX", "AGE")) {
  data[[column_name]] <- as.numeric(data[[column_name]])
}
# Create the nativity variable
# Label each row from its birthplace code: 1-99 -> "Native",
# 100-900 -> "Foreign", any other value -> "Other".
# A missing BPL yields NA because ifelse() propagates NA tests.
birthplace <- data$BPL
data$nativity <- ifelse(
  birthplace >= 1 & birthplace <= 99,
  "Native",
  ifelse(birthplace >= 100 & birthplace <= 900, "Foreign", "Other")
)
# Filter for Nigerians and stop with an error if none are found
# Keep only the rows whose ANCESTR1 code equals 553; which() discards
# rows where the comparison is NA, matching subset()'s behaviour.
nigerians <- data[which(data$ANCESTR1 == 553), , drop = FALSE]
if (nrow(nigerians) == 0) {
  stop("No Nigerians found in the dataset. Verify the 'ANCESTR1' variable.")
}
# Remove invalid/missing entries
# Retain rows with SEX of 1 or 2, AGE of at least 18, and an EDUC value
# that is neither missing nor 99. Rows whose combined test is NA are
# dropped.
keep_row <- with(
  nigerians,
  SEX %in% c(1, 2) & AGE >= 18 & !is.na(EDUC) & EDUC != 99
)
nigerians <- nigerians[which(keep_row), , drop = FALSE]
# Dodged bar chart of row counts per EDUC code, filled by nativity.
# The axis labels translate each EDUC code into a readable category.
education_labels <- c(
  "0" = "No School",
  "1" = "Nursery-4",
  "2" = "Grade 5-8",
  "3" = "Grade 9",
  "4" = "Grade 10",
  "5" = "Grade 11",
  "6" = "Grade 12",
  "7" = "1 yr College",
  "8" = "2 yrs College",
  "9" = "3 yrs College",
  "10" = "4 yrs College",
  "11" = "5+ yrs College"
)
ggplot(nigerians, aes(x = factor(EDUC), fill = nativity)) +
  geom_bar(position = "dodge") +
  scale_x_discrete(labels = education_labels) +
  labs(
    title = "Educational Attainment by Nativity for Nigerians",
    x = "Education Level",
    y = "Count",
    fill = "Nativity"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the rotated tick labels are not reset.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
