# Attach plotting and reshaping packages. library() attaches a single package
# per call; a second positional argument would be matched to `help` and
# ignored, so each package gets its own call.
library(ggplot2)
library(tidyr)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the input CSV and abort early when any column used by the analysis
# is absent from it.
data <- read.csv("usa_00002.csv")
required_columns <- c("BPL", "ANCESTR1", "EDUC", "SEX", "AGE")
# Keep only the required names that do not appear among the loaded columns.
missing_columns <- required_columns[!(required_columns %in% colnames(data))]
if (length(missing_columns) != 0) {
  stop(paste(
    "The following required columns are missing:",
    toString(missing_columns)
  ))
}

# Convert variables to appropriate types

# Coerce each analysis column to numeric, one column at a time and in a fixed
# order, so later comparisons operate on numbers rather than on whatever type
# read.csv() inferred.
for (column in c("BPL", "ANCESTR1", "EDUC", "SEX", "AGE")) {
  data[[column]] <- as.numeric(data[[column]])
}

# Create the nativity variable

# Label every row by its BPL code: 1-99 is "Native", 100-900 is "Foreign",
# and any other non-missing code is "Other". A missing BPL yields NA.
data$nativity <- local({
  bpl <- data$BPL
  in_native_range <- bpl >= 1 & bpl <= 99
  in_foreign_range <- bpl >= 100 & bpl <= 900
  ifelse(
    in_native_range,
    "Native",
    ifelse(in_foreign_range, "Foreign", "Other")
  )
})

# Filter for Nigerians and stop with an error if none are found

# Select the rows whose ANCESTR1 code equals 553 (the code this script treats
# as Nigerian). which() drops rows where the comparison is NA, so rows with a
# missing code are excluded too.
nigerians <- data[which(data$ANCESTR1 == 553), , drop = FALSE]

# Abort when the filter matched nothing, since every later step needs rows.
if (nrow(nigerians) < 1) {
  stop("No Nigerians found in the dataset. Verify the 'ANCESTR1' variable.")
}

# Remove invalid or missing entries

# Restrict to rows with SEX of 1 or 2, AGE of at least 18, and a non-missing
# EDUC value other than 99. Rows where the combined condition evaluates to NA
# (for example a missing AGE) are dropped by which().
nigerians <- nigerians[which(
  nigerians$SEX %in% c(1, 2) &
    nigerians$AGE >= 18 &
    !is.na(nigerians$EDUC) &
    nigerians$EDUC != 99
), , drop = FALSE]
# Display text for each EDUC code shown on the x axis, keyed by the code as
# it appears after factor() conversion.
educ_labels <- c(
  "0" = "No School",
  "1" = "Nursery-4",
  "2" = "Grade 5-8",
  "3" = "Grade 9",
  "4" = "Grade 10",
  "5" = "Grade 11",
  "6" = "Grade 12",
  "7" = "1 yr College",
  "8" = "2 yrs College",
  "9" = "3 yrs College",
  "10" = "4 yrs College",
  "11" = "5+ yrs College"
)

# Bar chart of row counts per EDUC code, with the nativity groups drawn side
# by side within each code.
ggplot(nigerians, aes(x = factor(EDUC), fill = nativity)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Educational Attainment by Nativity for Nigerians",
    x = "Education Level",
    y = "Count",
    fill = "Nativity"
  ) +
  scale_x_discrete(labels = educ_labels) +
  theme_minimal() +
  # Tilt the tick labels so the longer education names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))