The data come from the ACS 5-year estimates for 2012 and 2022. This overview focuses specifically on Table S1601.
# IPUMS microdata import (currently disabled).
# ddi <- read_ipums_ddi("usa_00009.xml")
# all_indicator_data <- read_ipums_micro(ddi)

# ---- ACS 5-year extracts, 2022 ----
# One data frame per downloaded table; the table code is part of each file name.
poverty_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSDT5Y2022.B16009-Data.csv"
)
language_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSST5Y2022.S1601-Data.csv"
)
social_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSCP5Y2022.CP02-Data.csv"
)
characteristics_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSST5Y2022.S1603-Data.csv"
)
limited_eng_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSST5Y2022.S1602-Data.csv"
)
household_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSDT5Y2022.B16002-Data.csv"
)
education_2022 <- read.csv(
  file = "../ACS_DATA/2022/ACSDT5Y2022.B16010-Data.csv"
)

# ---- ACS 5-year extract, 2012 ----
language_2012 <- read.csv(
  file = "../ACS_DATA/2012/ACSST5Y2012.S1601-Data.csv"
)

# ---- County lookup tables ----
# Both are joined onto the county tables by their `County` column further down.
regions <- read.csv(file = "../location_data/County_12_Regions.csv")
rural_urban <- read.csv(file = "../location_data/rural_urban.csv")
Overall methodology: Bilingualism is measured as the proportion of speakers in each non-English language category who speak English ‘very well’. This bilingual population is then used in the aggregation and trend analyses that follow.
# Convert an ACS percent column to numeric.
#
# The "-" placeholder is mapped to 0 *before* coercion, so as.numeric() no
# longer emits "NAs introduced by coercion" for it. Any other non-numeric
# value still becomes NA (with a warning), exactly as before. Columns that
# read.csv() already parsed as numeric are returned unchanged.
acs_percent_to_numeric <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  x <- as.character(x)
  x[x %in% "-"] <- "0"
  as.numeric(x)
}

# County-level bilingual estimates for 2012, built from the S1601 extract.
#
# For each language category the number of bilingual speakers is
#   (speakers of that language) * (percent of them speaking English very well)
# and the Percent_<category>_Bilangual columns express that count as a share
# of the population aged 5 and over.
bilangualism_2012 <- language_2012 |>
  # Keep only the columns needed and give them workable names on the way in.
  # Column order matches the previous select()/rename() pair.
  select(
    Geographic.Area.Name,
    Total..Estimate..Population.5.years.and.over,
    NonEnglish_Language_Estimate =
      Total..Estimate..Speak.a.language.other.than.English,
    # Percent of speakers who speak English "very well"
    Percent_Bilangual =
      Percent.of.specified.language.speakers..Speak.English.very.well...Estimate..Speak.a.language.other.than.English.,
    Percent_of_Spanish_Bilangual =
      Percent.of.specified.language.speakers..Speak.English.very.well...Estimate..Speak.a.language.other.than.English..Spanish.or.Spanish.Creole.,
    Percent_of_Asian_Pacific_Bilangual =
      Percent.of.specified.language.speakers..Speak.English.very.well...Estimate..Speak.a.language.other.than.English..Asian.and.Pacific.Island.languages.,
    Percent_of_IndoEuro_Bilangual =
      Percent.of.specified.language.speakers..Speak.English.very.well...Estimate..Speak.a.language.other.than.English..Other.Indo.European.languages.,
    # This source column is the "less than very well" share; it is turned
    # into the "very well" share further down.
    Percent_of_Other_Bilangual =
      Percent.of.specified.language.speakers..Speak.English..less.than.very.well...Estimate..Speak.a.language.other.than.English..Other.languages.,
    # Speakers per language category
    Spanish_Estimate =
      Total..Estimate..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Spanish.or.Spanish.Creole,
    Asian_Pacific_Estimate =
      Total..Estimate..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Asian.and.Pacific.Island.languages,
    Other_Indo_Europe_Estimate =
      Total..Estimate..Speak.a.language.other.than.English..Other.Indo.European.languages,
    Other_Estimate =
      Total..Estimate..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Other.languages,
    English_Estimate = Total..Estimate..Speak.only.English
  ) |>
  mutate(
    # Stored as a decimal for later use.
    NonEnglish_Language_Estimate = NonEnglish_Language_Estimate / 100,
    # The only category supplied as a percentage rather than a head count,
    # so convert it to a count using the population aged 5 and over.
    Other_Indo_Europe_Estimate = round(
      Total..Estimate..Population.5.years.and.over *
        (Other_Indo_Europe_Estimate / 100),
      0
    )
  ) |>
  # Blank ("-") percentages become 0; everything else becomes numeric.
  mutate(
    Percent_of_Spanish_Bilangual =
      acs_percent_to_numeric(Percent_of_Spanish_Bilangual),
    Percent_of_Asian_Pacific_Bilangual =
      acs_percent_to_numeric(Percent_of_Asian_Pacific_Bilangual),
    Percent_of_IndoEuro_Bilangual =
      acs_percent_to_numeric(Percent_of_IndoEuro_Bilangual),
    # FIX: the source column holds the share speaking English *less than*
    # very well, which previously made Other_Bilangual count the
    # non-bilingual speakers. Take the complement instead. A blank stays 0
    # (not 100) so counties without an estimate contribute nothing.
    # NOTE(review): assumes the "very well" and "less than very well" shares
    # of a language category sum to 100 - confirm against the S1601 layout,
    # or select the "very well" column for "Other languages" directly.
    Percent_of_Other_Bilangual = ifelse(
      Percent_of_Other_Bilangual %in% "-",
      0,
      100 - acs_percent_to_numeric(Percent_of_Other_Bilangual)
    ),
    # Unlike the per-category percents, this one is kept as a decimal.
    Percent_Bilangual = acs_percent_to_numeric(Percent_Bilangual) / 100
  ) |>
  # Bilingual head count per category:
  # speakers * share of them speaking English very well.
  mutate(
    Spanish_Bilangual =
      round(Spanish_Estimate * (Percent_of_Spanish_Bilangual / 100), 0),
    Asian_Pacific_Bilangual =
      round(Asian_Pacific_Estimate * (Percent_of_Asian_Pacific_Bilangual / 100), 0),
    IndoEuro_Bilangual =
      round(Other_Indo_Europe_Estimate * (Percent_of_IndoEuro_Bilangual / 100), 0),
    Other_Bilangual =
      round(Other_Estimate * (Percent_of_Other_Bilangual / 100), 0)
  ) |>
  # Bilingual head count as a share of the population aged 5 and over.
  mutate(
    Percent_Spanish_Bilangual = round(
      Spanish_Bilangual / Total..Estimate..Population.5.years.and.over, 3
    ),
    Percent_Asian_Pacific_Bilangual = round(
      Asian_Pacific_Bilangual / Total..Estimate..Population.5.years.and.over, 3
    ),
    Percent_IndoEuro_Bilangual = round(
      IndoEuro_Bilangual / Total..Estimate..Population.5.years.and.over, 3
    ),
    Percent_Other_Bilangual = round(
      Other_Bilangual / Total..Estimate..Population.5.years.and.over, 3
    )
  )
## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Percent_of_Spanish_Bilangual =
## ifelse(Percent_of_Spanish_Bilangual == "-", 0,
## as.numeric(Percent_of_Spanish_Bilangual))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
# Persist the full 2012 county table (row names are written as the first column).
write.csv(x = bilangualism_2012, file = "FULL_Bilingual_2012.csv")
# Rounded ratio `count / base`, forced to 0 whenever the category has no
# speakers or no bilingual speakers (this also avoids dividing by zero).
share_or_zero <- function(count, speakers, base) {
  ifelse(speakers == 0 | count == 0, 0, round(count / base, 3))
}

# County-level bilingual estimates for 2022, built from the S1601 extract.
# The *_Bilangual columns taken from the extract are used below as head
# counts: they are divided by speaker totals and by the population.
bilangualism_2022 <- language_2022 |>
  # Pick the needed columns and rename them in the same step.
  select(
    Geographic.Area.Name,
    Estimate..Total..Population.5.years.and.over,
    NonEnglish_Language_Estimate =
      Estimate..Total..Population.5.years.and.over..Speak.a.language.other.than.English,
    # Speakers per language category
    Spanish_Estimate =
      Estimate..Total..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Spanish,
    Asian_Pacific_Estimate =
      Estimate..Total..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Asian.and.Pacific.Island.languages,
    Other_Indo_Europe_Estimate =
      Estimate..Total..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Other.Indo.European.languages,
    Other_Estimate =
      Estimate..Total..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Other.languages,
    English_Monolangual =
      Estimate..Total..Population.5.years.and.over..Speak.only.English,
    # Speakers per category who speak English only or "very well"
    Spanish_Bilangual =
      Estimate..Speak.English.only.or.speak.English.very.well...Percent.of.specified.language.speakers..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Spanish.,
    Asian_Pacific_Bilangual =
      Estimate..Speak.English.only.or.speak.English.very.well...Percent.of.specified.language.speakers..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Asian.and.Pacific.Island.languages.,
    IndoEuro_Bilangual =
      Estimate..Speak.English.only.or.speak.English.very.well...Percent.of.specified.language.speakers..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Other.Indo.European.languages.,
    Other_Bilangual =
      Estimate..Speak.English.only.or.speak.English.very.well...Percent.of.specified.language.speakers..Population.5.years.and.over..SPEAK.A.LANGUAGE.OTHER.THAN.ENGLISH..Other.languages.,
    Other_Langual_Not_Bilingual =
      Estimate..Speak.English..less.than.very.well...Percent.of.specified.language.speakers..Population.5.years.and.over..Speak.a.language.other.than.English.,
    Overall_Bilingual =
      Estimate..Speak.English.only.or.speak.English.very.well...Percent.of.specified.language.speakers..Population.5.years.and.over..Speak.a.language.other.than.English.
  ) |>
  mutate(
    # Bilingual speakers as a share of that category's own speakers.
    Percent_of_Spanish_Bilangual =
      share_or_zero(Spanish_Bilangual, Spanish_Estimate, Spanish_Estimate),
    Percent_of_Asian_Pacific_Bilangual = share_or_zero(
      Asian_Pacific_Bilangual, Asian_Pacific_Estimate, Asian_Pacific_Estimate
    ),
    Percent_of_IndoEuro_Bilangual = share_or_zero(
      IndoEuro_Bilangual, Other_Indo_Europe_Estimate, Other_Indo_Europe_Estimate
    ),
    Percent_of_Other_Bilangual =
      share_or_zero(Other_Bilangual, Other_Estimate, Other_Estimate),
    Percent_Overall_Bilingual = round(
      Overall_Bilingual / Estimate..Total..Population.5.years.and.over, 3
    ),
    # Bilingual speakers as a share of the population aged 5 and over.
    Percent_Spanish_Bilangual = share_or_zero(
      Spanish_Bilangual, Spanish_Estimate,
      Estimate..Total..Population.5.years.and.over
    ),
    Percent_Asian_Pacific_Bilangual = share_or_zero(
      Asian_Pacific_Bilangual, Asian_Pacific_Estimate,
      Estimate..Total..Population.5.years.and.over
    ),
    Percent_IndoEuro_Bilangual = share_or_zero(
      IndoEuro_Bilangual, Other_Indo_Europe_Estimate,
      Estimate..Total..Population.5.years.and.over
    ),
    Percent_Other_Bilangual = share_or_zero(
      Other_Bilangual, Other_Estimate,
      Estimate..Total..Population.5.years.and.over
    )
  )

# Persist the full 2022 county table.
write.csv(x = bilangualism_2022, file = "FULL_Bilingual_2022.csv")
# Shape one year's county table for aggregation.
#
# Keeps the county name, the population aged 5 and over (passed as an
# unquoted column because it is named differently in each year), the four
# bilingual counts and the four population shares. The " County, Texas"
# suffix is stripped so names match the lookup tables, which are then joined
# on by `County`; the grouping columns used by the later aggregation steps
# come from these joins.
shape_final_bilingual <- function(county_data, population_col) {
  county_data |>
    select(
      County = Geographic.Area.Name,
      Population_5_Years_Over = {{ population_col }},
      Spanish_Bilangual,
      Asian_Pacific_Bilangual,
      IndoEuro_Bilangual,
      Other_Bilangual,
      Percent_Spanish_Bilangual,
      Percent_Asian_Pacific_Bilangual,
      Percent_IndoEuro_Bilangual,
      Percent_Other_Bilangual
    ) |>
    mutate(County = sub(" County, Texas", "", County)) |>
    left_join(regions, by = "County") |>
    left_join(rural_urban, by = "County")
}

final_bilangual_2012 <- shape_final_bilingual(
  bilangualism_2012,
  Total..Estimate..Population.5.years.and.over
)
final_bilangual_2022 <- shape_final_bilingual(
  bilangualism_2022,
  Estimate..Total..Population.5.years.and.over
)

write.csv(x = final_bilangual_2012, file = "FINAL_Bilingual_2012.csv")
write.csv(x = final_bilangual_2022, file = "FINAL_Bilingual_2022.csv")
Overall methodology: Each county is weighted by its share of the population aged 5 and over within its rural/urban group (the group total, not the statewide total, is the denominator). The weighted county figures are then aggregated by rural/urban status to form the final aggregate.
# Aggregate county-level bilingual counts by rural/urban status.
#
# @param bilangual_data County table with columns Rural_Urban_Stat,
#   Population_5_Years_Over, Spanish_Bilangual, Asian_Pacific_Bilangual,
#   IndoEuro_Bilangual and Other_Bilangual.
# @return One row per Rural_Urban_Stat value with the summed counts
#   (Total_*), the summed population and each count as a rounded share of
#   that population (Percent_*).
#
# FIX: the previous version multiplied both the counts and the population by
# each county's population share before summing. The Total_* columns were
# therefore weighted averages rather than totals, and each percent worked
# out to sum(count * pop) / sum(pop^2), which lets the largest counties
# dominate twice over. Summing counts and population directly gives
# sum(count) / sum(pop), which is the population-weighted average of the
# county rates.
urban_rural_totals <- function(bilangual_data) {
  bilangual_data |>
    group_by(Rural_Urban_Stat) |>
    summarise(
      Total_Spanish_Bilangual = sum(Spanish_Bilangual, na.rm = TRUE),
      Total_Asian_Pacific_Bilangual = sum(Asian_Pacific_Bilangual, na.rm = TRUE),
      Total_IndoEuro_Bilangual = sum(IndoEuro_Bilangual, na.rm = TRUE),
      Total_Other_Bilangual = sum(Other_Bilangual, na.rm = TRUE),
      Total_Population_5_Years_Over = sum(Population_5_Years_Over, na.rm = TRUE)
    ) |>
    mutate(
      Percent_Spanish_Bilangual =
        round(Total_Spanish_Bilangual / Total_Population_5_Years_Over, 3),
      Percent_Asian_Pacific_Bilangual =
        round(Total_Asian_Pacific_Bilangual / Total_Population_5_Years_Over, 3),
      Percent_IndoEuro_Bilangual =
        round(Total_IndoEuro_Bilangual / Total_Population_5_Years_Over, 3),
      Percent_Other_Bilangual =
        round(Total_Other_Bilangual / Total_Population_5_Years_Over, 3)
    )
}

rural_urban_2012 <- urban_rural_totals(final_bilangual_2012)
rural_urban_2022 <- urban_rural_totals(final_bilangual_2022)
Overall methodology: Each county is weighted by its share of the population aged 5 and over within its region (the region total, not the statewide total, is the denominator). The weighted county figures are then aggregated by region to form the final aggregate.
# Aggregate county-level bilingual counts by region.
#
# @param bilangual_data County table with columns Region,
#   Population_5_Years_Over, Spanish_Bilangual, Asian_Pacific_Bilangual,
#   IndoEuro_Bilangual and Other_Bilangual.
# @return One row per Region value with the summed counts (Total_*), the
#   summed population and each count as a rounded share of that population
#   (Percent_*).
#
# FIX: the previous version multiplied both the counts and the population by
# each county's population share before summing. The Total_* columns were
# therefore weighted averages rather than totals, and each percent worked
# out to sum(count * pop) / sum(pop^2), which lets the largest counties
# dominate twice over. Summing counts and population directly gives
# sum(count) / sum(pop), which is the population-weighted average of the
# county rates.
region_total <- function(bilangual_data) {
  bilangual_data |>
    group_by(Region) |>
    summarise(
      Total_Spanish_Bilangual = sum(Spanish_Bilangual, na.rm = TRUE),
      Total_Asian_Pacific_Bilangual = sum(Asian_Pacific_Bilangual, na.rm = TRUE),
      Total_IndoEuro_Bilangual = sum(IndoEuro_Bilangual, na.rm = TRUE),
      Total_Other_Bilangual = sum(Other_Bilangual, na.rm = TRUE),
      Total_Population_5_Years_Over = sum(Population_5_Years_Over, na.rm = TRUE)
    ) |>
    mutate(
      Percent_Spanish_Bilangual =
        round(Total_Spanish_Bilangual / Total_Population_5_Years_Over, 3),
      Percent_Asian_Pacific_Bilangual =
        round(Total_Asian_Pacific_Bilangual / Total_Population_5_Years_Over, 3),
      Percent_IndoEuro_Bilangual =
        round(Total_IndoEuro_Bilangual / Total_Population_5_Years_Over, 3),
      Percent_Other_Bilangual =
        round(Total_Other_Bilangual / Total_Population_5_Years_Over, 3)
    )
}

regions_2012 <- region_total(final_bilangual_2012)
regions_2022 <- region_total(final_bilangual_2022)

write.csv(x = regions_2012, file = "Bilingual_Regions_2012.csv")
write.csv(x = regions_2022, file = "Bilingual_Regions_2022.csv")
# Change between two proportions, expressed in percentage points.
pct_point_change <- function(later, earlier) {
  (later - earlier) * 100
}

# ---- Rural / urban: 2012 vs 2022 ----
# The "State" row is dropped from both years. The 2012 shares get a _2012
# suffix so they do not collide with the 2022 columns after the merge.
to_rur_urb_merge_2012 <- rural_urban_2012 |>
  filter(Rural_Urban_Stat != "State") |>
  select(
    Percent_Spanish_Bilangual_2012 = Percent_Spanish_Bilangual,
    Percent_Asian_Pacific_Bilangual_2012 = Percent_Asian_Pacific_Bilangual,
    Percent_IndoEuro_Bilangual_2012 = Percent_IndoEuro_Bilangual,
    Percent_Other_Bilangual_2012 = Percent_Other_Bilangual,
    Rural_Urban_Stat
  )

to_rur_urb_merge_2022 <- rural_urban_2022 |>
  filter(Rural_Urban_Stat != "State") |>
  select(
    Percent_Spanish_Bilangual,
    Percent_Asian_Pacific_Bilangual,
    Percent_IndoEuro_Bilangual,
    Percent_Other_Bilangual,
    Rural_Urban_Stat
  )

differentials_rural_urban <- merge(
  to_rur_urb_merge_2012,
  to_rur_urb_merge_2022,
  by = "Rural_Urban_Stat"
) |>
  mutate(
    Spanish = pct_point_change(
      Percent_Spanish_Bilangual, Percent_Spanish_Bilangual_2012
    ),
    Asian_Pacific_Island_Languages = pct_point_change(
      Percent_Asian_Pacific_Bilangual, Percent_Asian_Pacific_Bilangual_2012
    ),
    Other_Indo_European_Language = pct_point_change(
      Percent_IndoEuro_Bilangual, Percent_IndoEuro_Bilangual_2012
    ),
    Other_Language = pct_point_change(
      Percent_Other_Bilangual, Percent_Other_Bilangual_2012
    )
  ) |>
  select(
    Rural_Urban_Stat,
    Spanish,
    Asian_Pacific_Island_Languages,
    Other_Indo_European_Language,
    Other_Language
  )

# ---- Regions: 2012 vs 2022 ----
to_merge_regions_2012 <- regions_2012 |>
  filter(Region != "State") |>
  select(
    Percent_Spanish_Bilangual_2012 = Percent_Spanish_Bilangual,
    Percent_Asian_Pacific_Bilangual_2012 = Percent_Asian_Pacific_Bilangual,
    Percent_IndoEuro_Bilangual_2012 = Percent_IndoEuro_Bilangual,
    Percent_Other_Bilangual_2012 = Percent_Other_Bilangual,
    Region
  )

to_merge_regions_2022 <- regions_2022 |>
  filter(Region != "State") |>
  select(
    Percent_Spanish_Bilangual,
    Percent_Asian_Pacific_Bilangual,
    Percent_IndoEuro_Bilangual,
    Percent_Other_Bilangual,
    Region
  )

differentials_regions <- merge(
  to_merge_regions_2012,
  to_merge_regions_2022,
  by = "Region"
) |>
  mutate(
    Spanish = pct_point_change(
      Percent_Spanish_Bilangual, Percent_Spanish_Bilangual_2012
    ),
    Asian_Pacific_Island_Languages = pct_point_change(
      Percent_Asian_Pacific_Bilangual, Percent_Asian_Pacific_Bilangual_2012
    ),
    Other_Indo_European_Language = pct_point_change(
      Percent_IndoEuro_Bilangual, Percent_IndoEuro_Bilangual_2012
    ),
    Other_Language = pct_point_change(
      Percent_Other_Bilangual, Percent_Other_Bilangual_2012
    )
  ) |>
  select(
    Region,
    Spanish,
    Asian_Pacific_Island_Languages,
    Other_Indo_European_Language,
    Other_Language
  )

write.csv(x = differentials_regions, file = "Regions_Bilingual_Differential.csv")
write.csv(
  x = differentials_rural_urban,
  file = "Rural_Urban_Bilingual_Differential.csv"
)