industry_data_2012 <- read.csv("2012_2022 5 YEAR by industry S2404(2012).csv")
industry_data_2022 <- read.csv("2012_2022 5 YEAR by industry S2404(2022).csv")
rucc_data <- read.csv("Ruralurbancontinuumcodes2023.csv")
locations <- read.csv("location_countyNames.csv")
urban_rural <- rucc_data |>
mutate(Rural_Urban_Stat = ifelse(grepl("nonmetro", Description, ignore.case = TRUE), "Rural",
ifelse(grepl("metro", Description, ignore.case = TRUE), "Urban", NA)))
table(urban_rural$Rural_Urban_Stat)
##
## Rural Urban
## 168 86
table(rucc_data$Description)
##
## Metro - Counties in metro areas of 1 million population or more
## 34
## Metro - Counties in metro areas of 250,000 to 1 million population
## 35
## Metro - Counties in metro areas of fewer than 250,000 population
## 17
## Nonmetro - Urban population of 20,000 or more, adjacent to a metro area
## 10
## Nonmetro - Urban population of 20,000 or more, not adjacent to a metro area
## 6
## Nonmetro - Urban population of 5,000 to 20,000, adjacent to a metro area
## 42
## Nonmetro - Urban population of 5,000 to 20,000, not adjacent to a metro area
## 18
## Nonmetro - Urban population of fewer than 5,000, adjacent to a metro area
## 46
## Nonmetro - Urban population of fewer than 5,000, not adjacent to a metro area
## 46
^^ 86 urban and 168 rural, all checks out!
PIVOTING DATA
names <- locations |>
select(Location) |>
pull()
# # Pivoting the data
# clean_names <- names(industry_data_2012) |>
# str_replace_all(" County, Texas!!Female!!Estimate", "") |>
# str_replace_all("\\.\\.", " ") |>
# str_replace_all("!!Female!!Estimate", "") |>
# str_replace_all("\\.County Texas Female Estimate", "") |>
# str_replace_all("\\.", " ")
#
# names(industry_data_2012) <- clean_names
# # Create a pattern to match county columns
# pattern <- paste0("^(", paste(names, collapse = "|"), ")")
# # Pivot the data
# indust_2012_long <- industry_data_2012 |>
# pivot_longer(
# cols = matches(pattern),
# names_to = "County",
# values_to = "Estimate"
# )
ADDING RURAL/URBAN
# classification <- urban_rural |>
# select(County_Name, Rural_Urban_Stat) |>
# mutate(County_Name = str_replace_all(County_Name, " County", ""))
#
# classification <- classification|>
# add_row(County_Name = "Texas Female Estimate
# ", Rural_Urban_Stat = "State")
# classification <- classification |>
# rename(County = County_Name)
# final_2012_indust <- left_join(classification, indust_2012_long, by = "County")
#2012 dataset
#clean the names of the dataset
cleaned_names_2012 <- names(industry_data_2012) |>
str_replace_all(" County, Texas!!Female!!Estimate", "") |>
str_replace_all("\\.\\.", " ") |>
str_replace_all("!!Female!!Estimate", "") |>
str_replace_all("\\.County Texas Female Estimate", "") |>
str_replace_all("\\.", " ")
names(industry_data_2012) <- cleaned_names_2012
#2022 dataset
#clean the names of the dataset
cleaned_names_2022 <- names(industry_data_2022) |>
str_replace_all(" County, Texas!!Female!!Percent!!Estimate", "") |>
str_replace_all("\\.\\.", " ") |>
str_replace_all("\\.County Texas Percent\\.Female Estimate", "") |>
str_replace_all("\\.", " ")
names(industry_data_2022) <- cleaned_names_2022
industry_data_2022 <- industry_data_2022 |>
rename(`Texas Female Estimate` = `Texas Percent Female Estimate`)
#method to pivot dataset and add urban/rural classification
make_workable_dataset <- function(dataset, classification_data) {
#create a pattern to match county columns
pattern <- paste0("^(", paste(names, collapse = "|"), ")")
# pivot the data
dataset_long <- dataset |>
pivot_longer(
cols = matches(pattern),
names_to = "County",
values_to = "Estimate"
) |>
slice(1:6912)
#adding classification
classification <- classification_data |>
select(County_Name, Rural_Urban_Stat) |>
mutate(County_Name = str_replace_all(County_Name, " County", ""))
classification <- classification|>
add_row(County_Name = "Texas Female Estimate", Rural_Urban_Stat = "State") |>
rename(County = County_Name)
final_indust_data <- left_join(classification, dataset_long, by = "County")
final_indust_data <- final_indust_data |>
rename(Label = `Label Grouping `) |>
mutate(Label = str_replace_all(Label, "����", ""))
return(final_indust_data)
}
final_2012_indust <- make_workable_dataset(industry_data_2012, urban_rural)
final_2022_indust <- make_workable_dataset(industry_data_2022, urban_rural)
write.csv(file = "Industry_2012_ACS_5_YEAR.csv", final_2012_indust)
write.csv(file = "Industry_2022_ACS_5_YEAR.csv", final_2022_indust)