Use of data from IPUMS USA is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
Data Cleaning and Preparation
# Normalise the column types used by the later sections of the report.
# Columns are converted in place, so the column order is unchanged.
ipums_data <- ipums_data %>%
  mutate(
    # Whole-number fields (BPL is compared against numeric codes later on).
    across(c(YEAR, AGE, BPL), as.integer),
    # Code-like fields stored as text (STATEFIP for mapping, IND1950 for
    # the industry label lookup).
    across(c(STATEFIP, URBAN, IND1950), as.character),
    # Person weight must be numeric for the weighted sums.
    PERWT = as.numeric(PERWT),
    SEX = factor(SEX, labels = c("Male", "Female"))
  )
# Labels for the LABFORCE codes shown in the comparison table.
labforce_labels <- c(
  "0" = "N/A",
  "1" = "No, not in the labor force",
  "2" = "Yes, in the labor force",
  "9" = "Unclassifiable"
)

# Build a one-row-per-status table of weighted counts and within-year
# percentages for 1900 and 1910.
#
# `data` must contain YEAR, LABFORCE and PERWT. The result has the columns
# LABFORCE, `1900` and `1910`, each cell formatted as "count (pct%)", or
# "N/A" when a status has no records in that year.
process_labor_force <- function(data) {
  data %>%
    group_by(YEAR, LABFORCE) %>%
    summarise(count = sum(PERWT, na.rm = TRUE), .groups = "drop") %>%
    # Re-group by year so that the total (and therefore the percentage) is
    # computed within each census year rather than across both years pooled.
    group_by(YEAR) %>%
    mutate(
      LABFORCE = unname(labforce_labels[as.character(LABFORCE)]),
      Total = sum(count),
      Percentage = (count / Total) * 100
    ) %>%
    ungroup() %>%
    select(YEAR, LABFORCE, count, Percentage) %>%
    pivot_wider(
      names_from = YEAR,
      values_from = c(count, Percentage),
      names_glue = "{YEAR}_{.value}"
    ) %>%
    mutate(
      `1900` = ifelse(
        !is.na(`1900_count`),
        paste0(`1900_count`, " (", round(`1900_Percentage`, 2), "%)"),
        "N/A"
      ),
      `1910` = ifelse(
        !is.na(`1910_count`),
        paste0(`1910_count`, " (", round(`1910_Percentage`, 2), "%)"),
        "N/A"
      )
    ) %>%
    select(LABFORCE, `1900`, `1910`)
}

# Fall back to the full dataset when no general-population frame exists yet.
if (!exists("general_population_data")) {
  general_population_data <- ipums_data
}

# Labor force status of India-born individuals.
labor_force_india <- india_born_data %>%
  process_labor_force() %>%
  rename(`1900 India-Born` = `1900`, `1910 India-Born` = `1910`)

# Labor force status of the general population.
labor_force_general <- general_population_data %>%
  process_labor_force() %>%
  rename(`1900 General` = `1900`, `1910 General` = `1910`)

# Merge both summaries into a single comparison table. Statuses that are
# absent from the India-born summary would otherwise surface as raw NA, so
# they are filled with the same "N/A" marker the summary function uses.
labor_force_comparison <- labor_force_general %>%
  left_join(labor_force_india, by = "LABFORCE") %>%
  mutate(across(-LABFORCE, function(cell) coalesce(cell, "N/A")))

# Display the comparison table.
kable(
  labor_force_comparison,
  digits = 2,
  caption = "Comparison of Labor Force Status Between India-Born and General Population (1900 & 1910)"
)
Comparison of Labor Force Status Between India-Born and General Population (1900 & 1910)
LABFORCE
1900 General
1910 General
1900 India-Born
1910 India-Born
N/A
27603280 (16.39%)
31365865 (18.62%)
501 (7.04%)
502 (7.05%)
No, not in the labor force
21026481 (12.49%)
24769361 (14.71%)
901 (12.66%)
1103 (15.5%)
Yes, in the labor force
27437775 (16.29%)
36210178 (21.5%)
602 (8.46%)
3509 (49.3%)
Occupations
# Lookup from the selected OCC codes to the occupation labels to report.
occupation_labels <- c(
  "999" = "N/A or None Reported",
  "12" = "Farmers (owners and tenants)",
  "46" = "Carpenters",
  "304" = "Bookkeepers and accounting clerks",
  "91" = "Truck, delivery, and tractor drivers",
  "988" = "Laborers, except construction",
  "994" = "Not specified service workers",
  "196" = "Clerical and kindred workers",
  "62" = "Machinists",
  "74" = "Electricians",
  "980" = "Operatives and kindred workers"
)

# Restrict the analysis to individuals aged 10 and over.
filtered_data <- ipums_data %>%
  filter(AGE >= 10)

# Weighted occupation shares for `people`, reduced to the labelled codes and
# cut to the ten largest. Percentages are taken over all OCC codes present
# in `people`, before the unlabelled codes are dropped.
tabulate_top_occupations <- function(people) {
  weighted_counts <- count(people, OCC, wt = PERWT)
  with_labels <- mutate(
    weighted_counts,
    Percentage = n / sum(n) * 100,
    Occupation = occupation_labels[as.character(OCC)]
  )
  with_labels %>%
    filter(!is.na(Occupation)) %>%
    arrange(desc(Percentage)) %>%
    head(10) %>%
    select(OCC, Occupation, n, Percentage)
}

# India-born group (selected with BPL == 521).
occupation_group <- tabulate_top_occupations(filter(filtered_data, BPL == 521))

# General population.
occupation_population <- tabulate_top_occupations(filtered_data)

# Display the India-born table.
kable(
  occupation_group,
  digits = 2,
  caption = "Top 10 Occupations of India-Born Individuals (Age 10+)"
)
Top 10 Occupations of India-Born Individuals (Age 10+)
OCC
Occupation
n
Percentage
999
N/A or None Reported
2005
29.85
12
Farmers (owners and tenants)
603
8.98
46
Carpenters
502
7.47
304
Bookkeepers and accounting clerks
500
7.44
91
Truck, delivery, and tractor drivers
201
2.99
988
Laborers, except construction
201
2.99
994
Not specified service workers
201
2.99
196
Clerical and kindred workers
200
2.98
62
Machinists
101
1.50
74
Electricians
101
1.50
# Render the general-population occupation table, rounded to two decimals.
occupation_population %>%
  kable(
    digits = 2,
    caption = "Top 10 Occupations of the General Population (Age 10+)"
  )
Top 10 Occupations of the General Population (Age 10+)
OCC
Occupation
n
Percentage
999
N/A or None Reported
54506584
41.95
304
Bookkeepers and accounting clerks
6892037
5.30
12
Farmers (owners and tenants)
2207438
1.70
196
Clerical and kindred workers
1442506
1.11
994
Not specified service workers
685775
0.53
91
Truck, delivery, and tractor drivers
513244
0.40
988
Laborers, except construction
459118
0.35
46
Carpenters
208899
0.16
62
Machinists
205815
0.16
980
Operatives and kindred workers
97883
0.08
Industries
# Lookup from the selected IND1950 codes to the industry labels to report.
industry_labels <- c(
  "0" = "N/A or None Reported",
  "105" = "Agriculture",
  "826" = "Private Households",
  "246" = "Construction",
  "506" = "Railroads and Railway Express Service",
  "997" = "Nonclassifiable",
  "636" = "Food Stores, Except Dairy Products",
  "888" = "Educational Services",
  "439" = "Yarn, Thread, and Fabric Mills",
  "216" = "Coal Mining"
)

# Restrict the analysis to individuals aged 10 and over.
filtered_data <- ipums_data %>%
  filter(AGE >= 10)

# Weighted industry shares for `people`, reduced to the labelled codes and
# cut to the ten largest. Percentages are taken over all IND1950 codes
# present in `people`, before the unlabelled codes are dropped.
tabulate_top_industries <- function(people) {
  weighted_counts <- count(people, IND1950, wt = PERWT)
  with_labels <- mutate(
    weighted_counts,
    Percentage = n / sum(n) * 100,
    Industry = industry_labels[as.character(IND1950)]
  )
  with_labels %>%
    filter(!is.na(Industry)) %>%
    arrange(desc(Percentage)) %>%
    head(10) %>%
    select(IND1950, Industry, n, Percentage)
}

# India-born group (selected with BPL == 521).
industry_group <- tabulate_top_industries(filter(filtered_data, BPL == 521))

# General population.
industry_population <- tabulate_top_industries(filtered_data)

# Display the India-born table.
kable(
  industry_group,
  digits = 2,
  caption = "Top 10 Industries of India-Born Individuals (Age 10+)"
)
Top 10 Industries of India-Born Individuals (Age 10+)
IND1950
Industry
n
Percentage
0
N/A or None Reported
2805
41.77
105
Agriculture
1205
17.94
246
Construction
101
1.50
826
Private Households
100
1.49
888
Educational Services
100
1.49
# Render the general-population industry table, rounded to two decimals.
industry_population %>%
  kable(
    digits = 2,
    caption = "Top 10 Industries of the General Population (Age 10+)"
  )
Top 10 Industries of the General Population (Age 10+)
IND1950
Industry
n
Percentage
0
N/A or None Reported
64287504
49.48
105
Agriculture
22809334
17.55
826
Private Households
3872636
2.98
246
Construction
3304884
2.54
506
Railroads and Railway Express Service
2435376
1.87
997
Nonclassifiable
1849958
1.42
636
Food Stores, Except Dairy Products
1434992
1.10
888
Educational Services
1319729
1.02
439
Yarn, Thread, and Fabric Mills
1081252
0.83
216
Coal Mining
979816
0.75
Urban/Rural Comparison
# Labels for the URBAN codes (adjust based on dataset).
urban_labels <- c(
  "0" = "N/A",
  "1" = "Rural",
  "2" = "Urban"
)

# Build a one-row-per-category table of weighted counts and within-year
# percentages for 1900 and 1910.
#
# `data` must contain YEAR, URBAN and PERWT. The result has the columns
# URBAN, `1900` and `1910`, each cell formatted as "count (pct%)", or "N/A"
# when a category has no records in that year.
process_urban_rural <- function(data) {
  data %>%
    group_by(YEAR, URBAN) %>%
    summarise(count = sum(PERWT, na.rm = TRUE), .groups = "drop") %>%
    # Re-group by year so that the total (and therefore the percentage) is
    # computed within each census year rather than across both years pooled.
    group_by(YEAR) %>%
    mutate(
      URBAN = unname(urban_labels[as.character(URBAN)]),
      Total = sum(count),
      Percentage = (count / Total) * 100
    ) %>%
    ungroup() %>%
    select(YEAR, URBAN, count, Percentage) %>%
    pivot_wider(
      names_from = YEAR,
      values_from = c(count, Percentage),
      names_glue = "{YEAR}_{.value}"
    ) %>%
    mutate(
      `1900` = ifelse(
        !is.na(`1900_count`),
        paste0(`1900_count`, " (", round(`1900_Percentage`, 2), "%)"),
        "N/A"
      ),
      `1910` = ifelse(
        !is.na(`1910_count`),
        paste0(`1910_count`, " (", round(`1910_Percentage`, 2), "%)"),
        "N/A"
      )
    ) %>%
    select(URBAN, `1900`, `1910`)
}

# Fall back to the full dataset when no general-population frame exists yet.
if (!exists("general_population_data")) {
  general_population_data <- ipums_data
}

# Urban/rural distribution of India-born individuals.
urban_rural_india <- india_born_data %>%
  process_urban_rural() %>%
  rename(`1900 India-Born` = `1900`, `1910 India-Born` = `1910`)

# Urban/rural distribution of the general population.
urban_rural_general <- general_population_data %>%
  process_urban_rural() %>%
  rename(`1900 General` = `1900`, `1910 General` = `1910`)

# Merge both summaries into a single comparison table. Categories that are
# absent from the India-born summary would otherwise surface as raw NA, so
# they are filled with the same "N/A" marker the summary function uses.
urban_rural_comparison <- urban_rural_general %>%
  left_join(urban_rural_india, by = "URBAN") %>%
  mutate(across(-URBAN, function(cell) coalesce(cell, "N/A")))

# Display the comparison table.
kable(
  urban_rural_comparison,
  digits = 2,
  caption = "Comparison of Urban/Rural Distribution Between India-Born and General Population (1900 & 1910)"
)
Comparison of Urban/Rural Distribution Between India-Born and General Population (1900 & 1910)
URBAN
1900 General
1910 General
1900 India-Born
1910 India-Born
Rural
46638181 (27.69%)
50952346 (30.25%)
400 (5.62%)
2006 (28.18%)
Urban
29429355 (17.47%)
41393058 (24.58%)
1604 (22.53%)
3108 (43.66%)
Nativity
# Labels for the NATIVITY codes shown in the comparison table.
nativity_labels <- c(
  "0" = "N/A or Unknown",
  "1" = "Both Parents Native-Born",
  "2" = "Father Foreign, Mother Native",
  "3" = "Mother Foreign, Father Native",
  "4" = "Both Parents Foreign",
  "5" = "Foreign-Born"
)

# Build a one-row-per-category table of weighted counts and within-year
# percentages for 1900 and 1910.
#
# `data` must contain YEAR, NATIVITY and PERWT. The result has the columns
# NATIVITY, `1900` and `1910`, each cell formatted as "count (pct%)", or
# "N/A" when a category has no records in that year.
process_nativity <- function(data) {
  data %>%
    group_by(YEAR, NATIVITY) %>%
    summarise(count = sum(PERWT, na.rm = TRUE), .groups = "drop") %>%
    # Re-group by year so that the total (and therefore the percentage) is
    # computed within each census year rather than across both years pooled.
    group_by(YEAR) %>%
    mutate(
      NATIVITY = unname(nativity_labels[as.character(NATIVITY)]),
      Total = sum(count),
      Percentage = (count / Total) * 100
    ) %>%
    ungroup() %>%
    select(YEAR, NATIVITY, count, Percentage) %>%
    pivot_wider(
      names_from = YEAR,
      values_from = c(count, Percentage),
      names_glue = "{YEAR}_{.value}"
    ) %>%
    mutate(
      `1900` = ifelse(
        !is.na(`1900_count`),
        paste0(`1900_count`, " (", round(`1900_Percentage`, 2), "%)"),
        "N/A"
      ),
      `1910` = ifelse(
        !is.na(`1910_count`),
        paste0(`1910_count`, " (", round(`1910_Percentage`, 2), "%)"),
        "N/A"
      )
    ) %>%
    select(NATIVITY, `1900`, `1910`)
}

# Fall back to the full dataset when no general-population frame exists yet.
if (!exists("general_population_data")) {
  general_population_data <- ipums_data
}

# Nativity of India-born individuals.
nativity_india <- india_born_data %>%
  process_nativity() %>%
  rename(`1900 India-Born` = `1900`, `1910 India-Born` = `1910`)

# Nativity of the general population.
nativity_general <- general_population_data %>%
  process_nativity() %>%
  rename(`1900 General` = `1900`, `1910 General` = `1910`)

# Merge both summaries into a single comparison table. Categories that are
# absent from the India-born summary would otherwise surface as raw NA, so
# they are filled with the same "N/A" marker the summary function uses.
nativity_comparison <- nativity_general %>%
  left_join(nativity_india, by = "NATIVITY") %>%
  mutate(across(-NATIVITY, function(cell) coalesce(cell, "N/A")))

# Display the comparison table.
kable(
  nativity_comparison,
  digits = 2,
  caption = "Comparison of Nativity Between India-Born and General Population (1900 & 1910)"
)
Comparison of Nativity Between India-Born and General Population (1900 & 1910)
NATIVITY
1900 General
1910 General
1900 India-Born
1910 India-Born
Both Parents Native-Born
49808238 (29.58%)
59242530 (35.18%)
NA
NA
Father Foreign, Mother Native
3496849 (2.08%)
4234224 (2.51%)
NA
NA
Mother Foreign, Father Native
1673298 (0.99%)
2227348 (1.32%)
NA
NA
Both Parents Foreign
10605032 (6.3%)
12971464 (7.7%)
NA
NA
Foreign-Born
10484119 (6.23%)
13669838 (8.12%)
2004 (28.15%)
5114 (71.85%)
Geographic Distribution (For Mapping)
# Per-state weighted totals: everyone, and India-born only.
geo_distribution_data <- ipums_data %>%
  group_by(STATEFIP) %>%
  summarise(
    # Total weighted population in the state.
    total_state_population = sum(PERWT, na.rm = TRUE),
    # India-born weighted population in the state.
    india_population = sum(if_else(india_born == 1, PERWT, 0), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # The shares are computed after the summary, on ungrouped data, so that
  # `sum(india_population)` is the total over all states. Inside the grouped
  # summarise it would be each state's own value and the share would always
  # come out as 1 (or NaN for states with no India-born residents).
  mutate(
    # Share of the total India-born population living in each state.
    india_prop_of_group = india_population / sum(india_population),
    # Share of the state's population that is India-born.
    india_prop_in_state = india_population / total_state_population
  )

# Convert `STATEFIP` to full state names for clarity. STATEFIP holds numeric
# FIPS codes (stored as text), not postal abbreviations, so it is matched
# against a FIPS lookup. The 50 states in `state.name` are in the same
# alphabetical order as their FIPS codes, which run from 1 to 56 with
# 3, 7, 14, 43 and 52 unused and 11 assigned to the District of Columbia.
# Codes outside this lookup yield NA.
state_fips_codes <- c(setdiff(1:56, c(3L, 7L, 11L, 14L, 43L, 52L)), 11L)
state_full_names <- c(state.name, "District of Columbia")

geo_distribution_data <- geo_distribution_data %>%
  mutate(
    State = state_full_names[match(as.integer(STATEFIP), state_fips_codes)]
  )
Mapping
# plot_usmap() is given a `fips` column; derive it from STATEFIP.
geo_distribution_data <- geo_distribution_data %>%
  mutate(fips = as.numeric(STATEFIP))

# Draw one state-level choropleth of `value_column` from `map_data`.
#
# map_data      data frame with a `fips` column and the column to plot
# value_column  name (string) of the column used for the fill
# legend_name   legend title
# low_colour, high_colour  end points of the continuous fill gradient
# label_fn      function used to format the legend labels
# map_title, map_caption   plot title and caption
draw_india_state_map <- function(map_data, value_column, legend_name,
                                 low_colour, high_colour, label_fn,
                                 map_title, map_caption) {
  plot_usmap(data = map_data, values = value_column, regions = "states") +
    scale_fill_continuous(
      name = legend_name,
      low = low_colour,
      high = high_colour,
      # Spelled out in full: `label` only worked via partial matching.
      labels = label_fn
    ) +
    labs(
      title = map_title,
      subtitle = "Source: IPUMS-USA",
      caption = map_caption
    ) +
    theme_minimal()
}

# Number of India-born individuals per state.
map_india_population <- draw_india_state_map(
  geo_distribution_data,
  value_column = "india_population",
  legend_name = "India-born Pop.",
  low_colour = "lightblue",
  high_colour = "darkblue",
  label_fn = scales::comma,
  map_title = "Number of India-born Individuals by State (1900 & 1910)",
  map_caption = "Darker states indicate a higher number of India-born residents"
)

# Each state's share of the total India-born population.
map_india_share <- draw_india_state_map(
  geo_distribution_data,
  value_column = "india_prop_of_group",
  legend_name = "Share of India-born (%)",
  low_colour = "lightgreen",
  high_colour = "darkgreen",
  label_fn = scales::percent,
  map_title = "Proportion of India-born Individuals in Each State (1900 & 1910)",
  map_caption = "Darker states indicate a higher proportion of the total India-born population"
)

# India-born residents as a share of each state's population.
map_india_relative <- draw_india_state_map(
  geo_distribution_data,
  value_column = "india_prop_in_state",
  legend_name = "India-born as % of State Pop.",
  low_colour = "lightpink",
  high_colour = "darkred",
  label_fn = scales::percent,
  map_title = "Proportion of State Population That is India-born (1900 & 1910)",
  map_caption = "Darker states indicate a higher proportion of India-born residents in the state population"
)

map_india_population