# Load the Oregon age-by-sex population table.
# NOTE: the former `remove()` call (a no-op when given no arguments) and
# `rm(list = ls())` were dropped. Wiping the global environment from inside a
# script destroys the caller's objects without producing a truly clean session
# (attached packages and options survive); restart R for a fresh session.
data1 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Social demography and community trends\\Oregon.csv")
str(data1)
## 'data.frame': 18 obs. of 4 variables:
## $ age_group: chr " 0-4" " 5-9" " 10-14" " 15-19" ...
## $ total : chr "198,150" "222,642" "252,517" "256,017" ...
## $ male : chr "100,314" "114,284" "130,822" "128,461" ...
## $ female : chr "97,836" "108,358" "121,695" "127,556" ...
# The count columns arrive as text with thousands separators (see str() output
# above), so strip the commas and convert all three to numeric in one pass.
count_cols <- c("total", "male", "female")
data1[count_cols] <- lapply(
  data1[count_cols],
  function(x) as.numeric(gsub(",", "", x, fixed = TRUE))
)
# Broad age bands, selected by row position in the five-year age table:
# rows 1-3 cover ages 0-14, rows 4-13 ages 15-64, rows 14-18 ages 65+.
youth_population <- with(data1, sum(total[seq(1, 3)]))
working_population <- with(data1, sum(total[seq(4, 13)]))
aged_population <- with(data1, sum(total[seq(14, 18)]))

# Dependency ratios: dependants per 100 persons of working age.
dependent_population <- youth_population + aged_population
YDR <- youth_population / working_population * 100
ADR <- aged_population / working_population * 100
TDR <- dependent_population / working_population * 100

# Report each ratio rounded to two decimals.
cat(
  "Youth Dependency Ratio (YDR):",
  round(YDR, digits = 2),
  "\n"
)
## Youth Dependency Ratio (YDR): 24.64
cat(
  "Aged Dependency Ratio (ADR):",
  round(ADR, digits = 2),
  "\n"
)
## Aged Dependency Ratio (ADR): 30.3
cat(
  "Total Dependency Ratio (TDR):",
  round(TDR, digits = 2),
  "\n"
)
## Total Dependency Ratio (TDR): 54.94
# Overall sex ratio: males per 100 females, summed across every age group.
total_male <- sum(data1[["male"]])
total_female <- sum(data1[["female"]])
sex_ratio <- total_male / total_female * 100
# Show the ratio rounded to two decimals.
cat(
  "Sex Ratio for Oregon (Males per 100 Females):",
  round(sex_ratio, digits = 2),
  "\n"
)
## Sex Ratio for Oregon (Males per 100 Females): 98.92
# Rebuild `data1` from hand-typed counts, with clean age labels stored as a
# factor whose level order follows age (so plots list the groups youngest to
# oldest instead of alphabetically). This replaces the table read from the CSV.
# NOTE(review): these typed counts sum to 2,115,913 males and 2,130,781
# females (sex ratio ~99.30), whereas the ratio echoed from the CSV data is
# 98.92 -- the two sources appear to differ; confirm which one is correct.
data1 <- local({
  age_labels <- c(
    "0-4", "5-9", "10-14", "15-19", "20-24", "25-29",
    "30-34", "35-39", "40-44", "45-49", "50-54", "55-59",
    "60-64", "65-69", "70-74", "75-79", "80-84", "85+"
  )
  data.frame(
    age_group = factor(age_labels, levels = age_labels),
    male = c(
      100314, 114284, 130822, 128461, 127646, 141923,
      160085, 148972, 156702, 143279, 133021, 119949,
      129884, 121631, 111335, 74472, 41557, 31576
    ),
    female = c(
      97836, 108358, 121695, 127556, 125711, 136557,
      153327, 144967, 150107, 132121, 128283, 119271,
      138089, 134309, 125224, 85903, 51993, 49474
    )
  )
})
# Grand total over both sexes; the denominator for every percentage below.
total_population <- sum(data1[["male"]]) + sum(data1[["female"]])
# Share of the whole population held by each age-sex cell, in percent.
data1[["Male_Percent"]] <- data1[["male"]] / total_population * 100
data1[["Female_Percent"]] <- data1[["female"]] / total_population * 100
# Plotting dependency
library(ggplot2)
# Long-format table: one row per age group and sex. Male percentages are
# negated so their bars extend to the left of zero and female bars to the right.
pyramid_data <- data.frame(
  age_group = rep(data1$age_group, times = 2),
  Percent = with(data1, c(-Male_Percent, Female_Percent)),
  Sex = rep(c("male", "female"), each = nrow(data1))
)
# Age-sex pyramid: bars are drawn per age group, then the axes are flipped so
# age runs vertically and the percentage scale runs horizontally.
ggplot(pyramid_data, aes(x = age_group, y = Percent, fill = Sex)) +
  geom_col(width = 0.8) +
  # Tick every 1 percentage point from -4 to 4; `abs` hides the minus sign
  # that was only introduced to push the male bars to the left.
  scale_y_continuous(breaks = seq(-4, 4, by = 1), labels = abs) +
  coord_flip() +
  labs(title = "Age-Sex Pyramid for Oregon", x = "age group", y = "Percentage") +
  theme_minimal()
### Vietnam: age heaping
# Load the single-year-of-age population table for Vietnam.
data2 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Social demography and community trends\\Vietnam_ageheaping.csv")
str(data2)
## 'data.frame': 40 obs. of 4 variables:
## $ Age : int 23 24 25 26 27 28 29 30 31 32 ...
## $ Total : chr "1,426,194" "1,482,736" "1,579,220" "1,657,742" ...
## $ Female: chr "697,705" "725,189" "775,799" "814,826" ...
## $ Male : chr "728,488" "757,547" "803,421" "842,916" ...
# The counts are text with thousands separators (see str() output above);
# drop the commas and convert each count column to numeric, in place.
data2 <- transform(
  data2,
  Total = as.numeric(gsub(",", "", Total)),
  Male = as.numeric(gsub(",", "", Male)),
  Female = as.numeric(gsub(",", "", Female))
)
# Whipple's index of age heaping for ages 23-62:
#   (population at ages ending in 0 or 5) / (1/5 of population aged 23-62) * 100

# Whipple index from the population at heaped ages (`heaped_pop`) and the
# population of the whole 23-62 age range (`range_pop`).
whipple_index <- function(heaped_pop, range_pop) {
  (heaped_pop / (0.2 * range_pop)) * 100
}

# Restrict the table to the 23-62 age range used by the index.
population_23_62 <- subset(data2, Age >= 23 & Age <= 62)
# Ages ending in 0 or 5 are exactly the multiples of 5. The previous extra
# test `Age %% 5 == 5` was dead code: a remainder modulo 5 is always below 5.
ages_ending_in_0_or_5 <- subset(population_23_62, Age %% 5 == 0)
# Population at the heaped ages (numerators).
total_pop_0_5 <- sum(ages_ending_in_0_or_5$Total)
male_pop_0_5 <- sum(ages_ending_in_0_or_5$Male)
female_pop_0_5 <- sum(ages_ending_in_0_or_5$Female)
# Population over the whole 23-62 range (denominators).
total_pop_23_62 <- sum(population_23_62$Total)
male_pop_23_62 <- sum(population_23_62$Male)
female_pop_23_62 <- sum(population_23_62$Female)
# Whipple index for the total, male, and female populations.
whipple_index_total <- whipple_index(total_pop_0_5, total_pop_23_62)
whipple_index_male <- whipple_index(male_pop_0_5, male_pop_23_62)
whipple_index_female <- whipple_index(female_pop_0_5, female_pop_23_62)
# Print the results with two fixed decimals.
cat("Whipple Index for Total Population:", formatC(whipple_index_total, format = "f", digits = 2), "\n")
## Whipple Index for Total Population: 99.61
cat("Whipple Index for Male Population:", formatC(whipple_index_male, format = "f", digits = 2), "\n")
## Whipple Index for Male Population: 99.40
cat("Whipple Index for Female Population:", formatC(whipple_index_female, format = "f", digits = 2), "\n")
## Whipple Index for Female Population: 99.82