# Exam 1

 # Clear every object from the current workspace so the results below do not
 # depend on variables left over from earlier work. This removes objects only:
 # attached packages and options() are left as they are, so restart R when a
 # truly fresh session is needed.
 # (A bare `remove()` call with no arguments removes nothing, so none is made.)
 rm(list = ls())

# Load the Oregon population table: one row per age group, with total, male
# and female counts.
data1 <- read.csv(
  "C:\\Users\\anami\\OneDrive\\Documents\\Social demography and community trends\\Oregon.csv"
)
str(data1)

## 'data.frame':    18 obs. of  4 variables:
##  $ age_group: chr  " 0-4" " 5-9" " 10-14" " 15-19" ...
##  $ total    : chr  "198,150" "222,642" "252,517" "256,017" ...
##  $ male     : chr  "100,314" "114,284" "130,822" "128,461" ...
##  $ female   : chr  "97,836" "108,358" "121,695" "127,556" ...

# The counts arrive as text because they contain thousands separators
# (e.g. "198,150"). Strip the commas and convert each count column to numeric.
data1[c("total", "male", "female")] <- lapply(
  data1[c("total", "male", "female")],
  function(counts) as.numeric(gsub(",", "", counts))
)

# Population in the three broad age bands used by the dependency ratios.
# Rows are addressed by position: rows 1-3 are ages 0-14, rows 4-13 are ages
# 15-64 and rows 14-18 are ages 65+ (relies on the 18 five-year age groups
# being stored in ascending order).
youth_population <- sum(data1[1:3, "total"])
working_population <- sum(data1[4:13, "total"])
aged_population <- sum(data1[14:18, "total"])

# Dependency ratios, each expressed per 100 persons of working age
YDR <- 100 * (youth_population / working_population)
ADR <- 100 * (aged_population / working_population)
TDR <- 100 * ((youth_population + aged_population) / working_population)

# Report each ratio rounded to two decimal places
cat("Youth Dependency Ratio (YDR):", round(YDR, 2), "\n")

## Youth Dependency Ratio (YDR): 24.64

cat("Aged Dependency Ratio (ADR):", round(ADR, 2), "\n")

## Aged Dependency Ratio (ADR): 30.3

cat("Total Dependency Ratio (TDR):", round(TDR, 2), "\n")

## Total Dependency Ratio (TDR): 54.94

# Totals by sex, added over every age group
total_male <- sum(data1[["male"]])
total_female <- sum(data1[["female"]])

# Sex ratio: number of males for every 100 females
sex_ratio <- 100 * (total_male / total_female)

# Report the ratio rounded to two decimal places
cat("Sex Ratio for Oregon (Males per 100 Females):", round(sex_ratio, 2), "\n")

## Sex Ratio for Oregon (Males per 100 Females): 98.92

# Rebuild data1 from hard-coded counts with clean age labels. This replaces
# the data frame read from the CSV; the new one has no `total` column.
data1 <- local({
  # Labels "0-4", "5-9", ..., "80-84" followed by the open-ended "85+" group
  age_labels <- c(
    paste(seq(0, 80, by = 5), seq(4, 84, by = 5), sep = "-"),
    "85+"
  )
  data.frame(
    # Using the labels as the factor levels keeps the age groups in ascending
    # order instead of the alphabetical order a plain character vector gets.
    age_group = factor(age_labels, levels = age_labels),
    male = c(
      100314, 114284, 130822, 128461, 127646, 141923,
      160085, 148972, 156702, 143279, 133021, 119949,
      129884, 121631, 111335, 74472, 41557, 31576
    ),
    female = c(
      97836, 108358, 121695, 127556, 125711, 136557,
      153327, 144967, 150107, 132121, 128283, 119271,
      138089, 134309, 125224, 85903, 51993, 49474
    )
  )
})

# Total population: males and females over all age groups
total_population <- sum(data1[["male"]], data1[["female"]])

# Share of the total population in each age-sex cell, in percent
data1[["Male_Percent"]] <- 100 * (data1[["male"]] / total_population)
data1[["Female_Percent"]] <- 100 * (data1[["female"]] / total_population)

# ggplot2 supplies the plotting functions used for the pyramid
library(ggplot2)

# Long format: one row per age group and sex. Male percentages are negated so
# that the male bars extend in the opposite direction from the female bars.
pyramid_data <- data.frame(
  age_group = rep(data1$age_group, times = 2),
  Percent = c(-data1[["Male_Percent"]], data1[["Female_Percent"]]),
  Sex = c(rep("male", nrow(data1)), rep("female", nrow(data1)))
)

# Draw the age-sex pyramid
ggplot(pyramid_data, aes(x = age_group, y = Percent, fill = Sex)) +
  # geom_col() draws bars whose length is the value given in the data
  geom_col(width = 0.8) +
  # Flip the axes so the age groups run bottom to top and the bars lie flat
  coord_flip() +
  # Percentage scale (drawn horizontally after the flip): ticks from -4 to 4,
  # labelled with absolute values so the male side does not show minus signs
  scale_y_continuous(breaks = seq(-4, 4, by = 1), labels = abs) +
  labs(x = "age group", y = "Percentage", title = "Age-Sex Pyramid for Oregon") +
  theme_minimal()

### Vietnam age heaping

# Load the Vietnam single-year-of-age table (Age, Total, Female, Male).
data2 <- read.csv(
  "C:\\Users\\anami\\OneDrive\\Documents\\Social demography and community trends\\Vietnam_ageheaping.csv"
)
str(data2)

## 'data.frame':    40 obs. of  4 variables:
##  $ Age   : int  23 24 25 26 27 28 29 30 31 32 ...
##  $ Total : chr  "1,426,194" "1,482,736" "1,579,220" "1,657,742" ...
##  $ Female: chr  "697,705" "725,189" "775,799" "814,826" ...
##  $ Male  : chr  "728,488" "757,547" "803,421" "842,916" ...

# The counts arrive as text because they contain thousands separators.
# Strip the commas and convert each count column to numeric.
data2[c("Total", "Male", "Female")] <- lapply(
  data2[c("Total", "Male", "Female")],
  function(counts) as.numeric(gsub(",", "", counts))
)

# Whipple's index of age heaping: the population reported at ages ending in
# 0 or 5, relative to one fifth of the whole population aged 23-62, times 100.
# A value near 100 indicates no preference for ages ending in 0 or 5.

# Keep only ages 23 to 62, the range the index is computed over
population_23_62 <- subset(data2, Age >= 23 & Age <= 62)

# Ages ending in 0 or 5 are exactly the multiples of 5. `Age %% 5` can only
# take the values 0 to 4, so a single test against 0 selects all of them.
ages_ending_in_0_or_5 <- subset(population_23_62, Age %% 5 == 0)

# Numerators: population at ages ending in 0 or 5 (total, male, female)
total_pop_0_5 <- sum(ages_ending_in_0_or_5$Total)
male_pop_0_5 <- sum(ages_ending_in_0_or_5$Male)
female_pop_0_5 <- sum(ages_ending_in_0_or_5$Female)

# Denominators: the whole population aged 23 to 62 (total, male, female)
total_pop_23_62 <- sum(population_23_62$Total)
male_pop_23_62 <- sum(population_23_62$Male)
female_pop_23_62 <- sum(population_23_62$Female)

# Whipple index for the total population, males and females.
# 0.2 * population is the count expected at ages ending in 0 or 5 when
# reported ages are spread evenly over the five terminal-digit pairs.
whipple_index_total <- (total_pop_0_5 / (0.2 * total_pop_23_62)) * 100
whipple_index_male <- (male_pop_0_5 / (0.2 * male_pop_23_62)) * 100
whipple_index_female <- (female_pop_0_5 / (0.2 * female_pop_23_62)) * 100

# Print the results with exactly two decimal places
cat("Whipple Index for Total Population:", formatC(whipple_index_total, format = "f", digits = 2), "\n")

## Whipple Index for Total Population: 99.61

cat("Whipple Index for Male Population:", formatC(whipple_index_male, format = "f", digits = 2), "\n")

## Whipple Index for Male Population: 99.40

cat("Whipple Index for Female Population:", formatC(whipple_index_female, format = "f", digits = 2), "\n")

## Whipple Index for Female Population: 99.82

# Exam 1

# Anamika Kumar

# 2024-10-19