remove()  #Remove objects from the workspace
 rm(list=ls())  #removes all objects from the current workspace (R memory)
data1 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment 4\\data_birth.csv")
str(data1)
## 'data.frame':    9 obs. of  10 variables:
##  $ Age.Group     : chr  "Lt 15" "15-19" "20-24" "25-29" ...
##  $ Hispanic.Total: chr  "774" "69,354" "217,753" "255,130" ...
##  $ Hispanic.F    : chr  "367" "33,865" "106,958" "125,532" ...
##  $ Hispanic.M    : chr  "407" "35,489" "110,795" "129,598" ...
##  $ NH.Black.Total: chr  "613" "40,016" "148,091" "167,778" ...
##  $ NH.Black.F    : chr  "291" "19,627" "72,777" "82,289" ...
##  $ NH.Black.M    : chr  "322" "20,389" "75,314" "85,489" ...
##  $ NH.White.Total: chr  "399" "71,854" "342,954" "594,375" ...
##  $ NH.White.F    : chr  "200" "35,080" "166,808" "288,993" ...
##  $ NH.White.M    : chr  "199" "36,774" "176,146" "305,382" ...
data2 <- data1
data2[-1] <- lapply(data2[-1], function(x) as.numeric(gsub(",", "", x)))
print(data2)
##   Age.Group Hispanic.Total Hispanic.F Hispanic.M NH.Black.Total NH.Black.F
## 1     Lt 15            774        367        407            613        291
## 2     15-19          69354      33865      35489          40016      19627
## 3     20-24         217753     106958     110795         148091      72777
## 4     25-29         255130     125532     129598         167778      82289
## 5     30-34         207352     101973     105379         122564      60561
## 6     35-39         116353      57069      59284          65129      31906
## 7     40-44          28835      14327      14508          15167       7551
## 8     45-49           1670        834        836           1220        603
## 9   50 plus            100         61         39            137         70
##   NH.Black.M NH.White.Total NH.White.F NH.White.M
## 1        322            399        200        199
## 2      20389          71854      35080      36774
## 3      75314         342954     166808     176146
## 4      85489         594375     288993     305382
## 5      62003         627425     305416     322009
## 6      33223         297050     144995     152055
## 7       7616          54062      26560      27502
## 8        617           3960       1910       2050
## 9         67            382        183        199
dataA <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment 4\\Female Population RE 2017.csv")
str(dataA)
## 'data.frame':    7 obs. of  4 variables:
##  $ Age.Group.Code: chr  "15-19" "20-24" "25-29" "30-34" ...
##  $ Hispanic.F    : chr  "2,396,297" "2,337,914" "2,281,205" "2,149,865" ...
##  $ NH.Black.F    : chr  "1,574,012" "1,677,678" "1,773,228" "1,519,870" ...
##  $ NH.White.F    : chr  "5,648,792" "5,937,916" "6,424,521" "6,220,020" ...
dataB <- dataA
dataB[-1] <- lapply(dataB[-1], function(x) as.numeric(gsub(",", "", x)))
print(dataB)
##   Age.Group.Code Hispanic.F NH.Black.F NH.White.F
## 1          15-19    2396297    1574012    5648792
## 2          20-24    2337914    1677678    5937916
## 3          25-29    2281205    1773228    6424521
## 4          30-34    2149865    1519870    6220020
## 5          35-39    2130340    1475146    6056650
## 6          40-44    1994371    1351467    5643966
## 7          45-49    1826427    1413393    6488048
str(data2)
## 'data.frame':    9 obs. of  10 variables:
##  $ Age.Group     : chr  "Lt 15" "15-19" "20-24" "25-29" ...
##  $ Hispanic.Total: num  774 69354 217753 255130 207352 ...
##  $ Hispanic.F    : num  367 33865 106958 125532 101973 ...
##  $ Hispanic.M    : num  407 35489 110795 129598 105379 ...
##  $ NH.Black.Total: num  613 40016 148091 167778 122564 ...
##  $ NH.Black.F    : num  291 19627 72777 82289 60561 ...
##  $ NH.Black.M    : num  322 20389 75314 85489 62003 ...
##  $ NH.White.Total: num  399 71854 342954 594375 627425 ...
##  $ NH.White.F    : num  200 35080 166808 288993 305416 ...
##  $ NH.White.M    : num  199 36774 176146 305382 322009 ...
str(dataB)
## 'data.frame':    7 obs. of  4 variables:
##  $ Age.Group.Code: chr  "15-19" "20-24" "25-29" "30-34" ...
##  $ Hispanic.F    : num  2396297 2337914 2281205 2149865 2130340 ...
##  $ NH.Black.F    : num  1574012 1677678 1773228 1519870 1475146 ...
##  $ NH.White.F    : num  5648792 5937916 6424521 6220020 6056650 ...
# Crude Birth Rate calculation
total_births_hispanic <- sum(data2$Hispanic.Total)
total_births_black <- sum(data2$NH.Black.Total)
total_births_white <- sum(data2$NH.White.Total)
total_population_hispanic <- sum(dataB$Hispanic.F)
total_population_black <- sum(dataB$NH.Black.F)
total_population_white <- sum(dataB$NH.White.F)
cbr_hispanic <- (total_births_hispanic / total_population_hispanic) * 1000
cbr_black <- (total_births_black / total_population_black) * 1000
cbr_white <- (total_births_white / total_population_white) * 1000
list(CBR_Hispanic = cbr_hispanic,CBR_Black = cbr_black,CBR_White = cbr_white)
## $CBR_Hispanic
## [1] 59.36069
## 
## $CBR_Black
## [1] 51.99126
## 
## $CBR_White
## [1] 46.96995
# ASFR Calculation
data3 <- data2[2:8, ]
asfr_hispanic <- (data3$Hispanic.Total / dataB$Hispanic.F) * 1000

asfr_black <- (data3$NH.Black.Total / dataB$NH.Black.F) * 1000

asfr_white <- (data3$NH.White.Total / dataB$NH.White.F) * 1000
list(ASFR_Hispanic = asfr_hispanic,
  ASFR_Black = asfr_black,
  ASFR_White = asfr_white)
## $ASFR_Hispanic
## [1]  28.9421553  93.1398674 111.8400144  96.4488468  54.6171034  14.4581926
## [7]   0.9143535
## 
## $ASFR_Black
## [1] 25.4229320 88.2714085 94.6172743 80.6411075 44.1508840 11.2226196  0.8631711
## 
## $ASFR_White
## [1]  12.7202418  57.7566271  92.5166250 100.8718621  49.0452643   9.5787253
## [7]   0.6103531

TFR

# ASFR values for each group (from the provided image)
asfr_hispanic <- c(28.94, 93.14, 111.84, 96.50, 54.62, 14.50, 0.91)
asfr_black <- c(25.42, 88.27, 94.62, 80.64, 44.15, 11.22, 0.86)
asfr_white <- c(12.72, 57.76, 92.52, 100.87, 49.04, 9.60, 0.61)

# Calculate TFR for each group by summing the ASFRs and multiplying by 5
tfr_hispanic <- sum(asfr_hispanic) * 5
tfr_black <- sum(asfr_black) * 5
tfr_white <- sum(asfr_white) * 5

# Output the TFR values
cat("TFR for Hispanics:", tfr_hispanic, "\n")
## TFR for Hispanics: 2002.25
cat("TFR for Blacks:", tfr_black, "\n")
## TFR for Blacks: 1725.9
cat("TFR for Whites:", tfr_white, "\n")
## TFR for Whites: 1615.6

GRR

# Create a data frame with the provided data
df <- data.frame(
  Age.Group = c('15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49'),
  Hispanic.F.Pop = c(2396297, 2337914, 2281205, 2149865, 2130340, 1994371, 1826427),
  NH.Black.F.Pop = c(1574012, 1677678, 1773228, 1519870, 1475146, 1351467, 1413393),
  NH.White.F.Pop = c(5648792, 5937916, 6424521, 6220020, 6056650, 5643966, 6488048),
  Hispanic.F.Births = c(33865, 106958, 125532, 101973, 57969, 14327, 834),
  NH.Black.F.Births = c(19627, 72777, 82289, 60561, 31906, 7551, 603),
  NH.White.F.Births = c(35080, 166808, 288993, 305416, 144995, 26560, 1910)
)

# Calculate Age-Specific Fertility Rate (ASFR) for female births only
df$Hispanic.F.ASFR <- (df$Hispanic.F.Births / df$Hispanic.F.Pop) * 1000
df$NH.Black.F.ASFR <- (df$NH.Black.F.Births / df$NH.Black.F.Pop) * 1000
df$NH.White.F.ASFR <- (df$NH.White.F.Births / df$NH.White.F.Pop) * 1000

# Calculate Gross Reproduction Rate (GRR) by summing ASFR for female births only, and multiplying by 5 for 5-year age groups
GRR_Hispanic <- 5 * sum(df$Hispanic.F.ASFR)
GRR_NH_Black <- 5 * sum(df$NH.Black.F.ASFR)
GRR_NH_White <- 5 * sum(df$NH.White.F.ASFR)

# Print the GRR values
GRR_Hispanic
## [1] 985.9707
GRR_NH_Black
## [1] 848.7222
GRR_NH_White
## [1] 786.6359

NRR

# Create a data frame with the provided ASFR and survival probabilities (lx) for Non-Hispanic Black females
df <- data.frame(
  Age.Group = c('15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49'),
  NH.Black.F.Births = c(19627, 72777, 82289, 60561, 31906, 7551, 603),  # Female births
  NH.Black.F.Pop = c(1574012, 1677678, 1773228, 1519870, 1475146, 1351467, 1413393),  # Female population
  Survival.Prob = c(0.986, 0.984, 0.980, 0.975, 0.969, 0.955, 0.943)  # Survival probabilities (lx)
)

# Calculate ASFR (Age-Specific Fertility Rate) by dividing births by population and multiplying by 1000
df$ASFR <- (df$NH.Black.F.Births / df$NH.Black.F.Pop) * 1000

# Calculate the product of ASFR and survival probabilities for each age group
df$Product <- df$ASFR * df$Survival.Prob

# Calculate the NRR by summing the products and multiplying by 5 (for the 5-year intervals)
NRR_NH_Black <- 5 * sum(df$Product)

# Print the NRR
NRR_NH_Black
## [1] 830.0264
# Create a data frame with the provided ASFR and survival probabilities (lx) for Hispanic females
df_hispanic <- data.frame(
  Age.Group = c('15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49'),
  Hispanic.F.Births = c(33865, 106958, 125532, 101973, 57969, 14327, 834),  # Female births
  Hispanic.F.Pop = c(2396297, 2337914, 2281205, 2149865, 2130340, 1994371, 1826427),  # Female population
  Survival.Prob = c(0.986, 0.984, 0.980, 0.975, 0.969, 0.955, 0.943)  # Survival probabilities (lx)
)

# Calculate ASFR (Age-Specific Fertility Rate) by dividing births by population and multiplying by 1000
df_hispanic$ASFR <- (df_hispanic$Hispanic.F.Births / df_hispanic$Hispanic.F.Pop) * 1000

# Calculate the product of ASFR and survival probabilities for each age group
df_hispanic$Product <- df_hispanic$ASFR * df_hispanic$Survival.Prob

# Calculate the NRR by summing the products and multiplying by 5 (for the 5-year intervals)
NRR_Hispanic <- 5 * sum(df_hispanic$Product)

# Print the NRR
NRR_Hispanic
## [1] 963.9254
# Create a data frame with the provided ASFR and survival probabilities (lx) for Non-Hispanic White females
df_white <- data.frame(
  Age.Group = c('15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49'),
  NH.White.F.Births = c(35080, 166808, 288993, 305416, 144995, 26560, 1910),  # Female births
  NH.White.F.Pop = c(5648792, 5937916, 6424521, 6220020, 6056650, 5643966, 6488048),  # Female population
  Survival.Prob = c(0.986, 0.984, 0.980, 0.975, 0.969, 0.955, 0.943)  # Survival probabilities (lx)
)

# Calculate ASFR (Age-Specific Fertility Rate) by dividing births by population and multiplying by 1000
df_white$ASFR <- (df_white$NH.White.F.Births / df_white$NH.White.F.Pop) * 1000

# Calculate the product of ASFR and survival probabilities for each age group
df_white$Product <- df_white$ASFR * df_white$Survival.Prob

# Calculate the NRR by summing the products and multiplying by 5 (for the 5-year intervals)
NRR_NH_White <- 5 * sum(df_white$Product)

# Print the NRR
NRR_NH_White
## [1] 768.4644
# Install and load necessary libraries
# install.packages("ggplot2")
library(ggplot2)

# Correct data for each group
tfr_values <- c(2002.25, 1720.9, 1610.6)    # Total Fertility Rates (TFR)
grr_values <- c(985.97, 848.72, 786.64)     # Gross Reproduction Rates (GRR)
nrr_values <- c(963.93, 830.03, 768.46)     # Net Reproduction Rates (NRR)

# Create a data frame for the population groups and corresponding rates
categories <- c("TFR", "GRR", "NRR")
comparison_data <- data.frame(
  Rate_Type = factor(rep(categories, each = 3), levels = c("TFR", "GRR", "NRR")),  # Ensure correct order
  Group = factor(c("Hispanic", "Black", "White", "Hispanic", "Black", "White", "Hispanic", "Black", "White"), 
                 levels = c("Hispanic", "Black", "White")),  # Ensure correct order for groups
  Value = c(tfr_values, grr_values, nrr_values)
)

# Create the comparison graph
ggplot(comparison_data, aes(x = Rate_Type, y = Value, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Comparison of TFR, GRR, and NRR by Population Group",
       x = "Fertility Measure", y = "Fertility Rate") +
  theme_minimal() +
  theme(legend.title = element_blank())

# Load necessary libraries
library(ggplot2)
library(tidyr)

# Example ASFR data (replace these with your actual ASFR values)
age_groups <- c("15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49")

# Example ASFR calculations for each group (from previous calculations)
asfr_hispanic <- c(28.94, 93.14, 111.84, 96.50, 54.62, 14.50, .91)  # Use your actual calculated ASFRs
asfr_black <- c(25.42, 88.27, 94.62, 80.64, 44.15, 11.22, .86)   # Use your actual calculated ASFRs
asfr_white <- c(12.72, 57.76, 92.52, 100.87, 49.04, 9.60, .61)     # Use your actual calculated ASFRs

# Create a data frame for plotting
asfr_data <- data.frame(
  Age_Group = factor(age_groups, levels = age_groups),  # Ensure age groups are in correct order
  ASFR_Hispanic = asfr_hispanic,
  ASFR_Black = asfr_black,
  ASFR_White = asfr_white
)

# Reshape data for plotting with ggplot2
asfr_long <- gather(asfr_data, key = "Population", value = "ASFR", ASFR_Hispanic, ASFR_Black, ASFR_White)

# Plot the ASFRs for each population with the legend at the bottom
ggplot(asfr_long, aes(x = Age_Group, y = ASFR, group = Population, color = Population)) +
  geom_line(size = 1) +                  # Add lines to represent ASFR
  geom_point(size = 2) +                 # Add points to highlight data points
  labs(title = "Age-Specific Fertility Rates (ASFR) by Population Group", 
       x = "Age Group", y = "ASFR (per 1000 women)") +
  scale_color_manual(values = c("ASFR_Hispanic" = "blue", "ASFR_Black" = "red", "ASFR_White" = "green")) +
  theme_minimal() +                      # Apply minimal theme
  theme(legend.position = "bottom",      # Place legend at the bottom
        legend.direction = "horizontal", # Make the legend horizontal
        legend.title = element_blank())  # Remove legend title
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.