library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data3 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment2\\Bexar County Population estimates 2020.csv")
data4 <- data3[apply(data3, 1, function(row) all(!is.na(row) & row != "")), ]
str(data4)
## 'data.frame': 1375 obs. of 9 variables:
## $ Age : chr "< 1 year" "< 1 year" "< 1 year" "< 1 year" ...
## $ Age.Code : chr "0" "0" "0" "0" ...
## $ Race : chr "American Indian or Alaska Native" "American Indian or Alaska Native" "American Indian or Alaska Native" "American Indian or Alaska Native" ...
## $ Race.Code : chr "1002-5" "1002-5" "1002-5" "1002-5" ...
## $ Ethnicity : chr "Hispanic or Latino" "Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" ...
## $ Ethnicity.Code: chr "2135-2" "2135-2" "2186-5" "2186-5" ...
## $ Gender : chr "Female" "Male" "Female" "Male" ...
## $ Gender.Code : chr "F" "M" "F" "M" ...
## $ Population : int 251 261 34 34 129 133 427 448 286 300 ...
data4 <- data4 %>%
mutate(Age.Code = as.numeric(Age.Code))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Age.Code = as.numeric(Age.Code)`.
## Caused by warning:
## ! NAs introduced by coercion
data4a <- na.omit(data4)
data4b<-data4a%>%
filter(Ethnicity.Code=="2135-2")
summary(data4b)
## Age Age.Code Race Race.Code
## Length:680 Min. : 0 Length:680 Length:680
## Class :character 1st Qu.:21 Class :character Class :character
## Mode :character Median :42 Mode :character Mode :character
## Mean :42
## 3rd Qu.:63
## Max. :84
## Ethnicity Ethnicity.Code Gender Gender.Code
## Length:680 Length:680 Length:680 Length:680
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Population
## Min. : 1.0
## 1st Qu.: 69.0
## Median : 152.0
## Mean :1797.6
## 3rd Qu.: 576.5
## Max. :9914.0
str(data4b)
## 'data.frame': 680 obs. of 9 variables:
## $ Age : chr "< 1 year" "< 1 year" "< 1 year" "< 1 year" ...
## $ Age.Code : num 0 0 0 0 0 0 0 0 1 1 ...
## $ Race : chr "American Indian or Alaska Native" "American Indian or Alaska Native" "Asian or Pacific Islander" "Asian or Pacific Islander" ...
## $ Race.Code : chr "1002-5" "1002-5" "A-PI" "A-PI" ...
## $ Ethnicity : chr "Hispanic or Latino" "Hispanic or Latino" "Hispanic or Latino" "Hispanic or Latino" ...
## $ Ethnicity.Code: chr "2135-2" "2135-2" "2135-2" "2135-2" ...
## $ Gender : chr "Female" "Male" "Female" "Male" ...
## $ Gender.Code : chr "F" "M" "F" "M" ...
## $ Population : int 251 261 129 133 286 300 8031 8320 229 223 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 ...
## ..- attr(*, "names")= chr [1:16] "2465" "2466" "2468" "2469" ...
data4b%>%
summarise(mean_age = weighted.mean(data4b$Age.Code, w = data4b$Population))
data4bfemale<-data4b%>%
filter(Gender=="Female")
summary(data4bfemale)
## Age Age.Code Race Race.Code
## Length:340 Min. : 0 Length:340 Length:340
## Class :character 1st Qu.:21 Class :character Class :character
## Mode :character Median :42 Mode :character Mode :character
## Mean :42
## 3rd Qu.:63
## Max. :84
## Ethnicity Ethnicity.Code Gender Gender.Code
## Length:340 Length:340 Length:340 Length:340
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Population
## Min. : 3.00
## 1st Qu.: 74.75
## Median : 153.50
## Mean :1826.87
## 3rd Qu.: 703.50
## Max. :9530.00
data4bfemale%>%
summarise(mean_age = weighted.mean(data4bfemale$Age.Code, w =data4bfemale$Population))
data4bmale<-data4b%>%
filter(Gender=="Male")
summary(data4bmale)
## Age Age.Code Race Race.Code
## Length:340 Min. : 0 Length:340 Length:340
## Class :character 1st Qu.:21 Class :character Class :character
## Mode :character Median :42 Mode :character Mode :character
## Mean :42
## 3rd Qu.:63
## Max. :84
## Ethnicity Ethnicity.Code Gender Gender.Code
## Length:340 Length:340 Length:340 Length:340
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Population
## Min. : 1.0
## 1st Qu.: 64.0
## Median : 151.0
## Mean :1768.2
## 3rd Qu.: 576.5
## Max. :9914.0
data4bmale%>%
summarise(mean_age = weighted.mean(data4bmale$Age.Code, w =data4bmale$Population))
youth_population <- sum(data4b$Population[data4b$Age.Code >= 0 & data4b$Age.Code <= 14])
working_population <- sum(data4b$Population[data4b$Age.Code >= 15 & data4b$Age.Code <= 64])
old_age_population <- sum(data4b$Population[data4b$Age.Code >= 65])
youth_dependency_ratio <- (youth_population / working_population) * 100
old_age_dependency_ratio <- (old_age_population / working_population) * 100
total_dependency_ratio <- ((youth_population + old_age_population) / working_population) * 100
cat("Youth Dependency Ratio:", sprintf("%.2f", youth_dependency_ratio), "\n")
## Youth Dependency Ratio: 35.07
cat("Old-Age Dependency Ratio:", sprintf("%.2f", old_age_dependency_ratio), "\n")
## Old-Age Dependency Ratio: 13.48
cat("Total Dependency Ratio:", sprintf("%.2f", total_dependency_ratio), "\n")
## Total Dependency Ratio: 48.55
str(data4b)
## 'data.frame': 680 obs. of 9 variables:
## $ Age : chr "< 1 year" "< 1 year" "< 1 year" "< 1 year" ...
## $ Age.Code : num 0 0 0 0 0 0 0 0 1 1 ...
## $ Race : chr "American Indian or Alaska Native" "American Indian or Alaska Native" "Asian or Pacific Islander" "Asian or Pacific Islander" ...
## $ Race.Code : chr "1002-5" "1002-5" "A-PI" "A-PI" ...
## $ Ethnicity : chr "Hispanic or Latino" "Hispanic or Latino" "Hispanic or Latino" "Hispanic or Latino" ...
## $ Ethnicity.Code: chr "2135-2" "2135-2" "2135-2" "2135-2" ...
## $ Gender : chr "Female" "Male" "Female" "Male" ...
## $ Gender.Code : chr "F" "M" "F" "M" ...
## $ Population : int 251 261 129 133 286 300 8031 8320 229 223 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 ...
## ..- attr(*, "names")= chr [1:16] "2465" "2466" "2468" "2469" ...
data4b$age_group <- cut(data4b$Age.Code,
breaks = c(-1,0, 4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79, 84),
labels = c("<1", "1-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34",
"35-39", "40-44", "45-49", "50-54", "55-59", "60-64",
"65-69", "70-74", "75-79", "80-84"),
right = TRUE) # Include the right endpoint in intervals (i.e., 4 means ages up to 4 years)
# Display the dataset with new age groups
head(data4b)
data4c<-subset(data4b,select=c(age_group,Population,Gender))%>%
group_by(age_group,Gender) %>%
summarise(population = sum(Population, na.rm = TRUE))
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
data4d<-data4a%>%
filter(Ethnicity.Code=="2186-5")
summary(data4d)
## Age Age.Code Race Race.Code
## Length:679 Min. : 0.00 Length:679 Length:679
## Class :character 1st Qu.:21.00 Class :character Class :character
## Mode :character Median :42.00 Mode :character Mode :character
## Mean :41.94
## 3rd Qu.:63.00
## Max. :84.00
## Ethnicity Ethnicity.Code Gender Gender.Code
## Length:679 Length:679 Length:679 Length:679
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Population
## Min. : 4
## 1st Qu.: 62
## Median : 580
## Mean :1142
## 3rd Qu.:1480
## Max. :4684
data4d%>%
summarise(mean_age = weighted.mean(data4d$Age.Code, w = data4d$Population))
data4dfemale<-data4d%>%
filter(Gender=="Female")
summary(data4dfemale)
## Age Age.Code Race Race.Code
## Length:340 Min. : 0 Length:340 Length:340
## Class :character 1st Qu.:21 Class :character Class :character
## Mode :character Median :42 Mode :character Mode :character
## Mean :42
## 3rd Qu.:63
## Max. :84
## Ethnicity Ethnicity.Code Gender Gender.Code
## Length:340 Length:340 Length:340 Length:340
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Population
## Min. : 4
## 1st Qu.: 67
## Median : 587
## Mean :1136
## 3rd Qu.:1460
## Max. :4220
data4dfemale%>%
summarise(mean_age = weighted.mean(data4dfemale$Age.Code, w =data4dfemale$Population))
data4dmale<-data4d%>%
filter(Gender=="Male")
summary(data4dmale)
## Age Age.Code Race Race.Code
## Length:339 Min. : 0.00 Length:339 Length:339
## Class :character 1st Qu.:21.00 Class :character Class :character
## Mode :character Median :42.00 Mode :character Mode :character
## Mean :41.88
## 3rd Qu.:63.00
## Max. :84.00
## Ethnicity Ethnicity.Code Gender Gender.Code
## Length:339 Length:339 Length:339 Length:339
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Population
## Min. : 4
## 1st Qu.: 58
## Median : 571
## Mean :1149
## 3rd Qu.:1487
## Max. :4684
data4dmale%>%
summarise(mean_age = weighted.mean(data4dmale$Age.Code, w =data4dmale$Population))
youth_population1 <- sum(data4d$Population[data4d$Age.Code >= 0 & data4d$Age.Code <= 14])
working_population1 <- sum(data4d$Population[data4d$Age.Code >= 15 & data4d$Age.Code <= 64])
old_age_population1 <- sum(data4d$Population[data4d$Age.Code >= 65])
youth_dependency_ratio1 <- (youth_population1 / working_population1) * 100
old_age_dependency_ratio1 <- (old_age_population1 / working_population1) * 100
total_dependency_ratio1 <- ((youth_population1 + old_age_population1) / working_population1) * 100
cat("Youth Dependency Ratio1:", sprintf("%.2f", youth_dependency_ratio1), "\n")
## Youth Dependency Ratio1: 25.68
cat("Old-Age Dependency Ratio1:", sprintf("%.2f", old_age_dependency_ratio1), "\n")
## Old-Age Dependency Ratio1: 22.16
cat("Total Dependency Ratio1:", sprintf("%.2f", total_dependency_ratio1), "\n")
## Total Dependency Ratio1: 47.84
str(data4d)
## 'data.frame': 679 obs. of 9 variables:
## $ Age : chr "< 1 year" "< 1 year" "< 1 year" "< 1 year" ...
## $ Age.Code : num 0 0 0 0 0 0 0 0 1 1 ...
## $ Race : chr "American Indian or Alaska Native" "American Indian or Alaska Native" "Asian or Pacific Islander" "Asian or Pacific Islander" ...
## $ Race.Code : chr "1002-5" "1002-5" "A-PI" "A-PI" ...
## $ Ethnicity : chr "Not Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" "Not Hispanic or Latino" ...
## $ Ethnicity.Code: chr "2186-5" "2186-5" "2186-5" "2186-5" ...
## $ Gender : chr "Female" "Male" "Female" "Male" ...
## $ Gender.Code : chr "F" "M" "F" "M" ...
## $ Population : int 34 34 427 448 1033 1073 2728 2853 29 28 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 ...
## ..- attr(*, "names")= chr [1:16] "2465" "2466" "2468" "2469" ...
data4d$age_group <- cut(data4d$Age.Code,
breaks = c(-1,0, 4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79, 84),
labels = c("<1", "1-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34",
"35-39", "40-44", "45-49", "50-54", "55-59", "60-64",
"65-69", "70-74", "75-79", "80-84"),
right = TRUE) # Include the right endpoint in intervals (i.e., 4 means ages up to 4 years)
# Display the dataset with new age groups
head(data4d)
data4e<-subset(data4d,select=c(age_group,Population,Gender))%>%
group_by(age_group,Gender) %>%
summarise(population = sum(Population, na.rm = TRUE))
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
data4b <- data4b %>%
mutate(Population = ifelse(Gender == "Male", -Population, Population))
# Create the population pyramid
ggplot(data4b, aes(x = Age.Code, y = Population, fill = Gender)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip coordinates for pyramid style
scale_y_continuous(labels = abs) + # Show positive labels on the y-axis
labs(y = "Population", x = "Age", title = "Bexar County Population Pyramid for Hispanics, 2020") +
scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
theme_minimal()
### Population Pyramid 1a
data4c <- data4c %>%
mutate(population = ifelse(Gender == "Male", -population, population))
# Create the population pyramid
ggplot(data4c, aes(x = age_group, y = population, fill = Gender)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip coordinates for pyramid style
scale_y_continuous(labels = abs) + # Show positive labels on the y-axis
labs(y = "Population", x = "Age Interval", title = "Bexar County Population Pyramid for Hispanics, 2020") +
scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
theme_minimal()
data4d <- data4d %>%
mutate(Population = ifelse(Gender == "Male", -Population, Population))
# Create the population pyramid
ggplot(data4d, aes(x = Age.Code, y = Population, fill = Gender)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip coordinates for pyramid style
scale_y_continuous(labels = abs) + # Show positive labels on the y-axis
labs(y = "Population", x = "Age", title = "Bexar County Population Pyramid for Non-hispanics, 2020") +
scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
theme_minimal()
data4e <- data4e %>%
mutate(population = ifelse(Gender == "Male", -population, population))
# Create the population pyramid
ggplot(data4e, aes(x = age_group, y = population, fill = Gender)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip coordinates for pyramid style
scale_y_continuous(labels = abs) + # Show positive labels on the y-axis
labs(y = "Population", x = "Age Interval", title = "Bexar County Population Pyramid for Non-hispanics, 2020") +
scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
theme_minimal()
### Graph: Hispanics
data4b_graph <- data4b %>%
group_by(Age.Code) %>%
summarize(sumpopulation = sum(Population))
data4b_graph1 <- data4b %>%
group_by(Age.Code,Gender) %>%
summarize(sumpopulation = sum(Population))
## `summarise()` has grouped output by 'Age.Code'. You can override using the
## `.groups` argument.
ggplot() +
geom_line(data = data4b_graph, aes(Age.Code, sumpopulation)) +
geom_line(data = data4b_graph1, aes(Age.Code, sumpopulation, color = Gender)) +
labs(
title = "Bexar County age structure for Hispanics, 2020", # Adding a title
x = "Age", # Label for the x-axis
y = "Population" # Label for the y-axis
) +
scale_x_continuous(breaks = seq(0, 85, by = 5)) +scale_y_continuous(breaks = seq(0, 20000, by = 2500))
data4d_graph <- data4d %>%
group_by(Age.Code) %>%
summarize(sumpopulation = sum(Population))
data4d_graph1 <- data4d %>%
group_by(Age.Code,Gender) %>%
summarize(sumpopulation = sum(Population))
## `summarise()` has grouped output by 'Age.Code'. You can override using the
## `.groups` argument.
ggplot() +
geom_line(data = data4d_graph, aes(Age.Code, sumpopulation)) +
geom_line(data = data4d_graph1, aes(Age.Code, sumpopulation, color = Gender)) +
labs(
title = "Bexar County age structure for Non-hispanics, 2020", # Adding a title
x = "Age", # Label for the x-axis
y = "Population" # Label for the y-axis
) +
scale_x_continuous(breaks = seq(0, 85, by = 5)) +scale_y_continuous(breaks = seq(0, 20000, by = 2500))