#Loading all libraries that may be needed
library(sf)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggmap)
library(scales)
library(gridExtra)
library(tidycensus)
library(cowplot)
library(OpenStreetMap)
library(rJava)
library(knitr)
library(gganimate)
library(ggmap)
library(transformr)
library(lubridate)
library(rayshader)
library(viridis)
library(rgl)
library(foreign)
library(marmap)
library(raster)
library(ggrepel)
library(RColorBrewer)
# Base URL of the shared course data, followed by the shared helper script.
# NOTE(review): plotTheme(), mapTheme() and q5() are called further down but
# are not defined in this file -- presumably they come from this script.
root.dir <- "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/DATA/"
source(
  "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r"
)

# Geography: first-level administrative boundaries shapefile, read as sf
Data <- st_read(
  dsn = "C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\shapefile\\gadm36_NGA_1.shp"
)

# Survey: Stata (.DTA) file read with foreign::read.dta
Survey <- read.dta(
  file = "C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\NGPR7AFL.DTA"
)
Question 1
How many Positive Blood Smear Test Children under 5?
Steps
Filter to those with ages under 6 (corrective age)
Filter those who tested positive for a Blood Smear
Ensure no repeat IDs so an individual is not counted
twice
ANSWER = 1546
# Question 1: survey rows with a corrected age (hml16) below 6 whose blood
# smear result (hml32) is 'positive'. Only the result and the ID column are
# kept, so the de-duplication below works on (hml32, hhid) pairs.
Under5_BloodSmear <- Survey %>%
  filter(hml16 < 6, hml32 == "positive") %>%
  dplyr::select(hml32, hhid)

# Count the unique rows and print the count as a table.
# NOTE(review): presumably hhid identifies a household rather than a person;
# if so, this counts households with a positive child -- confirm.
x <- Under5_BloodSmear %>%
  distinct() %>%
  count()
kable(x)
Question 2
How Many Rapid Tests for Children Under 5?
Steps
Filter to children under 6 based on their corrective
age.
Filter to where rapid blood test (hml35) is equal to positive,
negative, or missing indicating a test was made
Ensure there are no duplicate IDs.
ANSWER = 10815
# Question 2: survey rows with a corrected age (hml16) below 6 whose rapid
# test field (hml35) is 'positive', 'negative' or 'missing'.
Under5_Rapid <- Survey %>%
  filter(
    hml16 < 6,
    hml35 %in% c("positive", "negative", "missing")
  ) %>%
  # Keep the ID, age and rapid-test columns; these define row uniqueness below
  dplyr::select(hhid, hml16, hml35)

# Count the unique (hhid, hml16, hml35) rows and print the count as a table
x <- Under5_Rapid %>%
  distinct() %>%
  count()
kable(x)
Question 3
How many Microscopy Positive Results for Children?
STEPS
Using blood smear test as the microscopy result
filtered to those that tested positive for the blood
smear
Also filtered to those that were selected for a
microscopy
Use corrective age to ensure age under 18
Remove duplicate Ids
ANSWER = 1546
# Question 3: rows with a corrected age (hml16) below 18, a 'positive' blood
# smear (hml32) and a microscopy-selection flag (sbelig) of
# "yes, selected for microscopy".
Microscopy <- Survey %>%
  filter(
    hml16 < 18,
    hml32 == "positive",
    sbelig == "yes, selected for microscopy"
  ) %>%
  dplyr::select(hhid, sbelig, hml32)

# Count the unique (hhid, sbelig, hml32) rows and print the count as a table
x <- Microscopy %>%
  distinct() %>%
  count()
kable(x)
Question 4
How many Positive Rapid Tests for Children?
STEPS
Filter by corrective age to under 18 and rapid test to
positive
Ensure no duplicate IDs
ANSWER = 3229
# Question 4: rows with a corrected age (hml16) below 18 and a 'positive'
# rapid test (hml35).
RapidChildren <- Survey %>%
  filter(hml16 < 18, hml35 == "positive") %>%
  # NOTE(review): ha50 is kept here, so it takes part in the de-duplication
  # below; confirm that this column is intended.
  dplyr::select(hhid, ha50, hml35)

# Count the unique (hhid, ha50, hml35) rows and print the count as a table
x <- RapidChildren %>%
  distinct() %>%
  count()
kable(x)
Question 5
Make a plot showing the number of U5 children that tested positive
for malaria by microscopy and the number of U5 children that tested
negative for malaria by microscopy in urban and rural areas
STEPS
Filtered by Children under 5
Create categories for rural/urban, positive status/ negative
status using new fields
Ensure no duplicate IDs
Reformat table
Create Graph
# Question 5: number of children (corrected age below 6) selected for
# microscopy, split by blood smear result and urban/rural residence.
# The result is a long table with one row per category.
GraphQ5 <- Survey %>%
  # Only these columns are needed for the classification
  dplyr::select(hhid, hv025, hml16, sbelig, hml32) %>%
  filter(hml16 < 6) %>%
  mutate(
    # Building blocks shared by the four category indicators
    is_selected = sbelig == "yes, selected for microscopy",
    is_negative = hml32 == "negative",
    is_positive = hml32 == "positive",
    is_rural = hv025 == "rural",
    is_urban = hv025 == "urban",
    # 1/0 indicators: Rural/Urban hold the negative results,
    # pos/pos2 hold the positive results (rural/urban respectively)
    Rural = ifelse(is_rural & is_negative & is_selected, 1, 0),
    Urban = ifelse(is_urban & is_negative & is_selected, 1, 0),
    pos = ifelse(is_positive & is_selected & is_rural, 1, 0),
    pos2 = ifelse(is_positive & is_selected & is_urban, 1, 0)
  ) %>%
  dplyr::select(hhid, Urban, Rural, pos, pos2) %>%
  # Keep rows that fall into at least one of the four categories
  filter(Urban == 1 | Rural == 1 | pos == 1 | pos2 == 1) %>%
  # Drop repeated (ID, category) rows before counting
  distinct() %>%
  # Total each indicator under its display name
  summarise(
    `Positive for Malaria ; Rural` = sum(pos),
    `Positive for Malaria ; Urban` = sum(pos2),
    `Negative for Malaria ; Rural` = sum(Rural),
    `Negative for Malaria ; Urban` = sum(Urban)
  ) %>%
  # One row per category, as ggplot expects
  pivot_longer(
    cols = everything(),
    names_to = "Category",
    values_to = "Children"
  ) %>%
  mutate(Children = as.numeric(Children))

# Bar chart of the four counts; the legend is hidden because the x axis
# already names each category.
ggplot(GraphQ5, aes(x = Category, y = Children, fill = Category)) +
  geom_col() +
  labs(
    title = "Number of Nigerian Children by Malaria Status via Microscopy",
    subtitle = "Source: Nigerian Health Survey",
    y = "Number of Children"
  ) +
  scale_fill_manual(values = c("#E59E9E", "#E62020", "#00F38A", "#008C4F")) +
  plotTheme() +
  theme(legend.position = "none")

Question 6
Make another plot showing the proportion of U5 children that tested
positive for malaria by microscopy and the proportion of U5 children
that tested negative for malaria by microscopy in urban and rural
areas
STEPS
Use Question 5 analysis
Calculate proportions
Calculate pie chart positioning
Create graph using GGPlot
# Question 6: share of each Question 5 category, drawn as a pie chart.
# Limits printed numbers to 3 significant digits for the rest of the session.
options(digits = 3)

GraphQ5_Prop <- GraphQ5 %>%
  # Percentage of all counted children that fall in each category
  mutate(Proportion = Children / sum(Children) * 100) %>%
  dplyr::select(Category, Proportion) %>%
  # Label positions: the midpoint of each slice along the stacked axis.
  # csum is the reverse cumulative sum; the last row has no following csum
  # (lead() yields NA), so its midpoint is simply half its own proportion.
  mutate(
    csum = rev(cumsum(rev(Proportion))),
    pos = Proportion / 2 + lead(csum, 1),
    pos = if_else(is.na(pos), Proportion / 2, pos)
  )

ggplot(GraphQ5_Prop, aes(x = "", y = Proportion, fill = fct_inorder(Category))) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  theme(legend.position = "right") +
  labs(
    title = "Percentage of Children Testing Positive or Negative for Malaria by Geographic Region",
    subtitle = "Source: Nigerian Health Survey"
  ) +
  geom_label_repel(
    data = GraphQ5_Prop,
    # Round to the nearest whole percent; truncating would understate
    # every slice whose fractional part is 0.5 or more
    aes(y = pos, label = paste0(round(Proportion), "%")),
    size = 3.5, nudge_x = 1, show.legend = FALSE
  ) +
  # Colours follow the row order of GraphQ5 (via fct_inorder)
  scale_fill_manual(values = c("#00F38A", "#008C4F", "#E59E9E", "#E62020")) +
  guides(fill = guide_legend(title = "Categories"))

Question 7
Make a map of the number of children that tested positive for malaria
by state
STEPS
Filter by positive rapid or positive blood smear, along with
under 18 corrective age
Add new field for count
Ensure no duplicate IDs
Group by State
Sum Count
Join state data to survey data
Create map
# Question 7: per-state count of unique IDs with a positive rapid test or a
# positive blood smear and a corrected age (hml16) below 18.
ByState <- Survey %>%
  filter(hml16 < 18, hml32 == "positive" | hml35 == "positive") %>%
  # One row per (ID, state) pair so repeated IDs are counted once
  distinct(hhid, shstate) %>%
  group_by(shstate) %>%
  summarise(ChildCases = as.numeric(n())) %>%
  # Replace any missing count with zero.
  # NOTE(review): this runs before the join below, so states that end up
  # without a match in Geo still carry NA there -- confirm this is intended.
  mutate(ChildCases = coalesce(ChildCases, 0))

# Attach the counts to the state polygons; the join key is the lower-cased
# NAME_1 column of the boundary data.
Geo <- Data %>%
  mutate(shstate = tolower(NAME_1)) %>%
  left_join(ByState, by = "shstate")

# Choropleth of the counts binned by q5().
# NOTE(review): the legend labels are hard-coded and there are six labels
# for five colours; verify them against the data if the inputs change.
ggplot(Geo) +
  geom_sf(aes(fill = q5(ChildCases))) +
  scale_fill_manual(
    values = c("#FFEBEB", "#FFAFAD", "#FF221F", "#E00400", "#A30300"),
    labels = c("0-23", "24-70", "71-99", "100 - 120", "121 - 176", "N/A"),
    name = "Child Malaria Cases \n(Quintile Breaks)"
  ) +
  labs(
    title = "Nigerian Malaria Cases by Region",
    subtitle = "Source: Nigerian Health Survey"
  ) +
  mapTheme()

Question 8
Extract the data from the raster file and make a map showing average
housing quality values for each Nigerian state.
STEPS
Import Raster
Transform to Poly
Transform to SF Class
Intersect Nigerian state boundaries with SF Class
Group by State
Summarise average housing quality
Create Map
# Question 8: average housing-quality raster value per state.
# Read the housing raster from disk
NigerianHousing <- raster(
  "C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\housing data 2019 nigeria\\2019_Nature_Africa_Housing_2015_NGA.tiff"
)

# Turn the raster into polygons, then into an sf object
poly <- NigerianHousing %>%
  rasterToPolygons() %>%
  st_as_sf()

# Intersect the raster polygons with the state boundaries (Geo) so every
# piece carries the attributes of the state it falls in
Aggregate <- st_intersection(poly, Geo)

# Mean raster value of the pieces within each state (plain, unweighted mean)
Aggregate1 <- Aggregate %>%
  group_by(shstate) %>%
  summarise(
    `Average Housing Quality` = mean(X2019_Nature_Africa_Housing_2015_NGA)
  )

# Map of the state averages on a light-to-dark green gradient
ggplot(Aggregate1) +
  geom_sf(aes(fill = `Average Housing Quality`)) +
  scale_fill_gradient(low = "#EEFFEE", high = "#335533") +
  labs(title = "Nigerian Average Housing Quality Score by Region") +
  mapTheme()

Question 9
Identify any website of your choice and scrub their data. Pull it
into a csv and make at least one data visualization.
STEPS
Identified mortality dataset of Baltimore
Downloaded all associated data
Joined data with younger populations together — split up from
older populations so that we can have one scale for younger populations
and one scale for older populations as the mortality rate is higher in
older populations
Alter table to prepare for a facet wrap map
Create map
Repeat for older populations
# Question 9: read one GeoJSON layer per age band, keeping only the area
# identifier (CSA2010) and that band's mortality column. Each layer is read
# exactly once (the 45-64 layer was previously read and assigned twice).
M1To14 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort14__-7131955727435385779.geojson') %>%
  dplyr::select(CSA2010, mort14_18)

M15To24 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort24__440437359064296490.geojson') %>%
  dplyr::select(CSA2010, mort24_18)

# This object is later labelled "Ages 25 to 44" despite its name
M24To44 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort44__6262205918832827315.geojson') %>%
  dplyr::select(CSA2010, mort44_18)

M45To64 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort64__-6209940949464912372.geojson') %>%
  dplyr::select(CSA2010, mort64_18)

M65To84 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort84__7080676275320674118.geojson') %>%
  dplyr::select(CSA2010, mort84_18)
# Combine the three younger age bands into one long sf table with a single
# Mortality column, so they can share one colour scale in a faceted map.
MortalityData <- M1To14 %>%
  # Drop the geometry of the first table; it is picked up again from the
  # tables joined below
  st_drop_geometry() %>%
  # No `by` is given, so dplyr joins on every column the two sides share.
  # NOTE(review): for the right_join that presumably includes the geometry
  # column as well as CSA2010 -- confirm the layers have identical shapes.
  left_join(M15To24) %>%
  right_join(M24To44) %>%
  rename(
    `Ages 1 to 14` = mort14_18,
    `Ages 15 to 24` = mort24_18,
    `Ages 25 to 44` = mort44_18
  ) %>%
  # One row per (area, age band)
  pivot_longer(
    cols = c(`Ages 1 to 14`, `Ages 15 to 24`, `Ages 25 to 44`),
    names_to = "Age Category",
    values_to = "Mortality"
  ) %>%
  st_as_sf()

# One map panel per age band
ggplot(MortalityData) +
  geom_sf(aes(fill = Mortality)) +
  scale_fill_gradient(low = "#FFEEEE", high = "#773333") +
  labs(
    title = "Mortality Rate by Baltimore Neighborhood",
    subtitle = "Younger Populations"
  ) +
  mapTheme() +
  facet_wrap(vars(`Age Category`))

# Combine the two older age bands into one long sf table with a single
# Mortality column; they are mapped separately from the younger bands so
# each group gets its own colour scale.
MortalityData_Older <- M45To64 %>%
  # Drop the geometry of the first table; it is picked up again from the
  # table joined below
  st_drop_geometry() %>%
  # No `by` is given, so dplyr joins on every column the two sides share
  left_join(M65To84) %>%
  rename(
    `Ages 45 to 64` = mort64_18,
    `Ages 65 to 84` = mort84_18
  ) %>%
  # One row per (area, age band)
  pivot_longer(
    cols = c(`Ages 45 to 64`, `Ages 65 to 84`),
    names_to = "Age Category",
    values_to = "Mortality"
  ) %>%
  st_as_sf()

# One map panel per age band
ggplot(MortalityData_Older) +
  geom_sf(aes(fill = Mortality)) +
  scale_fill_gradient(low = "#FFEEEE", high = "#773333") +
  labs(
    title = "Mortality Rate by Baltimore Neighborhood",
    subtitle = "Older Populations"
  ) +
  mapTheme() +
  facet_wrap(vars(`Age Category`))

