#Loading all libraries that may be needed

library(sf) 
library(tidyverse) 
library(dplyr) 
library(ggplot2) 
library(ggmap) 
library(scales)
library(gridExtra)
library(tidycensus)
library(cowplot) 
library(OpenStreetMap)
library(rJava)
library(knitr)
library(gganimate)
library(ggmap)
library(transformr)
library(lubridate)
library(rayshader)
library(viridis)
library(rgl)
library(foreign)
library(marmap)
library(raster)
library(ggrepel)
library(RColorBrewer)

# Remote data folder and shared helper functions used by the plots below
root.dir <- "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/DATA/"
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")

# Geography layer (shapefile)
Data <- sf::st_read("C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\shapefile\\gadm36_NGA_1.shp")

# Survey records (Stata file)
Survey <- foreign::read.dta("C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\NGPR7AFL.DTA")

Question 1

How many Positive Blood Smear Test Children under 5?

Steps

  1. Filter to those with ages under 6 (corrective age)

  2. Filter those who tested positive for a Blood Smear

  3. Ensure no repeat IDs so an individual is not counted twice

ANSWER = 1546

# Question 1: positive blood-smear results among records whose corrective
# age (hml16) is below 6.
# NOTE(review): rows are de-duplicated on hhid plus the test result. If hhid
# identifies a household rather than a person, several children from the same
# household collapse into one row here -- confirm against the codebook.
Under5_BloodSmear <- Survey %>%
  filter(hml16 < 6, hml32 == "positive") %>%
  # Keep only the test result and the identifier
  dplyr::select(hml32, hhid)

# Drop repeated rows, then count what is left
x <- Under5_BloodSmear %>%
  distinct() %>%
  count()

kable(x)
n
1546

Question 2

How Many Rapid Tests for Children Under 5?

Steps

  1. Filter to children under 6 based on their corrective age.

  2. Filter to where the rapid blood test (hml35) is equal to positive, negative, or missing, indicating a test was made

  3. Ensure there are no duplicate IDs.

ANSWER = 10815

# Question 2: rapid tests recorded (positive, negative or missing) for records
# whose corrective age (hml16) is below 6.
Under5_Rapid <- Survey %>%
  # Only the identifier, the age and the rapid-test result are needed
  dplyr::select(hhid, hml16, hml35) %>%
  filter(hml16 < 6, hml35 %in% c("positive", "negative", "missing"))

# Drop repeated rows, then count what is left
x <- Under5_Rapid %>%
  distinct() %>%
  count()

kable(x)
n
10815

Question 3

How many Microscopy Positive Results for Children?

STEPS

  1. Using blood smear test as the microscopy result

  2. filtered to those that tested positive for the blood smear

  3. Also filtered to those that were selected for a microscopy

  4. Use corrective age to ensure age under 18

  5. Remove duplicate Ids

ANSWER = 1546

# Question 3: positive blood-smear results for records selected for
# microscopy whose corrective age (hml16) is below 18.
Microscopy <- Survey %>%
  filter(
    hml16 < 18,
    hml32 == "positive",
    sbelig == "yes, selected for microscopy"
  ) %>%
  dplyr::select(hhid, sbelig, hml32)

# Drop repeated rows, then count what is left
x <- Microscopy %>%
  distinct() %>%
  count()

kable(x)
n
1546

Question 4

How many Positive Rapid Tests for Children?

STEPS

  1. Filter by corrective age to under 18 and rapid test to positive

  2. Ensure no duplicate IDs

ANSWER = 3229

# Question 4: positive rapid tests for records whose corrective age (hml16)
# is below 18.
# NOTE(review): ha50 is carried along and therefore becomes part of the
# de-duplication key below; it is not used for anything else here -- confirm
# that this is intended.
RapidChildren <- Survey %>%
  filter(hml16 < 18, hml35 == "positive") %>%
  dplyr::select(hhid, ha50, hml35)

# Drop repeated rows, then count what is left
x <- RapidChildren %>%
  distinct() %>%
  count()

kable(x)
n
3229

Question 5

Make a plot showing the number of U5 children that tested positive for malaria by microscopy and the number of U5 children that tested negative for malaria by microscopy in urban and rural areas

STEPS

  1. Filtered by Children under 5

  2. Create categories for rural/urban, positive status/ negative status using new fields

  3. Ensure no duplicate IDs

  4. Reformat table

  5. Create Graph

# Question 5: number of children (corrective age below 6, selected for
# microscopy) by blood-smear result and urban/rural residence.
GraphQ5 <-
  Survey %>%
  dplyr::select(hhid, hv025, hml16, sbelig, hml32) %>%
  filter(hml16 < 6) %>%
  # One 0/1 indicator per residence-by-result combination
  mutate(
    selected = sbelig == "yes, selected for microscopy",
    Rural = ifelse(selected & hv025 == "rural" & hml32 == "negative", 1, 0),
    Urban = ifelse(selected & hv025 == "urban" & hml32 == "negative", 1, 0),
    pos = ifelse(selected & hv025 == "rural" & hml32 == "positive", 1, 0),
    pos2 = ifelse(selected & hv025 == "urban" & hml32 == "positive", 1, 0)
  ) %>%
  # Keep rows that fall into at least one of the four categories
  filter(Urban == 1 | Rural == 1 | pos == 1 | pos2 == 1) %>%
  # Keep the identifier and the indicators, dropping repeated rows
  distinct(hhid, Urban, Rural, pos, pos2) %>%
  # Total per category, already under the labels shown on the chart
  summarise(
    `Positive for Malaria ; Rural` = sum(pos),
    `Positive for Malaria ; Urban` = sum(pos2),
    `Negative for Malaria ; Rural` = sum(Rural),
    `Negative for Malaria ; Urban` = sum(Urban)
  ) %>%
  # Long format: one row per category for ggplot
  pivot_longer(
    cols = everything(),
    names_to = "Category",
    values_to = "Children"
  ) %>%
  mutate(Children = as.numeric(Children))

ggplot(GraphQ5, aes(x = Category, y = Children, fill = Category)) +
  geom_col() +
  labs(
    title = "Number of Nigerian Children by Malaria Status via Microscopy",
    subtitle = "Source: Nigerian Health Survey",
    y = "Number of Children"
  ) +
  scale_fill_manual(values = c("#E59E9E", "#E62020", "#00F38A", "#008C4F")) +
  plotTheme() +
  theme(legend.position = "none")

Question 6

Make another plot showing the proportion of U5 children that tested positive for malaria by microscopy and the proportion of U5 children that tested negative for malaria by microscopy in urban and rural areas

STEPS

  1. Use Question 5 analysis

  2. Calculate proportions

  3. Calculate pie chart positioning

  4. Create graph using GGPlot

# Question 6: share of each Question 5 category, drawn as a pie chart.
options(digits = 3)

GraphQ5_Prop <-
  # Start from the per-category counts built for Question 5
  GraphQ5 %>%
  # Percentage of all counted children that falls in each category
  mutate(Proportion = Children / sum(Children) * 100) %>%
  dplyr::select(Category, Proportion) %>%
  # Label anchors: midpoint of each slice along the stacked axis
  mutate(csum = rev(cumsum(rev(Proportion))),
         pos = Proportion / 2 + lead(csum, 1),
         pos = if_else(is.na(pos), Proportion / 2, pos))


ggplot(GraphQ5_Prop, aes(x = "", y = Proportion, fill = fct_inorder(Category))) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  theme(legend.position = "right") +
  labs(title = "Percentage of Children Testing Positive or Negative for Malaria by Geographic Region",
       subtitle = "Source: Nigerian Health Survey") +
  geom_label_repel(data = GraphQ5_Prop,
                   # round() instead of as.integer(): truncation would print
                   # e.g. 49.9 as "49%"
                   aes(y = pos, label = paste0(round(Proportion), "%")),
                   size = 3.5, nudge_x = 1, show.legend = FALSE) +
  scale_fill_manual(values = c("#00F38A", "#008C4F", "#E59E9E", "#E62020")) +
  guides(fill = guide_legend(title = "Categories"))

Question 7

Make a map of the number of children that tested positive for malaria by state

STEPS

  1. Filter by positive rapid or positive blood smear, along with under 18 corrective age

  2. Add new field for count

  3. Ensure no duplicate IDs

  4. Group by State

  5. Sum Count

  6. Join state data to survey data

  7. Create map

# Question 7: positive results (rapid test or blood smear) for records whose
# corrective age (hml16) is below 18, totalled by state and mapped.
ByState <-
  Survey %>%
  filter(hml16 < 18, hml32 == "positive" | hml35 == "positive") %>%
  # Constant column that is summed per state below
  mutate(Count = 1) %>%
  # Keep identifier, state and counter, dropping repeated rows
  distinct(hhid, shstate, Count) %>%
  group_by(shstate) %>%
  summarise(ChildCases = sum(Count)) %>%
  # NOTE(review): this NA fill runs before the join below, so states with no
  # matching row in ByState still end up with NA in Geo.
  mutate(ChildCases = ifelse(is.na(ChildCases), 0, ChildCases))

# Attach the per-state totals to the boundaries, matching on lower-cased name
Geo <- left_join(
  mutate(Data, shstate = tolower(NAME_1)),
  ByState,
  by = "shstate"
)

# NOTE(review): five colours but six labels are supplied -- confirm the "N/A"
# label lines up with the legend as intended.
ggplot(Geo) +
  geom_sf(aes(fill = q5(ChildCases))) +
  scale_fill_manual(
    values = c("#FFEBEB", "#FFAFAD", "#FF221F", "#E00400", "#A30300"),
    labels = c("0-23", "24-70", "71-99", "100 - 120", "121 - 176", "N/A"),
    name = "Child Malaria Cases \n(Quintile Breaks)"
  ) +
  labs(
    title = "Nigerian Malaria Cases by Region",
    subtitle = "Source: Nigerian Health Survey"
  ) +
  mapTheme()

Question 8

Extract the data from the raster file and make a map showing average housing quality values for each Nigerian state.

STEPS

  1. Import Raster

  2. Transform to Poly

  3. Transform to SF Class

  4. Intersect Nigerian state boundaries with SF Class

  5. Group by State

  6. Summarise average housing quality

  7. Create Map

# Question 8: mean housing-quality value per state, taken from the raster.
NigerianHousing <- raster("C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\housing data 2019 nigeria\\2019_Nature_Africa_Housing_2015_NGA.tiff")

# Raster cells -> polygons -> sf object
# NOTE(review): every cell becomes its own polygon before the intersection;
# raster::extract() on the state boundaries may be a cheaper route -- confirm
# the results match before switching.
poly <- NigerianHousing %>%
  rasterToPolygons() %>%
  st_as_sf()

# Cut the cell polygons by the state boundaries
Aggregate <- poly %>%
  st_intersection(Geo)

# Mean cell value per state
Aggregate1 <- summarise(
  group_by(Aggregate, shstate),
  `Average Housing Quality` = mean(X2019_Nature_Africa_Housing_2015_NGA)
)

ggplot(Aggregate1) +
  geom_sf(aes(fill = `Average Housing Quality`)) +
  scale_fill_gradient(low = "#EEFFEE", high = "#335533") +
  labs(title = "Nigerian Average Housing Quality Score by Region") +
  mapTheme()

Question 9

Identify any website of your choice and scrub their data. Pull it into a csv and make at least one data visualization.

STEPS

  1. Identified mortality dataset of Baltimore

  2. Downloaded all associated data

  3. Joined data with younger populations together — split up from older populations so that we can have one scale for younger populations and one scale for older populations as the mortality rate is higher in older populations

  4. Alter table to prepare for a facet wrap map

  5. Create map

  6. Repeat for older populations

# Question 9 inputs: one GeoJSON layer per age band, reduced to the area name
# (CSA2010) and that band's mortality column. Each file is read once; the
# repeated read of the Mort64 layer has been removed.
M1To14 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort14__-7131955727435385779.geojson') %>%
  dplyr::select(CSA2010, mort14_18)

M15To24 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort24__440437359064296490.geojson') %>%
  dplyr::select(CSA2010, mort24_18)

# Name kept as-is because later code refers to it; the plots label this
# layer "Ages 25 to 44".
M24To44 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort44__6262205918832827315.geojson') %>%
  dplyr::select(CSA2010, mort44_18)

M45To64 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort64__-6209940949464912372.geojson') %>%
  dplyr::select(CSA2010, mort64_18)

M65To84 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort84__7080676275320674118.geojson') %>%
  dplyr::select(CSA2010, mort84_18)
# Question 9 maps: mortality by area, one facet per age band. Younger and
# older bands are built separately so that each map gets its own colour scale.
#
# All joins name their key explicitly. Without `by`, the second join would also
# match on the `geometry` list-column brought in by the first join.
MortalityData <-
  M1To14 %>%
  st_drop_geometry() %>%
  # Attribute-only join: geometry is attached once, by the right_join below
  left_join(st_drop_geometry(M15To24), by = "CSA2010") %>%
  # Keeps every area of the 25-44 layer together with its geometry
  right_join(M24To44, by = "CSA2010") %>%
  rename(`Ages 1 to 14` = mort14_18,
         `Ages 15 to 24` = mort24_18,
         `Ages 25 to 44` = mort44_18) %>%
  # Long format: one row per area and age band for facet_wrap()
  pivot_longer(
    cols = c(`Ages 1 to 14`, `Ages 15 to 24`, `Ages 25 to 44`),
    names_to = "Age Category",
    values_to = "Mortality") %>%
  st_as_sf()

ggplot() +
  geom_sf(data = MortalityData, aes(fill = Mortality)) +
  scale_fill_gradient(low = "#FFEEEE", high = "#773333") +
  labs(title = "Mortality Rate by Baltimore Neighborhood", subtitle = "Younger Populations") +
  mapTheme() +
  facet_wrap(vars(`Age Category`))

MortalityData_Older <-
  M45To64 %>%
  st_drop_geometry() %>%
  # Geometry comes from the 65-84 layer
  left_join(M65To84, by = "CSA2010") %>%
  rename(`Ages 45 to 64` = mort64_18,
         `Ages 65 to 84` = mort84_18) %>%
  pivot_longer(
    cols = c(`Ages 45 to 64`, `Ages 65 to 84`),
    names_to = "Age Category",
    values_to = "Mortality") %>%
  st_as_sf()

ggplot() +
  geom_sf(data = MortalityData_Older, aes(fill = Mortality)) +
  scale_fill_gradient(low = "#FFEEEE", high = "#773333") +
  labs(title = "Mortality Rate by Baltimore Neighborhood", subtitle = "Older Populations") +
  mapTheme() +
  facet_wrap(vars(`Age Category`))

---
title: "Loyola University Screening Test"
author: "Kyle McCarthy"
date: "August 6th, 2024"
output:
  html_document:
    toc: yes
    toc_float: yes
    code_folding: hide
    code_download: yes
  word_document:
    toc: yes
  pdf_document:
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r SetUp, message = FALSE, warning = FALSE, cache = TRUE, results = 'hide'}

#Loading all libraries that may be needed

library(sf) 
library(tidyverse) 
library(dplyr) 
library(ggplot2) 
library(ggmap) 
library(scales)
library(gridExtra)
library(tidycensus)
library(cowplot) 
library(OpenStreetMap)
library(rJava)
library(knitr)
library(gganimate)
library(ggmap)
library(transformr)
library(lubridate)
library(rayshader)
library(viridis)
library(rgl)
library(foreign)
library(marmap)
library(raster)
library(ggrepel)
library(RColorBrewer)

# Remote data folder and shared helper functions used by the plots below
root.dir <- "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/DATA/"
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")

```



```{r LoadData, message = FALSE, warning = FALSE, cache = TRUE, results = 'hide'}

# Geography layer (shapefile)
Data <- sf::st_read("C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\shapefile\\gadm36_NGA_1.shp")

# Survey records (Stata file)
Survey <- foreign::read.dta("C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\NGPR7AFL.DTA")



```

## Question 1 

How many Positive Blood Smear Test Children under 5? 

Steps

1. Filter to those with ages under 6 (corrective age)

2. Filter those who tested positive for a Blood Smear

3. Ensure no repeat IDs so an individual is not counted twice

ANSWER = 1546

```{r Number1_Answer, message = FALSE, warning = FALSE, cache = TRUE}

# Question 1: positive blood-smear results among records whose corrective
# age (hml16) is below 6.
# NOTE(review): rows are de-duplicated on hhid plus the test result. If hhid
# identifies a household rather than a person, several children from the same
# household collapse into one row here -- confirm against the codebook.
Under5_BloodSmear <- Survey %>%
  filter(hml16 < 6, hml32 == "positive") %>%
  # Keep only the test result and the identifier
  dplyr::select(hml32, hhid)

# Drop repeated rows, then count what is left
x <- Under5_BloodSmear %>%
  distinct() %>%
  count()

kable(x)

```
## Question 2 

How Many Rapid Tests for Children Under 5? 

Steps

1. Filter to children under 6 based on their corrective age. 

2. Filter to where the rapid blood test (hml35) is equal to positive, negative, or missing, indicating a test was made

3. Ensure there are no duplicate IDs. 


ANSWER = 10815

```{r Number2_Answer, message = FALSE, warning = FALSE, cache = TRUE}
# Question 2: rapid tests recorded (positive, negative or missing) for records
# whose corrective age (hml16) is below 6.
Under5_Rapid <- Survey %>%
  # Only the identifier, the age and the rapid-test result are needed
  dplyr::select(hhid, hml16, hml35) %>%
  filter(hml16 < 6, hml35 %in% c("positive", "negative", "missing"))

# Drop repeated rows, then count what is left
x <- Under5_Rapid %>%
  distinct() %>%
  count()

kable(x)

```
## Question 3 

How many Microscopy Positive Results for Children? 

STEPS

1. Using blood smear test as the microscopy result

2. filtered to those that tested positive for the blood smear

3. Also filtered to those that were selected for a microscopy

4. Use corrective age to ensure age under 18

5. Remove duplicate Ids



ANSWER = 1546


```{r Number3_Answer, message = FALSE, warning = FALSE, cache = TRUE}

# Question 3: positive blood-smear results for records selected for
# microscopy whose corrective age (hml16) is below 18.
Microscopy <- Survey %>%
  filter(
    hml16 < 18,
    hml32 == "positive",
    sbelig == "yes, selected for microscopy"
  ) %>%
  dplyr::select(hhid, sbelig, hml32)

# Drop repeated rows, then count what is left
x <- Microscopy %>%
  distinct() %>%
  count()

kable(x)

```
## Question 4 

How many Positive Rapid Tests for Children?

STEPS

1. Filter by corrective age to under 18 and rapid test to positive

2. Ensure no duplicate IDs

ANSWER = 3229


```{r Number4_Answer, message = FALSE, warning = FALSE, cache = TRUE}

# Question 4: positive rapid tests for records whose corrective age (hml16)
# is below 18.
# NOTE(review): ha50 is carried along and therefore becomes part of the
# de-duplication key below; it is not used for anything else here -- confirm
# that this is intended.
RapidChildren <- Survey %>%
  filter(hml16 < 18, hml35 == "positive") %>%
  dplyr::select(hhid, ha50, hml35)

# Drop repeated rows, then count what is left
x <- RapidChildren %>%
  distinct() %>%
  count()

kable(x)
```
## Question 5 

Make a plot showing the number of U5 children that tested positive for malaria by microscopy and the number of U5 children that tested negative for malaria by microscopy in urban and rural areas

STEPS

1. Filtered by Children under 5

2. Create categories for rural/urban, positive status/ negative status using new fields

3. Ensure no duplicate IDs

4. Reformat table

5. Create Graph

```{r Number5_Answer, message = FALSE, warning = FALSE, cache = TRUE, fig.height= 6, fig.width= 10}

# Question 5: number of children (corrective age below 6, selected for
# microscopy) by blood-smear result and urban/rural residence.
GraphQ5 <-
  Survey %>%
  dplyr::select(hhid, hv025, hml16, sbelig, hml32) %>%
  filter(hml16 < 6) %>%
  # One 0/1 indicator per residence-by-result combination
  mutate(
    selected = sbelig == "yes, selected for microscopy",
    Rural = ifelse(selected & hv025 == "rural" & hml32 == "negative", 1, 0),
    Urban = ifelse(selected & hv025 == "urban" & hml32 == "negative", 1, 0),
    pos = ifelse(selected & hv025 == "rural" & hml32 == "positive", 1, 0),
    pos2 = ifelse(selected & hv025 == "urban" & hml32 == "positive", 1, 0)
  ) %>%
  # Keep rows that fall into at least one of the four categories
  filter(Urban == 1 | Rural == 1 | pos == 1 | pos2 == 1) %>%
  # Keep the identifier and the indicators, dropping repeated rows
  distinct(hhid, Urban, Rural, pos, pos2) %>%
  # Total per category, already under the labels shown on the chart
  summarise(
    `Positive for Malaria ; Rural` = sum(pos),
    `Positive for Malaria ; Urban` = sum(pos2),
    `Negative for Malaria ; Rural` = sum(Rural),
    `Negative for Malaria ; Urban` = sum(Urban)
  ) %>%
  # Long format: one row per category for ggplot
  pivot_longer(
    cols = everything(),
    names_to = "Category",
    values_to = "Children"
  ) %>%
  mutate(Children = as.numeric(Children))

ggplot(GraphQ5, aes(x = Category, y = Children, fill = Category)) +
  geom_col() +
  labs(
    title = "Number of Nigerian Children by Malaria Status via Microscopy",
    subtitle = "Source: Nigerian Health Survey",
    y = "Number of Children"
  ) +
  scale_fill_manual(values = c("#E59E9E", "#E62020", "#00F38A", "#008C4F")) +
  plotTheme() +
  theme(legend.position = "none")

```

## Question 6 

Make another plot showing the proportion of U5 children that tested positive for malaria by microscopy and the proportion of U5 children that tested negative for malaria by microscopy in urban and rural areas

STEPS

1. Use Question 5 analysis

2. Calculate proportions

3. Calculate pie chart positioning

4. Create graph using GGPlot

```{r Number6_Answer, message = FALSE, warning = FALSE, cache = TRUE, fig.height= 4, fig.width= 6}

# Question 6: share of each Question 5 category, drawn as a pie chart.
options(digits = 3)

GraphQ5_Prop <-
  # Start from the per-category counts built for Question 5
  GraphQ5 %>%
  # Percentage of all counted children that falls in each category
  mutate(Proportion = Children / sum(Children) * 100) %>%
  dplyr::select(Category, Proportion) %>%
  # Label anchors: midpoint of each slice along the stacked axis
  mutate(csum = rev(cumsum(rev(Proportion))),
         pos = Proportion / 2 + lead(csum, 1),
         pos = if_else(is.na(pos), Proportion / 2, pos))


ggplot(GraphQ5_Prop, aes(x = "", y = Proportion, fill = fct_inorder(Category))) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  theme(legend.position = "right") +
  labs(title = "Percentage of Children Testing Positive or Negative for Malaria by Geographic Region",
       subtitle = "Source: Nigerian Health Survey") +
  geom_label_repel(data = GraphQ5_Prop,
                   # round() instead of as.integer(): truncation would print
                   # e.g. 49.9 as "49%"
                   aes(y = pos, label = paste0(round(Proportion), "%")),
                   size = 3.5, nudge_x = 1, show.legend = FALSE) +
  scale_fill_manual(values = c("#00F38A", "#008C4F", "#E59E9E", "#E62020")) +
  guides(fill = guide_legend(title = "Categories"))

```

## Question 7 

Make a map of the number of children that tested positive for malaria by state

STEPS

1. Filter by positive rapid or positive blood smear, along with under 18 corrective age

2. Add new field for count

3. Ensure no duplicate IDs

4. Group by State

5. Sum Count

6. Join state data to survey data

7. Create map


```{r Number7_Answer, message = FALSE, warning = FALSE, cache = TRUE, fig.height= 6, fig.width= 10}

# Question 7: positive results (rapid test or blood smear) for records whose
# corrective age (hml16) is below 18, totalled by state and mapped.
ByState <-
  Survey %>%
  filter(hml16 < 18, hml32 == "positive" | hml35 == "positive") %>%
  # Constant column that is summed per state below
  mutate(Count = 1) %>%
  # Keep identifier, state and counter, dropping repeated rows
  distinct(hhid, shstate, Count) %>%
  group_by(shstate) %>%
  summarise(ChildCases = sum(Count)) %>%
  # NOTE(review): this NA fill runs before the join below, so states with no
  # matching row in ByState still end up with NA in Geo.
  mutate(ChildCases = ifelse(is.na(ChildCases), 0, ChildCases))

# Attach the per-state totals to the boundaries, matching on lower-cased name
Geo <- left_join(
  mutate(Data, shstate = tolower(NAME_1)),
  ByState,
  by = "shstate"
)

# NOTE(review): five colours but six labels are supplied -- confirm the "N/A"
# label lines up with the legend as intended.
ggplot(Geo) +
  geom_sf(aes(fill = q5(ChildCases))) +
  scale_fill_manual(
    values = c("#FFEBEB", "#FFAFAD", "#FF221F", "#E00400", "#A30300"),
    labels = c("0-23", "24-70", "71-99", "100 - 120", "121 - 176", "N/A"),
    name = "Child Malaria Cases \n(Quintile Breaks)"
  ) +
  labs(
    title = "Nigerian Malaria Cases by Region",
    subtitle = "Source: Nigerian Health Survey"
  ) +
  mapTheme()




```

## Question 8 

Extract the data from the raster file and make a map showing average housing quality values for each Nigerian state.

STEPS

1. Import Raster

2. Transform to Poly

3. Transform to SF Class

4. Intersect Nigerian state boundaries with SF Class

5. Group by State

6. Summarise average housing quality

7. Create Map

```{r Number8_Answer, message = FALSE, warning = FALSE, cache = TRUE, fig.height= 5, fig.width= 7}

# Question 8: mean housing-quality value per state, taken from the raster.
NigerianHousing <- raster("C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\NGPR7ADT\\housing data 2019 nigeria\\2019_Nature_Africa_Housing_2015_NGA.tiff")

# Raster cells -> polygons -> sf object
# NOTE(review): every cell becomes its own polygon before the intersection;
# raster::extract() on the state boundaries may be a cheaper route -- confirm
# the results match before switching.
poly <- NigerianHousing %>%
  rasterToPolygons() %>%
  st_as_sf()

# Cut the cell polygons by the state boundaries
Aggregate <- poly %>%
  st_intersection(Geo)

# Mean cell value per state
Aggregate1 <- summarise(
  group_by(Aggregate, shstate),
  `Average Housing Quality` = mean(X2019_Nature_Africa_Housing_2015_NGA)
)

ggplot(Aggregate1) +
  geom_sf(aes(fill = `Average Housing Quality`)) +
  scale_fill_gradient(low = "#EEFFEE", high = "#335533") +
  labs(title = "Nigerian Average Housing Quality Score by Region") +
  mapTheme()

  
```

## Question 9

Identify any website of your choice and scrub their data. Pull it into a csv and make at least one data visualization.

STEPS

1. Identified mortality dataset of Baltimore

2. Downloaded all associated data

3. Joined data with younger populations together --- split up from older populations so that we can have one scale for younger populations and one scale for older populations as the mortality rate is higher in older populations

4. Alter table to prepare for a facet wrap map

5. Create map 

6. Repeat for older populations


```{r Number9_Data, message = FALSE, warning = FALSE, cache = TRUE, results = 'hide'}

# Question 9 inputs: one GeoJSON layer per age band, reduced to the area name
# (CSA2010) and that band's mortality column. Each file is read once; the
# repeated read of the Mort64 layer has been removed.
M1To14 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort14__-7131955727435385779.geojson') %>%
  dplyr::select(CSA2010, mort14_18)

M15To24 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort24__440437359064296490.geojson') %>%
  dplyr::select(CSA2010, mort24_18)

# Name kept as-is because later code refers to it; the plots label this
# layer "Ages 25 to 44".
M24To44 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort44__6262205918832827315.geojson') %>%
  dplyr::select(CSA2010, mort44_18)

M45To64 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort64__-6209940949464912372.geojson') %>%
  dplyr::select(CSA2010, mort64_18)

M65To84 <- st_read('C:\\Users\\Kyle McCarthy\\Documents\\Loyola\\Mort84__7080676275320674118.geojson') %>%
  dplyr::select(CSA2010, mort84_18)

```

```{r Number9_Answer, message = FALSE, warning = FALSE, cache = TRUE, fig.height= 5, fig.width= 7}

# Question 9 maps: mortality by area, one facet per age band. Younger and
# older bands are built separately so that each map gets its own colour scale.
#
# All joins name their key explicitly. Without `by`, the second join would also
# match on the `geometry` list-column brought in by the first join.
MortalityData <-
  M1To14 %>%
  st_drop_geometry() %>%
  # Attribute-only join: geometry is attached once, by the right_join below
  left_join(st_drop_geometry(M15To24), by = "CSA2010") %>%
  # Keeps every area of the 25-44 layer together with its geometry
  right_join(M24To44, by = "CSA2010") %>%
  rename(`Ages 1 to 14` = mort14_18,
         `Ages 15 to 24` = mort24_18,
         `Ages 25 to 44` = mort44_18) %>%
  # Long format: one row per area and age band for facet_wrap()
  pivot_longer(
    cols = c(`Ages 1 to 14`, `Ages 15 to 24`, `Ages 25 to 44`),
    names_to = "Age Category",
    values_to = "Mortality") %>%
  st_as_sf()

ggplot() +
  geom_sf(data = MortalityData, aes(fill = Mortality)) +
  scale_fill_gradient(low = "#FFEEEE", high = "#773333") +
  labs(title = "Mortality Rate by Baltimore Neighborhood", subtitle = "Younger Populations") +
  mapTheme() +
  facet_wrap(vars(`Age Category`))


MortalityData_Older <-
  M45To64 %>%
  st_drop_geometry() %>%
  # Geometry comes from the 65-84 layer
  left_join(M65To84, by = "CSA2010") %>%
  rename(`Ages 45 to 64` = mort64_18,
         `Ages 65 to 84` = mort84_18) %>%
  pivot_longer(
    cols = c(`Ages 45 to 64`, `Ages 65 to 84`),
    names_to = "Age Category",
    values_to = "Mortality") %>%
  st_as_sf()

ggplot() +
  geom_sf(data = MortalityData_Older, aes(fill = Mortality)) +
  scale_fill_gradient(low = "#FFEEEE", high = "#773333") +
  labs(title = "Mortality Rate by Baltimore Neighborhood", subtitle = "Older Populations") +
  mapTheme() +
  facet_wrap(vars(`Age Category`))

  
  

```

