#-------- Our previous code did not work, so this is a new project on the rate of spread of diseases in the world --------
##################################################
## We first start by identifying the diseases, e.g. cancer, HPV ("Human Papilloma Virus"), etc.
#------ Setup: install any missing packages, then attach all of them ------
# Packages attached by this script.
required <- c(
  "tidyverse", "lubridate", "ggplot2", "plotly", "reshape2",
  "sf", "rnaturalearth", "rnaturalearthdata", "viridis"
)
# Probe each package on its own rather than scanning installed.packages():
# its documentation advises against using it to find out whether a named
# package is installed. system.file() returns "" when the package is absent.
is_installed <- vapply(
  required,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
inst <- required[!is_installed]
if (length(inst) > 0) {
  install.packages(inst)
}
# library() is called purely for its side effect; invisible() stops the list
# returned by lapply() (one search-path vector per package) from being printed.
invisible(lapply(required, library, character.only = TRUE))
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'plotly' was built under R version 4.5.2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
## Warning: package 'reshape2' was built under R version 4.5.2
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
## Warning: package 'rnaturalearth' was built under R version 4.5.2
## Warning: package 'rnaturalearthdata' was built under R version 4.5.2
##
## Attaching package: 'rnaturalearthdata'
##
## The following object is masked from 'package:rnaturalearth':
##
## countries110
##
## Loading required package: viridisLite
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "plotly" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "reshape2" "plotly" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[6]]
## [1] "sf" "reshape2" "plotly" "lubridate" "forcats" "stringr"
## [7] "dplyr" "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [13] "tidyverse" "stats" "graphics" "grDevices" "utils" "datasets"
## [19] "methods" "base"
##
## [[7]]
## [1] "rnaturalearth" "sf" "reshape2" "plotly"
## [5] "lubridate" "forcats" "stringr" "dplyr"
## [9] "purrr" "readr" "tidyr" "tibble"
## [13] "ggplot2" "tidyverse" "stats" "graphics"
## [17] "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[8]]
## [1] "rnaturalearthdata" "rnaturalearth" "sf"
## [4] "reshape2" "plotly" "lubridate"
## [7] "forcats" "stringr" "dplyr"
## [10] "purrr" "readr" "tidyr"
## [13] "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices"
## [19] "utils" "datasets" "methods"
## [22] "base"
##
## [[9]]
## [1] "viridis" "viridisLite" "rnaturalearthdata"
## [4] "rnaturalearth" "sf" "reshape2"
## [7] "plotly" "lubridate" "forcats"
## [10] "stringr" "dplyr" "purrr"
## [13] "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats"
## [19] "graphics" "grDevices" "utils"
## [22] "datasets" "methods" "base"
#---------- Simulated dataset: yearly case counts for four diseases ----------
# Fixed seed so the random case counts are reproducible between runs.
set.seed(123)
years <- 2014:2023
# 10 years x 4 diseases = 40 rows; each year is repeated once per disease and
# the disease names cycle within every year.
diseases <- tibble(
  year = rep(years, each = 4),
  disease = rep_len(
    c("Influenza", "Cholera", "Tuberculosis", "Malaria"),
    length.out = 40
  ),
  cases = round(runif(n = 40, min = 1000, max = 50000))
)
# Preview the first six rows.
head(diseases, n = 6)
## # A tibble: 6 × 3
## year disease cases
## <int> <chr> <dbl>
## 1 2014 Influenza 15091
## 2 2014 Cholera 39627
## 3 2014 Tuberculosis 21040
## 4 2014 Malaria 44268
## 5 2015 Influenza 47083
## 6 2015 Cholera 3232
#------------- Line chart: disease trends over time -------------
# One coloured line per disease with a marker at every yearly observation.
ggplot(diseases, aes(x = year, y = cases, color = disease)) +
  geom_line(linewidth = 1.2) +
  geom_point() +
  # `year` holds whole numbers; pin the breaks to the observed years so the
  # axis does not rely on automatically chosen (possibly fractional) ticks.
  scale_x_continuous(breaks = sort(unique(diseases$year))) +
  theme_minimal() +
  labs(
    title = "Disease Spread Over Time (10-Year Trend)",
    x = "Year",
    y = "Number of Cases"
  )

#------------- Interactive version of the trend chart (plotly) -------------
# Same mapping as the static line chart: year on x, cases on y, one coloured
# trace per disease, drawn as lines with markers.
diseases |>
  plot_ly(
    x = ~year, y = ~cases, color = ~disease,
    type = "scatter", mode = "lines+markers"
  )
#--------- Bar chart: comparison of diseases within each year ---------
# Year is converted to a factor so every year gets its own discrete slot;
# "dodge" places the bars of the diseases side by side instead of stacking.
diseases |>
  ggplot(aes(x = factor(year), y = cases, fill = disease)) +
  geom_col(position = "dodge") +
  ggtitle("Yearly Case Comparison Across Diseases") +
  xlab("Year") +
  ylab("Cases") +
  theme_minimal()

#------------- Heatmap of disease intensity (year x disease) -------------
# `heat` is a plain copy of the simulated data used as the plot source.
heat <- diseases
# One tile per (year, disease) pair, filled by the number of cases; the white
# tile border separates neighbouring cells.
heat |>
  ggplot(aes(x = factor(year), y = disease, fill = cases)) +
  geom_tile(color = "white") +
  scale_fill_viridis(option = "magma") +
  ggtitle("Heatmap of Disease Spread Intensity") +
  xlab("Year") +
  ylab("Disease") +
  labs(fill = "Cases") +
  theme_minimal()

#--------- World map coloured by a simulated per-country rate ---------
# Country polygons as an sf object.
world <- ne_countries(scale = "medium", returnclass = "sf")
# Fixed seed so the simulated rates are reproducible.
set.seed(222)
# One random integer rate in 0..100 per country, drawn with replacement
# (indices 1..101 shifted down by one).
world$disease_rate <-
  sample.int(101L, size = nrow(world), replace = TRUE) - 1L
# color = NA suppresses the country outlines so only the fill is shown.
world |>
  ggplot() +
  geom_sf(aes(fill = disease_rate), color = NA) +
  scale_fill_viridis(option = "plasma") +
  ggtitle("Global Spread Intensity of Selected Diseases") +
  labs(fill = "Rate") +
  theme_minimal()

#-------- Correlation between the yearly case series of the diseases --------
# Reshape to one row per year and one column per disease.
wide <- pivot_wider(diseases, names_from = disease, values_from = cases)
# Drop the first column (year) so only the disease columns are correlated.
cor_matrix <- cor(wide[-1])
cor_matrix
## Influenza Cholera Tuberculosis Malaria
## Influenza 1.0000000 0.1202325 -0.1786440 -0.2202134
## Cholera 0.1202325 1.0000000 -0.2687629 -0.1093879
## Tuberculosis -0.1786440 -0.2687629 1.0000000 0.1050440
## Malaria -0.2202134 -0.1093879 0.1050440 1.0000000
#---------- Heatmap of the correlation matrix ----------
library(reshape2)
# melt() turns the matrix into long form with the columns Var1, Var2 and
# value, i.e. one row per pair of diseases.
melted_cor <- melt(cor_matrix)
melted_cor |>
  ggplot(aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_viridis(option = "inferno") +
  ggtitle("Correlation Between Diseases") +
  labs(fill = "Correlation") +
  theme_minimal()

#--------------- Fit a linear model (cases ~ year) per disease ---------------
library(dplyr)
library(broom)
# One row per disease with two list-columns:
#   model      - the fitted lm object for that disease
#   tidy_model - broom::tidy() coefficient table of that model
# summarise() with list-columns replaces the superseded do(); the result is a
# plain (ungrouped, non-rowwise) tibble.
regression_results <- diseases |>
  group_by(disease) |>
  summarise(
    # pick() hands lm() the current group's rows of the two model variables.
    model = list(lm(cases ~ year, data = pick(cases, year))),
    .groups = "drop"
  ) |>
  mutate(tidy_model = lapply(model, tidy))
# View the regression coefficients (one tibble per disease).
regression_results$tidy_model
## [[1]]
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 254666. 3550281. 0.0717 0.945
## 2 year -115. 1759. -0.0653 0.950
##
## [[2]]
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1022478. 2867344. -0.357 0.731
## 2 year 522. 1421. 0.367 0.723
##
## [[3]]
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 4470868. 2638547. 1.69 0.129
## 2 year -2197. 1307. -1.68 0.131
##
## [[4]]
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1627870. 3601465. 0.452 0.663
## 2 year -794. 1784. -0.445 0.668
#---------- Trend chart with a per-disease linear regression line ----------
# Observed series (points joined by solid lines) plus a dashed least-squares
# fit per disease; se = FALSE omits the confidence band.
ggplot(diseases, aes(x = year, y = cases, color = disease)) +
  geom_point() +
  geom_line() +
  # The formula is spelled out so geom_smooth() does not have to announce the
  # default it picked.
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE, linetype = "dashed"
  ) +
  # `year` holds whole numbers; pin the breaks to the observed years so the
  # axis does not rely on automatically chosen (possibly fractional) ticks.
  scale_x_continuous(breaks = sort(unique(diseases$year))) +
  theme_minimal() +
  labs(
    title = "Disease Spread Over Time with Linear Regression",
    x = "Year",
    y = "Number of Cases"
  )

#--------- Regression coefficient table (one row per disease) ---------
# Expand the per-disease coefficient tibbles, keep only the estimates, and
# spread the terms into columns: `(Intercept)` and `year` (the slope).
coef_table <- regression_results |>
  tidyr::unnest(tidy_model) |>
  dplyr::select(disease, term, estimate) |>
  tidyr::pivot_wider(names_from = term, values_from = estimate)
coef_table
## # A tibble: 4 × 3
## disease `(Intercept)` year
## <chr> <dbl> <dbl>
## 1 Cholera 254666. -115.
## 2 Influenza -1022478. 522.
## 3 Malaria 4470868. -2197.
## 4 Tuberculosis 1627870. -794.
#-------- This is a generated project by TREVA Org.co.ke -----------