#--------Our previous code did not work, so let's start another project on the rate of spread of diseases in the world-----
###############################################33

## We first start by identifying the diseases of interest, e.g. cancer, HPV ("Human Papilloma Virus"), etc.
# Let's install the required packages first
#------Let's set up and install packages-----------
# Packages this script attaches up front.
required <- c(
  "tidyverse", "lubridate", "ggplot2", "plotly", "reshape2",
  "sf", "rnaturalearth", "rnaturalearthdata", "viridis"
)

# Install only the packages that are not already present in the library.
inst <- required[!(required %in% installed.packages()[, "Package"])]
if (length(inst) > 0) {
  install.packages(inst)
}

# Attach every package. lapply() returns one search-path listing per package;
# invisible() keeps that list from being auto-printed to the console/report.
invisible(lapply(required, library, character.only = TRUE))
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'plotly' was built under R version 4.5.2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
## Warning: package 'reshape2' was built under R version 4.5.2
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
## Warning: package 'rnaturalearth' was built under R version 4.5.2
## Warning: package 'rnaturalearthdata' was built under R version 4.5.2
## 
## Attaching package: 'rnaturalearthdata'
## 
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
## 
## Loading required package: viridisLite
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "plotly"    "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "reshape2"  "plotly"    "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[6]]
##  [1] "sf"        "reshape2"  "plotly"    "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[7]]
##  [1] "rnaturalearth" "sf"            "reshape2"      "plotly"       
##  [5] "lubridate"     "forcats"       "stringr"       "dplyr"        
##  [9] "purrr"         "readr"         "tidyr"         "tibble"       
## [13] "ggplot2"       "tidyverse"     "stats"         "graphics"     
## [17] "grDevices"     "utils"         "datasets"      "methods"      
## [21] "base"         
## 
## [[8]]
##  [1] "rnaturalearthdata" "rnaturalearth"     "sf"               
##  [4] "reshape2"          "plotly"            "lubridate"        
##  [7] "forcats"           "stringr"           "dplyr"            
## [10] "purrr"             "readr"             "tidyr"            
## [13] "tibble"            "ggplot2"           "tidyverse"        
## [16] "stats"             "graphics"          "grDevices"        
## [19] "utils"             "datasets"          "methods"          
## [22] "base"             
## 
## [[9]]
##  [1] "viridis"           "viridisLite"       "rnaturalearthdata"
##  [4] "rnaturalearth"     "sf"                "reshape2"         
##  [7] "plotly"            "lubridate"         "forcats"          
## [10] "stringr"           "dplyr"             "purrr"            
## [13] "readr"             "tidyr"             "tibble"           
## [16] "ggplot2"           "tidyverse"         "stats"            
## [19] "graphics"          "grDevices"         "utils"            
## [22] "datasets"          "methods"           "base"
#----------creating a sample dataset of disease cases-------
# Build a simulated long-format dataset: one row per (year, disease) pair with
# a uniformly distributed random case count between 1000 and 50000.
# The seed makes the simulated values reproducible.
set.seed(123)

years <- 2014:2023
disease_names <- c("Influenza", "Cholera", "Tuberculosis", "Malaria")

# Sizes are derived from the two vectors above so that adding a year or a
# disease cannot leave the columns with mismatched lengths.
n_years <- length(years)
n_diseases <- length(disease_names)

diseases <- tibble(
  year = rep(years, each = n_diseases),
  disease = rep(disease_names, times = n_years),
  cases = round(runif(n_years * n_diseases, 1000, 50000))
)

head(diseases)
## # A tibble: 6 × 3
##    year disease      cases
##   <int> <chr>        <dbl>
## 1  2014 Influenza    15091
## 2  2014 Cholera      39627
## 3  2014 Tuberculosis 21040
## 4  2014 Malaria      44268
## 5  2015 Influenza    47083
## 6  2015 Cholera       3232
#-------------we produce a line chart: Disease trends over time------
# Yearly case counts drawn as one coloured line (with point markers) per
# disease.
ggplot(
  data = diseases,
  mapping = aes(year, cases, colour = disease)
) +
  geom_line(linewidth = 1.2) +
  geom_point() +
  ggtitle("Disease Spread Over Time (10-Year Trend)") +
  xlab("Year") +
  ylab("Number of Cases") +
  theme_minimal()

#-------------Interactive version of plotly--------
# Interactive counterpart of the trend chart: lines with markers, one trace
# per disease.
diseases |>
  plot_ly(
    x = ~year,
    y = ~cases,
    color = ~disease,
    type = "scatter",
    mode = "lines+markers"
  )
#---------Bar chart comparison by year-------

# Side-by-side (dodged) bars comparing the diseases within each year.
# Year is converted to a factor so each year gets its own discrete slot.
ggplot(
  data = diseases,
  mapping = aes(factor(year), cases, fill = disease)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Yearly Case Comparison Across Diseases",
    x = "Year",
    y = "Cases"
  ) +
  theme_minimal()

#-------------heatmap of disease intensity---------

# Heatmap input is the long-format dataset as-is.
heat <- diseases

# One tile per (year, disease) cell, filled by case count on the magma scale.
ggplot(
  data = heat,
  mapping = aes(factor(year), disease, fill = cases)
) +
  geom_tile(colour = "white") +
  scale_fill_viridis(option = "magma") +
  ggtitle("Heatmap of Disease Spread Intensity") +
  xlab("Year") +
  ylab("Disease") +
  labs(fill = "Cases") +
  theme_minimal()

#---------simulated country rates--------
# Country polygons as an sf object.
world <- ne_countries(scale = "medium", returnclass = "sf")

# Attach a simulated rate (integer drawn from 0..100) to every country.
# These values are random placeholders; the seed makes them reproducible.
set.seed(222)
world$disease_rate <- sample(0:100, size = nrow(world), replace = TRUE)

# Choropleth of the simulated rate; country borders are not drawn.
ggplot(data = world) +
  geom_sf(mapping = aes(fill = disease_rate), colour = NA) +
  scale_fill_viridis(option = "plasma") +
  labs(
    title = "Global Spread Intensity of Selected Diseases",
    fill = "Rate"
  ) +
  theme_minimal()

#--------Correlation of diseases---------
# Reshape to one row per year and one column per disease.
wide <- pivot_wider(diseases, names_from = disease, values_from = cases)

# Pairwise correlation between the disease columns; the leading year column
# is excluded.
cor_matrix <- wide |>
  select(-year) |>
  cor()

cor_matrix
##               Influenza    Cholera Tuberculosis    Malaria
## Influenza     1.0000000  0.1202325   -0.1786440 -0.2202134
## Cholera       0.1202325  1.0000000   -0.2687629 -0.1093879
## Tuberculosis -0.1786440 -0.2687629    1.0000000  0.1050440
## Malaria      -0.2202134 -0.1093879    0.1050440  1.0000000
#----------correlation heatmap--------
library(reshape2)

# Flatten the correlation matrix to long form (columns Var1, Var2, value).
melted_cor <- melt(cor_matrix)

# Tile plot of the pairwise correlations.
ggplot(
  data = melted_cor,
  mapping = aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile() +
  scale_fill_viridis(option = "inferno") +
  labs(
    title = "Correlation Between Diseases",
    fill = "Correlation"
  ) +
  theme_minimal()

#---------------Fit lm on the diseases-------
library(dplyr)
library(broom)

# Fit one linear regression (cases ~ year) per disease.
# nest_by() replaces the superseded do(): it yields one row per disease with
# that disease's rows stored in a `data` list-column, and the result is
# rowwise, so `data` and `model` below refer to a single element each.
regression_results <- diseases %>%
  nest_by(disease) %>%
  mutate(
    model = list(lm(cases ~ year, data = data)),
    tidy_model = list(tidy(model))
  ) %>%
  # Keep the same columns as before (disease, model, tidy_model) and drop the
  # rowwise grouping so later verbs operate on a plain tibble.
  select(disease, model, tidy_model) %>%
  ungroup()

# View the regression coefficients (one tidy coefficient table per disease)
regression_results$tidy_model
## [[1]]
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)  254666.  3550281.    0.0717   0.945
## 2 year           -115.     1759.   -0.0653   0.950
## 
## [[2]]
## # A tibble: 2 × 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept) -1022478.  2867344.    -0.357   0.731
## 2 year             522.     1421.     0.367   0.723
## 
## [[3]]
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept) 4470868.  2638547.      1.69   0.129
## 2 year          -2197.     1307.     -1.68   0.131
## 
## [[4]]
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept) 1627870.  3601465.     0.452   0.663
## 2 year           -794.     1784.    -0.445   0.668
#----------plot regression line on the existing line chart----------

# Observed series (points + lines) with a dashed per-disease least-squares
# fit drawn on top; no confidence band is shown.
ggplot(
  data = diseases,
  mapping = aes(year, cases, colour = disease)
) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  ggtitle("Disease Spread Over Time with Linear Regression") +
  xlab("Year") +
  ylab("Number of Cases") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

#---------Regression coefficient tables--------
# Expand each disease's tidy coefficient table into rows, keep only the
# estimates, then spread the terms into columns (one row per disease).
coef_long <- regression_results |>
  unnest(cols = tidy_model) |>
  select(disease, term, estimate)

coef_table <- pivot_wider(
  coef_long,
  names_from = term,
  values_from = estimate
)

coef_table
## # A tibble: 4 × 3
##   disease      `(Intercept)`   year
##   <chr>                <dbl>  <dbl>
## 1 Cholera            254666.  -115.
## 2 Influenza        -1022478.   522.
## 3 Malaria           4470868. -2197.
## 4 Tuberculosis      1627870.  -794.
#--------This is a generated project by TREVA Org.co.ke-----------