Kefa_Sheka_review_analysis_report

# Sindew Mekasha Feleke (Augest, 2023)

## Introduction: Onchocerciasis is an important public health problem in Ethiopia. The first rapid epidemiological mapping (REMO) was conducted in 1998 in Keffa and Sheka zones of southwest Ethiopia. And, the mass drug administration using the drug invermectin (MDAi) was launched in 2001. This report included the baseline prevalence, MDA coverage and program assessment survey results. 

## Part 1. Baseline REMO data of 9 first line villages in 3 districts,  keffa and Sheka zones,southwest Ethiopia.
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

District <- c('Decha', 'Decha', 'Gimbo', 'Gimbo', 'Yeki', 'Yeki', 'Yeki', 'Yeki', 'Yeki')
Village <- c('Andracha', 'Ermo', 'Choba', 'Omach', 'Achine', 'Bukuri', 'Nari', 'Narri2', 'Youmbachi')
REMO_Prevalence <- c(56.7, 31.3, 34.8, 47.2, 29.7, 42, 80, 54.8, 40)
REMO1 <- data_frame(District, Village, REMO_Prevalence)

## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

##1.1 data visualization 
Kf <- ggplot(REMO1, aes(Village, REMO_Prevalence, fill = District)) +
  geom_bar(stat = "identity")+
   theme_bw()+
  xlab("First-line villages")+
  scale_x_discrete(limits = c("Andracha", "Ermo", "Omach", "Choba", "Nari", "Narri2", "Bukuri", "Youmbachi", "Achine"))+
  annotate("text", label = "Prevalence by community p=0.02",x=2.4, y=65)+
  annotate("text", label = "Prevalence by district p=0.842",x=2.4, y=60)+
  theme(text  = element_text(size = 12))+
  geom_hline(yintercept = 20)+
  theme_bw()+
  geom_boxplot()
  #coord_equal()
Kf

ggplotly(Kf)

## adding immage to R plot


library(png)
library(patchwork)
A1 <- png::readPNG("C:/Users/EPHI/Desktop/SF_Dec23_all/PNG/RB_EthMap.png")
Plot_map <- Kf +
  inset_element(p=A1,
               left = 0.55,
                bottom = 0.63,
                right = 1.25,
                top = 1.03)
               


  
  #annotate("text", label = "MDAi(%) by year p-value=0.33",x=62.8, y=14.4)+

### adding immage to R plot
library(png)
library(patchwork)
Site_map <- readPNG("C:/Users/EPHI/Desktop/SF_Dec23_all/PNG/RB_EthMap.png")
MapSite_population<- A1+
 inset_element(p=Site_map,
             left = 0.5,
              bottom = 0.5,
              right = 0.95,
              top = 0.95)
MapSite_population

## NULL

## 1.2 reordering 

REMO1$Village <- factor(REMO1$Village,
                 levels = REMO1$Village[order(REMO1$Village, decreasing = TRUE)])
### 1.3. Analysis of variances (Annova) district level (independent variable) REMO prevalence (dependent variables) 
Annova1 <- aov(REMO_Prevalence ~ District, data = REMO1)
summary(Annova1)#%>%

##             Df Sum Sq Mean Sq F value Pr(>F)
## District     2  111.8   55.88   0.177  0.842
## Residuals    6 1896.1  316.02

 # TukeyHSD() %>% 
plot

## function (x, y, ...) 
## UseMethod("plot")
## <bytecode: 0x000001d0a55847e0>
## <environment: namespace:base>

AnnovaV <- aov(REMO_Prevalence ~ Village, data = REMO1)
AnnovaV

## Call:
##    aov(formula = REMO_Prevalence ~ Village, data = REMO1)
## 
## Terms:
##                  Village
## Sum of Squares  2007.896
## Deg. of Freedom        8
## 
## Estimated effects may be unbalanced

REMO1 %>% lm(REMO_Prevalence ~ Village, data = .) %>% summary()

## 
## Call:
## lm(formula = REMO_Prevalence ~ Village, data = .)
## 
## Residuals:
## ALL 9 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)         40.0        NaN     NaN      NaN
## VillageOmach         7.2        NaN     NaN      NaN
## VillageNarri2       14.8        NaN     NaN      NaN
## VillageNari         40.0        NaN     NaN      NaN
## VillageErmo         -8.7        NaN     NaN      NaN
## VillageChoba        -5.2        NaN     NaN      NaN
## VillageBukuri        2.0        NaN     NaN      NaN
## VillageAndracha     16.7        NaN     NaN      NaN
## VillageAchine      -10.3        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 8 and 0 DF,  p-value: NA

##1.4 Linear regression analysis of nodule prevalence (dependent variable) in different districts(independent location)
REMO1 %>% lm(REMO_Prevalence ~ District, data = .) %>% 
      summary() # %>%   #TukeyHSD() %>%   plot

## 
## Call:
## lm(formula = REMO_Prevalence ~ District, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -19.6   -9.3   -6.2    6.2   30.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      44.00      12.57   3.500   0.0128 *
## DistrictGimbo    -3.00      17.78  -0.169   0.8715  
## DistrictYeki      5.30      14.87   0.356   0.7338  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.78 on 6 degrees of freedom
## Multiple R-squared:  0.05566,    Adjusted R-squared:  -0.2591 
## F-statistic: 0.1768 on 2 and 6 DF,  p-value: 0.8421

## Part-2 Annual MDAi coverage report in keffa and Sheka zone (2001 to 2012)
library(tidyverse)
library(plotly)
MDA_annual <- read.csv('C:/Users/EPHI/Desktop/SF_Dec23_all/MDA1.csv')
summary(MDA_annual)

##        ID           District              Year         Coverage    
##  Min.   :  1.00   Length:162         Min.   :2001   Min.   :60.00  
##  1st Qu.: 65.25   Class :character   1st Qu.:2005   1st Qu.:76.00  
##  Median :111.50   Mode  :character   Median :2007   Median :79.00  
##  Mean   :107.11                      Mean   :2007   Mean   :77.77  
##  3rd Qu.:151.75                      3rd Qu.:2010   3rd Qu.:80.00  
##  Max.   :192.00                      Max.   :2012   Max.   :92.00

print(unique(MDA_annual$District))

##  [1] "Gewata "     "Gimbo "      "Andracha "   "Masha  "     "Yeki "      
##  [6] "Bitta"       "Chena "      "Gesha"       "Saylem "     "Adiyo"      
## [11] "Tello"       "Decha"       "Cheta "      "Tepi town "  "Bonga town" 
## [16] "Masha town "

M <-ggplot(MDA_annual, aes(Coverage, District, color=Year))+
  #scale_x_continuous(limits = c(0, 100))+
  #scale_y_continuous(limits = c(0, 120))+
  geom_point()+
  geom_line()+
  geom_boxplot()+
  theme_bw()+
  ggtitle("Figure 2: Annual MDAi reported coverage(%) in Keffa and Sheka zones, 2001-2012")+
  ylab("Districts under MDAi")+
  xlab("Annual MDAi coverage(%)                
  MDAi(%) coverage by year aov* p-value = 0.33,MDAi(%), MDAi coverage by district aov* p-value=0.56
  Key: aov=analysis of variance, The vertical line at x=80% is minimum target coverage(%),
        Box plot represents minimum, first quartile, median, third quartile and maximum values")+
    #annotate("text", label = "MDAi(%) by year p-value=0.33",x=62.8, y=14.4)+
 # annotate("text", label = "MDAi(%) by district p-value=0.56",x=63.1, y=12.4)+
#annotate("text", label = "Legend:",x=61, y=15.6)+
  theme(text  = element_text(size = 9))+
   theme(legend.text = element_text(size = 10))+
  geom_vline(xintercept = 80)
M

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplotly(M)

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Analysis of variance (Annova) of Annual MDA coverage (dependent variable) in different districts (independent variables) and years (2001 to 2012) (independent variables). 

Annova_annual <- aov(Coverage ~ Year , data = MDA_annual)
 summary(Annova_annual)

##              Df Sum Sq Mean Sq F value Pr(>F)
## Year          1   16.3   16.33   0.946  0.332
## Residuals   160 2762.2   17.26

 Annova_annual <- aov(Coverage ~ District , data = MDA_annual)
 summary(Annova_annual)

##              Df Sum Sq Mean Sq F value Pr(>F)
## District     15  236.1   15.74   0.904  0.561
## Residuals   146 2542.4   17.41

## Revised (no legend) Part-2 Annual MDAi coverage report in keffa and Sheka zone (2001 to 2012)
library(tidyverse)
MDA_annual <- read.csv('C:/Users/EPHI/Desktop/SF_Dec23_all/MDA1.csv')
summary(MDA_annual)

##        ID           District              Year         Coverage    
##  Min.   :  1.00   Length:162         Min.   :2001   Min.   :60.00  
##  1st Qu.: 65.25   Class :character   1st Qu.:2005   1st Qu.:76.00  
##  Median :111.50   Mode  :character   Median :2007   Median :79.00  
##  Mean   :107.11                      Mean   :2007   Mean   :77.77  
##  3rd Qu.:151.75                      3rd Qu.:2010   3rd Qu.:80.00  
##  Max.   :192.00                      Max.   :2012   Max.   :92.00

print(unique(MDA_annual$District))

##  [1] "Gewata "     "Gimbo "      "Andracha "   "Masha  "     "Yeki "      
##  [6] "Bitta"       "Chena "      "Gesha"       "Saylem "     "Adiyo"      
## [11] "Tello"       "Decha"       "Cheta "      "Tepi town "  "Bonga town" 
## [16] "Masha town "

ggplot(MDA_annual, aes(Coverage, District, color=Year))+
  #scale_x_continuous(limits = c(0, 100))+
  #scale_y_continuous(limits = c(0, 120))+
  geom_point()+
  geom_line()+
  geom_boxplot()+
  theme_bw()+
  ylab("Districts under MDAi")+
  xlab("Annual MDAi coverage(%)")+                
  annotate("text", label = "MDAi(%) by year aov p=0.33",x=65.6, y=1.7)+
  annotate("text", label = "MDAi(%) by district aov p=0.56",x=65.8, y=2.4)+
#annotate("text", label = "Legend:",x=61, y=15.6)+
  #theme(text  = element_text(size = 9))+
  theme(legend.text = element_text(size = 10))+
  geom_vline(xintercept = 80)

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Part-2 Annual MDAi coverage report in keffa and Sheka zone (2001 to 2012)
library(tidyverse)
library(plotly)
MDA_annual <- read.csv('C:/Users/EPHI/Desktop/SF_Dec23_all/MDA1.csv')
summary(MDA_annual)

##        ID           District              Year         Coverage    
##  Min.   :  1.00   Length:162         Min.   :2001   Min.   :60.00  
##  1st Qu.: 65.25   Class :character   1st Qu.:2005   1st Qu.:76.00  
##  Median :111.50   Mode  :character   Median :2007   Median :79.00  
##  Mean   :107.11                      Mean   :2007   Mean   :77.77  
##  3rd Qu.:151.75                      3rd Qu.:2010   3rd Qu.:80.00  
##  Max.   :192.00                      Max.   :2012   Max.   :92.00

S <- ggplot(MDA_annual, aes(Year, Coverage, color=District))+
  scale_x_continuous(limits = c(2000, 2014))+
  scale_y_continuous(limits = c(40, 100))+
  geom_point()+
  #geom_line()+
  geom_boxplot()+
  theme_bw()+
  ylab("Annual MDAi coverage(%)")+
  xlab("MDAi year")+
  #MDAi(%) coverage by year aov* p-value = 0.33,MDAi(%), MDAi coverage by district aov* p-value=0.56
  #Key: aov=analysis of variance, The vertical line at x=80% is minimum target coverage(%),
     #   Box plot represents minimum, first quartile, median, third quartile and maximum values")+
   # annotate("text", label = "MDAi(%) by district p-value=0.56",x=63.1, y=12.4)+
#annotate("text", label = "Legend:",x=61, y=15.6)+
  #theme(text  = element_text(size = 8))+
  geom_hline(yintercept = 80)+
  annotate("text", label = "MDAi(%) coverage by year aov p-value=0.33",x=2005.7, y=42)+
  annotate("text", label = "MDAi(%) coverage by district aov p-value=0.056",x=2006, y=45)+
  theme(text = element_text(size = 12))
S

ggplotly(S)

## Part-2.2 Biannual MDAi coverage report in keffa and Sheka zone (2001 to 2012)
#library(tidyverse)
#MDA_all <- read.csv('C:/Users/EPHI/Desktop/MDA1.csv')
#summary(MDA_all)
#ggplot(MDA_all, aes(District, Coverage, color=Year))+
  #geom_point()+
  #geom_boxplot()
## ANALYSIS IS NOT COMPLETE
library(tidyverse)
twiceMDAi1 <- read.csv("C:/Users/EPHI/Desktop/SF_Dec23_all/Biannual_MDAi.csv")
summary(twiceMDAi1)

##       Year          Zone             District             MDAi1     
##  Min.   :2013   Length:127         Length:127         Min.   :71.0  
##  1st Qu.:2014   Class :character   Class :character   1st Qu.:83.0  
##  Median :2016   Mode  :character   Mode  :character   Median :84.0  
##  Mean   :2017                                         Mean   :84.3  
##  3rd Qu.:2018                                         3rd Qu.:85.5  
##  Max.   :2021                                         Max.   :90.0

print(unique(twiceMDAi1$District))

##  [1] " Gewata "     " Gimbo "      " Saylem "     " Tello "      " Andracha "  
##  [6] " Masha T "    " Masha  "     " Tepi T "     " Yeki "       " Adiyo "     
## [11] " Bitta "      " Bonga T"     " Chena "      " Cheta "      " Decha "     
## [16] " Gesha "      " Bonga "      " Goba "       " Shishoinde " " Wacha "     
## [21] " Andracha  "

print(twiceMDAi1[!duplicated(twiceMDAi1$District), ])

##     Year    Zone     District MDAi1
## 1   2013  Keffa       Gewata     82
## 2   2013  Keffa        Gimbo     82
## 3   2013  Keffa       Saylem     81
## 4   2013  Keffa        Tello     82
## 5   2013  Sheka     Andracha     83
## 6   2013  Sheka      Masha T     81
## 7   2013  Sheka       Masha      80
## 8   2013  Sheka       Tepi T     80
## 9   2013  Sheka         Yeki     83
## 10  2013  Keffa        Adiyo     82
## 11  2013  Keffa        Bitta     82
## 12  2013  Keffa       Bonga T    83
## 13  2013  Keffa        Chena     83
## 14  2013  Keffa        Cheta     83
## 15  2013  Keffa        Decha     82
## 16  2013  Keffa        Gesha     84
## 92  2018  Keffa        Bonga     85
## 99  2019  Keffa         Goba     83
## 101 2019  Keffa   Shishoinde     84
## 103 2019  Keffa        Wacha     83
## 104 2019  Sheka    Andracha      85

ggplot(twiceMDAi1, aes(x=MDAi1,
                      y=District,
                      color=Year
                      ))+
  geom_point(size=2)+
  geom_line(color="black")+
  geom_boxplot()+

  #theme_classic()+
  theme(text  = element_text(size = 12))+
  annotate("text", label = "Coverage by year aov p-value=0.00",x=74.8, y=5)+
    annotate("text", label = "Coverage by district aov p-value=0.09",x=75.1, y=3.8)+
  annotate("text", label = "Coverage by zone aov p-value=0.73",x=74.9, y=2.5)+
  annotate("text", label = "Figure 3A.",x=71.8, y=6.2)+
  theme_bw()+
   theme(text  = element_text(size = 9))+
  #theme(plot.title = element_text(size=9))+
  xlab("First round MDAi coverages(%)")+
  
  geom_vline(xintercept = 80)+
  scale_fill_gradient() #name="MDAi Year", "low"="lightblue", "high"="green")

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Part-2.3 Biannual MDAi coverage report in keffa and Sheka zone (2001 to 2012)
#library(tidyverse)
#MDA_all <- read.csv('C:/Users/EPHI/Desktop/MDA1.csv')
#summary(MDA_all)
#ggplot(MDA_all, aes(District, Coverage, color=Year))+
  #geom_point()+
  #geom_boxplot()
## ANALYSIS IS NOT COMPLETE
library(tidyverse)
twiceMDAi1 <- read.csv("C:/Users/EPHI/Desktop/SF_Dec23_all/Biannual_MDAi.csv")
summary(twiceMDAi1)

##       Year          Zone             District             MDAi1     
##  Min.   :2013   Length:127         Length:127         Min.   :71.0  
##  1st Qu.:2014   Class :character   Class :character   1st Qu.:83.0  
##  Median :2016   Mode  :character   Mode  :character   Median :84.0  
##  Mean   :2017                                         Mean   :84.3  
##  3rd Qu.:2018                                         3rd Qu.:85.5  
##  Max.   :2021                                         Max.   :90.0

ggplot(twiceMDAi1, aes(x=Year,
                      y=MDAi1,
                      color=District
                      
                      ))+
  geom_point()+
  geom_boxplot()+
  ylab("First round MDAi coverage(%)")+
  xlab("MDAi year,
       Key: aov=analysis of variance")+
  ggtitle("Figure 2.3: First round MDAi reported coverage(%) by MDA years in Keffa and Sheka zones, 2013-2021")+
   scale_x_continuous(limits = c(2012, 2020))+
  scale_y_continuous(limits = c(60, 90))+
  annotate("text", label = "MDAi(%) coverage by year aov p-value=2.96e-06",x=2016, y=62)+
  geom_hline(yintercept = 80)+
  theme_bw()+
   theme(text  = element_text(size = 9))

## Warning: Removed 12 rows containing missing values (`stat_boxplot()`).

## Warning: Removed 12 rows containing missing values (`geom_point()`).

  #annotate("text", label = "Coverage by year aov p-value=0.00",x=74.8, y=5)+
  #annotate("text", label = "Coverage by district aov p-value=0.09",x=75.1, y=3.8)+
  #annotate("text", label = "Coverage by zone aov p-value=0.73",x=74.9, y=2.5)+
  #annotate("text", label = "Legend:",x=71.8, y=6.2)+

    #theme(plot.title = element_text(size=9))+
  #xlab("First round MDAi coverages(%)
    #  Key: aov=analysis of variance, Vertical line at x=80 is minimum target coverage(%),
    #  Box plot represents minimum, first quartile, median, third quartile and maximum values")+
  #ylab("Districts under MDAi")+
 MDAi1aov <- aov(MDAi1 ~ Year, data=twiceMDAi1)
summary(MDAi1aov)

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Year          1  100.2  100.17   23.97 2.96e-06 ***
## Residuals   125  522.5    4.18                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#############################################
Biannual1 <- aov(MDAi1 ~ Year , data = twiceMDAi1)
 summary(Biannual1)

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Year          1  100.2  100.17   23.97 2.96e-06 ***
## Residuals   125  522.5    4.18                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Biannuala <- aov(MDAi1 ~ District , data = twiceMDAi1)
 summary( Biannuala)

##              Df Sum Sq Mean Sq F value Pr(>F)
## District     20  123.4   6.172   1.311  0.189
## Residuals   106  499.2   4.709

  Biannualb <- aov(MDAi1 ~ Zone , data = twiceMDAi1)
 summary( Biannualb)

##              Df Sum Sq Mean Sq F value Pr(>F)
## Zone          1    0.6   0.593   0.119   0.73
## Residuals   125  622.0   4.976

library(tidyverse)
twiceMDAi <- read.csv("C:/Users/EPHI/Desktop/SF_Dec23_all/Biannual_MDAi2.csv")
summary(twiceMDAi)

##       Year          Zone             District             MDAi2       
##  Min.   :2013   Length:135         Length:135         Min.   : 73.00  
##  1st Qu.:2015   Class :character   Class :character   1st Qu.: 83.00  
##  Median :2017   Mode  :character   Mode  :character   Median : 84.00  
##  Mean   :2017                                         Mean   : 84.69  
##  3rd Qu.:2019                                         3rd Qu.: 86.00  
##  Max.   :2021                                         Max.   :106.00

ggplot(twiceMDAi, aes(x=MDAi2,
                      y=District,
                      color=Year))+
  geom_point()+
  geom_line()+
  geom_boxplot()+
  theme(text  = element_text(size = 12))+
annotate("text", label = "Coverage by year p=0.00",x=67.7, y=3.65)+
    annotate("text", label = "Coverage by district p=0.056",x=68.8, y=2.9)+
  annotate("text", label = "Coverage by zone p=0.56",x=68.1, y=2.)+
  annotate("text", label = "Figure 3B AOV for:",x=64.5, y=4.5)+
  xlab("Second round MDAi coverages(%)")+
  ylab("MDAi districts in Keffa and Sheka zones")+
  scale_x_continuous(limits = c(60, 110))+
  theme_bw()+
  theme(text  = element_text(size = 11))+
  geom_vline(xintercept = 80)

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Biannual2 <- aov(MDAi2 ~ Year , data = twiceMDAi)
 summary(Biannual2)

##              Df Sum Sq Mean Sq F value Pr(>F)   
## Year          1  112.5  112.46   8.438 0.0043 **
## Residuals   133 1772.5   13.33                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Biannual2a <- aov(MDAi2 ~ District , data = twiceMDAi)
 summary(Biannual2a)

##              Df Sum Sq Mean Sq F value Pr(>F)  
## District     20  457.9   22.89   1.829 0.0252 *
## Residuals   114 1427.0   12.52                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

  Biannual2b <- aov(MDAi2 ~ Zone , data = twiceMDAi)
 summary(Biannual2b)

##              Df Sum Sq Mean Sq F value Pr(>F)
## Zone          1    4.8    4.80    0.34  0.561
## Residuals   133 1880.1   14.14

## Part 3: Coverage validation survey data analysis. The coverage report by community drug distributers and district program officers are compared with the coverage data by independent study teams study report between 2014 and 2020.
library(tidyverse)
library(mclust)

## Package 'mclust' version 6.0.0
## Type 'citation("mclust")' for citing this R package in publications.

## 
## Attaching package: 'mclust'

## The following object is masked from 'package:purrr':
## 
##     map

coverage1 <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv", header = TRUE)
#coverage1
#view(coverage1)
#summary(coverage1)
ggplot(coverage1, aes(x=Reported_coverage....,
                      y=Survey_coverage....,
                      color=Year))+
  geom_point(size=2.5)+
  geom_density2d(adjust=5)+
  #geom_line()+
  #geom_smooth()+
  xlab("Reported MDA coverages(%)")+
  ylab("Survey MDA coverages(%)")+
      
  theme_bw()+
  annotate("text", label = "Survey versus reported coverages(%) p=0.018",x=89.8, y=62.5)+
  annotate("text", label = "Survey coverages(%) by districts p=0.011",x=88.7, y=60)+
  annotate("text", label = "Survey coverages(%) by zones p=0.614",x=88.5, y=57.5)+
  annotate("text", label = "Survey coverages(%) by years p=0.462",x=88.5, y=55)+
  annotate("text", label = "Reported coverages(%) by years p=0.449",x=89, y=52.5)+
  annotate("text", label = "Reported coverages(%) by districts p=0.491",x=90, y=50)+
  annotate("text", label = "Analysis of variance (aov) p-values legend:",x=87.2, y=65)+
  theme(text = element_text(size = 9))+
  geom_hline(yintercept = 80)

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

#dens1 <- densityMclust(coverage1$Survey_coverage....)
#plot(dens1)
#dens2 <- densityMclust(coverage1$Reported_coverage....)
#plot(dens2)

## Part 3 version 2: Coverage validation survey data analysis. The coverage report by community drug distributers and district program officers are compared with the coverage data by independent study teams study report between 2014 and 2020.
library(tidyverse)
coverage1 <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv", header = TRUE)
#coverage1
view(coverage1)
#summary(coverage1)
ggplot(coverage1, aes(x=Reported_coverage....,
                      y=Survey_coverage....,
                      color=Year))+
  geom_point(size=2.5)+
  geom_line()+
  geom_smooth()+
  xlab("Reported MDA coverages(%)")+
  
   #Legend:Survey versus reported coverages(%) aov p-value=0.018, Survey coverages(%) by districts aov p-value=0.011,
   #Survey coverages(%) by zones aov p-value=0.614, Survey coverages(%) by years aov p-value=0.462,
    #Reported coverages(%) by years p-value=0.449, Reported coverages(%) by districts p-value=0.491,
    #y=80 horizontal line is minimum expected coverage, geom smooth shows the coverage trend, aov=Analysis of variance) 
    
  ylab("Survey MDA coverages(%)")+
  theme_bw()+
  annotate("text", label = "Survey versus reported coverages(%) p=0.018",x=89.5, y=62.5)+
  annotate("text", label = "Survey coverages(%) by districts p=0.011",x=88.7, y=60)+
  annotate("text", label = "Survey coverages(%) by zones p=0.614",x=88.5, y=57.5)+
 annotate("text", label = "Survey coverages(%) by years p=0.462",x=88.5, y=55)+
  annotate("text", label = "Reported coverages(%) by years p=0.449",x=89, y=52.5)+
  annotate("text", label = "Reported coverages(%) by districts p=0.491",x=90, y=50)+
  annotate("text", label = "Figure 4A. analysis of variance (aov) p-values:",x=88.8, y=65)+
  
  theme(text = element_text(size = 12))+
  geom_hline(yintercept = 80)+
  scale_x_continuous(limits = c(50, 100))+
  scale_y_continuous(limits = c(50, 100))

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# District level MDAi coverage validation survey coverage in different survey years. 
library(tidyverse)
coverage1 <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv", header = TRUE)
head(coverage1)

##   Year  Zone District  Kebele Sample.n. Result..n. Survey_coverage....
## 1 2014 Kaffa    Decha   Awrad       242        214                  88
## 2 2014 Kaffa    Decha     Ufa       319        271                  85
## 3 2014 Sheka Andracha Beshifa       141        122                  87
## 4 2014 Sheka Andracha  Getiba        92         88                  96
## 5 2016 Sheka   Masha     Yina      1029        760                  74
## 6 2016 Sheka   Masha    Wollo       549        370                  67
##   Reported_coverage.... P.value
## 1                    81   <0.01
## 2                    83   >0.01
## 3                    84   >0.01
## 4                    88   <0.01
## 5                    84   <0.01
## 6                    77   <0.01

ggplot(coverage1, aes(x=District,
                      y=Survey_coverage....,
                      color=Year))+
  geom_point()+
  #geom_line(color="black")+
  geom_boxplot()+
  xlab("Figure 4B:Survey coverage by MDAi districts (p=0.01)")+
  ylab("MDA coverage survey results(%)")+
  theme_bw()+
theme(text  = element_text(size = 12))+
#annotate("text", label = "Survey coverages(%) by districts p-value=0.011",x=7, y=95)+
  #annotate("text", label = "Figure 4B.",x=4.6, y=97)+
geom_hline(yintercept = 80)

## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# The  sub district (Kebele) level MDAi coverage 
library(tidyverse)
summary(coverage1)

##       Year          Zone             District            Kebele         
##  Min.   :2014   Length:33          Length:33          Length:33         
##  1st Qu.:2016   Class :character   Class :character   Class :character  
##  Median :2018   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2017                                                           
##  3rd Qu.:2019                                                           
##  Max.   :2020                                                           
##    Sample.n.      Result..n.    Survey_coverage.... Reported_coverage....
##  Min.   :  92   Min.   : 88.0   Min.   :63.00       Min.   : 66.00       
##  1st Qu.: 286   1st Qu.:230.0   1st Qu.:80.00       1st Qu.: 83.00       
##  Median : 325   Median :270.0   Median :83.00       Median : 85.00       
##  Mean   : 351   Mean   :286.7   Mean   :83.24       Mean   : 85.55       
##  3rd Qu.: 357   3rd Qu.:293.0   3rd Qu.:88.00       3rd Qu.: 88.00       
##  Max.   :1029   Max.   :760.0   Max.   :97.00       Max.   :100.00       
##    P.value         
##  Length:33         
##  Class :character  
##  Mode  :character  
##                    
##                    
##

ggplot(coverage1, aes(x=Survey_coverage....,
                     y=Kebele,
                      color=District))+
  geom_point(size=2)+
  #geom_density_2d()+
  geom_line(color="black")+
  #geom_boxplot()+
  xlab("Kebeles(villages) survey coverages(%),
       Figure 4C: Survey coverage(%) by kebele p=0.018")+
  ylab("Coverage survey sites (Kebeles in districts (Woreda))")+
  theme_bw()+
  theme(text  = element_text(size = 11))+
  geom_vline(xintercept = 80)

  #annotate("text", label = "Figure 4C. Survey coverage(%) by kebele p=0.018",x=60, y=5)
coverage1$Survey_coverage.... <- as.character(coverage1$Survey_coverage....,
                 levels = coverage1$Survey_coverage....[order(coverage1$Survey_coverage...., decreasing = TRUE)])
  
 #theme(plot.title = element_text(size = 10))
  #annotate("text", label = "Survey coverages(%) by districts p-value=0.011",x=8, y=63)
Annova2 <- aov(Survey_coverage.... ~ Kebele , data = coverage1)
 summary(Annova2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## Kebele      31   1704   54.97   1.099  0.652
## Residuals    1     50   50.00

library(tidyverse)
##part 3.1- Annova test 
library(tidyverse)
 Annova2 <- aov(Survey_coverage.... ~ Reported_coverage.... , data = coverage1)
 summary(Annova2)

##                       Df Sum Sq Mean Sq F value Pr(>F)  
## Reported_coverage....  1  291.2  291.24   6.172 0.0186 *
## Residuals             31 1462.8   47.19                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

  Annova2 <- aov(Survey_coverage.... ~ District , data = coverage1)
 summary(Annova2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## District     9  732.6   81.40   1.833  0.116
## Residuals   23 1021.5   44.41

 Annova2 <- aov(Survey_coverage.... ~ Zone , data = coverage1)
 summary(Annova2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## Zone         1   14.6   14.56   0.259  0.614
## Residuals   31 1739.5   56.11

  Annova2 <- aov(Survey_coverage.... ~ Year , data = coverage1)
 summary(Annova2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## Year         1   30.8   30.83   0.555  0.462
## Residuals   31 1723.2   55.59

  Annova2 <- aov(Reported_coverage.... ~ Year , data = coverage1)
 summary(Annova2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## Year         1   22.7   22.72   0.589  0.449
## Residuals   31 1195.5   38.56

 Annova2 <- aov(Reported_coverage.... ~ District , data = coverage1)
 summary(Annova2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## District     9  217.9   24.21   0.557  0.818
## Residuals   23 1000.2   43.49

### Part 4.0 - Program assessment survey results 
library(tidyverse)
library(ggplot2)
setwd('C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022')
Year <- c(1998, 2011, 2012, 2017, 2021, 2021)
Cases <- c(46.3, 4.3, 2.5, 2.8, 0.5, 0.65)
Survey_type_year <- c("REMO-1998)", "REMO-2011", "Microfilaria-2012", "Ov-16 ELISA-2017/19", "Ov-16 ELISA-2019/21", "O-150 PCR-2021")
Impact <- data.frame(Year, Cases, Survey_type_year, header=TRUE)
#view(Impact)
ggplot(Impact, aes(x=Year,
                   y=Cases
                 #color=Survey_type_year
                 ))+
  geom_point(size=2.5)+
 geom_smooth(method = lm)+
 geom_line(color="black")+

scale_color_discrete(limits = c("REMO-1998", "REMO-2011", "Microfilaria-2012", "Ov-16 ELISA-2017/19", "Ov-16 ELISA-2019/21", "O-150 PCR-2018/21"))+
 scale_y_continuous(limits = c(0, 60))+
 scale_x_continuous(limits = c(1995, 2025))+
  
  #geom_errorbar()+
  xlab("MDA_Years(2001 to 2021)")+
  ylab("Onchocerca positives(%)")+
  #theme_void()+
  #coord_equal()+
  #geom_smooth(method = lm)+
  #facet_wrap(Year, keep.source = FALSE)+
  theme_gray()+
   #annotate("text", label= "Y-intercept = 3713", x=2015, y=56)+
  annotate("text", label="Slop = -1.8393", x=2014, y=52)+
  annotate("text", label = "R-square = 0.8036", x=2015, y=48)+
  annotate("text", label = "P-value = 0.006264", x=2015.2, y=44)+
  #labs(title ='Year:{frame_time})', x='Year', y='Cases')+
  theme_bw()+
  theme(text = element_text(size = 12))

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 10 rows containing missing values (`geom_smooth()`).

  #theme(plot.title = element_text(size = 10))+
  #theme(axis.title.x  = element_text(size = 10))+
  #theme(axis.title.y  = element_text(size = 10))+
  #theme(axis.text.y  = element_text(size = 10))+
  #theme(axis.text.x  = element_text(size = 10))

  #Impact$Survey_type_year_results <- factor(Impact$Survey_type_year_results,
  #levels = Impact$Survey_type_year_results[order(Impact$Survey_type_year_results, decreasing = TRUE)])

### Part 4.1 - Program assessment survey results. This analysis shows the significant reduction of transmission between 1998 and 2021. However, the evidence is still show low level active transmission. 

library(tidyverse)
setwd('C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022')
Year <- c(1998, 2011, 2012, 2017, 2021, 2021)
Cases <- c(46.3, 4.3, 2.5, 2.8, 0.5, 0.03)
Survey_type_year_results <- c("REMO-1998(46.3%)", "REMO-2011(4.3%)", "Microfilaria-2012(2.5%)", "Ov-16 ELISA-2017/19(2.8)", "Ov-16 ELISA-2019/21(0.5%)", "O-150 PCR-2021(0.63%)")
Impact <- data.frame(Year, Cases, Survey_type_year_results, header=TRUE)
#view(Impact)
ggplot(Impact, aes(x=Year,
                   y=Cases,
                ))+
  geom_point(size=3)+
 scale_color_discrete(limits = c("REMO-1998(46.3%)", "REMO-2011(4.3%)", "Microfilaria-2012(2.5%)", "Ov-16 ELISA-2017/19(2.8)", "Ov-16 ELISA-2019/21(0.5%)", "O-150 PCR-2018/21(0.63%)"))+
 scale_y_continuous(limits = c(0, 60))+
 scale_x_continuous(limits = c(1995, 2025))+
  geom_line(color="blue")+
  #geom_smooth()+
  xlab("MDA_Years(2001 to 2021)")+
  ylab("Onchocerca positives(%)")+
  #ggtitle("Figure 5: Onchocerciasis survey results in Keffa and Sheka zones, 1998 and 2021")+
  #theme_void()+
  #coord_equal()+
  geom_smooth(method = lm)+
  #facet_wrap(Survey_type, keep.source = FALSE)+
  theme_gray()+
  theme_bw()+
  #annotate("text", label= "Y-intercept = 3713", x=2015, y=56)+
  annotate("text", label="Slope = -1.8393", x=2014, y=52)+
  annotate("text", label = "R-square = 0.8036", x=2015, y=48)+
  annotate("text", label = "p-value = 0.006264", x=2015.2, y=44)+
  theme(text  = element_text(size = 8))+
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 10 rows containing missing values (`geom_smooth()`).

  #Impact$Survey_type_year_results <- factor(Impact$Survey_type_year_results,
  #levels = Impact$Survey_type_year_results[order(Impact$Survey_type_year_results, decreasing = TRUE)])

### Kaplan_Meier Survival plot
# Survival analysis of onchocerciasis in Keffa and Sheka. In Keffa and Sheka zone, Onchocerciasis elimination program through MDAi is underway since 2001. This analysis indicates the probability of transmission interruption (extinction of worm) in followup treatment groups (MDAi groups) over a period of 20 years. Treatment groups (districts) for 20 consecutive years that have achieved WHO criteria of transmission interruption  are denoted "0" and those which didn't met denoted "1" 
library(tidyverse)
library(tidyverse)
library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

#library(tidyquant)
library(patchwork)
library(survival)
library(survminer)

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

library(ggtext)

Districts <- c("Adiyo", "Andiracha", "Bita", "Bonga", "Bonga T", "Chana", "Cheta", "Decha", "Gesha", "Gewata","Gimbo", "Goba", "Masha town", "Masha", "Saylem", "Shisho Inde", "Tello", "Tepi town", "Wacha", "Yeki")

MDA_Year <- c(26, 28, 27, 6, 24, 28, 27, 27, 28, 29, 29, 6, 23, 29, 28, 6, 27, 23, 6, 29)

Prevalence <- c(1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0)

Zone <- c("Keffa", "Sheka", "Keffa", "Keffa", "Keffa", "Keffa", "Keffa", "Keffa", "Keffa", "Keffa", "Keffa", "Keffa","Sheka", "Sheka", "Keffa", "Keffa", "Keffa", "Sheka", "Keffa", "Sheka")
  
  Surv_data <- data.frame(Zone, Districts, MDA_Year, Prevalence, header=TRUE )
  #view(Surv_data)

km <- survfit(Surv(MDA_Year, Prevalence) ~ 1, data=Surv_data)
  plot(km,
  xlab="Followup years=MDA rounds",
  ylab="Onchocerciasis tranmission  probability")
  ggsurvplot(km)

ggsurvplot(km, xlim=c(0, 30), break.x.by = 5, ylab="Transmission probablity", xlab="MDA rounds (time)",
           risk.table=TRUE, risk.table.title="Table:Transmission probablity in districts",surv.scale = "percent", .risk.table.height=20)

theme_survminer(font.main = c(2, "black"),
                font.x = c(6, "black"),
                font.y = c(6, "black"))

## List of 97
##  $ line                      :List of 6
##   ..$ colour       : chr "black"
##   ..$ linewidth    : num 0.545
##   ..$ linetype     : num 1
##   ..$ lineend      : chr "butt"
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ rect                      :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ linewidth    : num 0.545
##   ..$ linetype     : num 1
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ text                      :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 12
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ title                     : NULL
##  $ aspect.ratio              : NULL
##  $ axis.title                : NULL
##  $ axis.title.x              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "black"
##   ..$ size         : num 6
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 3points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.top          :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 3points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.bottom       : NULL
##  $ axis.title.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "black"
##   ..$ size         : num 6
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 3points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y.left         : NULL
##  $ axis.title.y.right        :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 3points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text                 :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey30"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x               :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 12
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 0
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.4points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.top           :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.4points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.bottom        : NULL
##  $ axis.text.y               :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 12
##   ..$ hjust        : num 1
##   ..$ vjust        : NULL
##   ..$ angle        : num 0
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.4points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y.left          : NULL
##  $ axis.text.y.right         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.4points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks                :List of 6
##   ..$ colour       : chr "grey20"
##   ..$ linewidth    : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ axis.ticks.x              : NULL
##  $ axis.ticks.x.top          : NULL
##  $ axis.ticks.x.bottom       : NULL
##  $ axis.ticks.y              : NULL
##  $ axis.ticks.y.left         : NULL
##  $ axis.ticks.y.right        : NULL
##  $ axis.ticks.length         : 'simpleUnit' num 3points
##   ..- attr(*, "unit")= int 8
##  $ axis.ticks.length.x       : NULL
##  $ axis.ticks.length.x.top   : NULL
##  $ axis.ticks.length.x.bottom: NULL
##  $ axis.ticks.length.y       : NULL
##  $ axis.ticks.length.y.left  : NULL
##  $ axis.ticks.length.y.right : NULL
##  $ axis.line                 :List of 6
##   ..$ colour       : chr "black"
##   ..$ linewidth    : 'rel' num 1
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ axis.line.x               : NULL
##  $ axis.line.x.top           : NULL
##  $ axis.line.x.bottom        : NULL
##  $ axis.line.y               : NULL
##  $ axis.line.y.left          : NULL
##  $ axis.line.y.right         : NULL
##  $ legend.background         :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : logi NA
##   ..$ linewidth    : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.margin             : 'margin' num [1:4] 6points 6points 6points 6points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing            : 'simpleUnit' num 12points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing.x          : NULL
##  $ legend.spacing.y          : NULL
##  $ legend.key                : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.key.size           : 'simpleUnit' num 1.2lines
##   ..- attr(*, "unit")= int 3
##  $ legend.key.height         : NULL
##  $ legend.key.width          : NULL
##  $ legend.text               :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 10
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.align         : NULL
##  $ legend.title              :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 10
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title.align        : NULL
##  $ legend.position           : chr "top"
##  $ legend.direction          : NULL
##  $ legend.justification      : chr "center"
##  $ legend.box                : NULL
##  $ legend.box.just           : NULL
##  $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
##   ..- attr(*, "unit")= int 1
##  $ legend.box.background     : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.box.spacing        : 'simpleUnit' num 12points
##   ..- attr(*, "unit")= int 8
##  $ panel.background          :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ linewidth    : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.border              : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ panel.spacing             : 'simpleUnit' num 6points
##   ..- attr(*, "unit")= int 8
##  $ panel.spacing.x           : NULL
##  $ panel.spacing.y           : NULL
##  $ panel.grid                :List of 6
##   ..$ colour       : chr "grey92"
##   ..$ linewidth    : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major          : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ panel.grid.minor          : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ panel.grid.major.x        : NULL
##  $ panel.grid.major.y        : NULL
##  $ panel.grid.minor.x        : NULL
##  $ panel.grid.minor.y        : NULL
##  $ panel.ontop               : logi FALSE
##  $ plot.background           :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : chr "white"
##   ..$ linewidth    : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "black"
##   ..$ size         : num 2
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : num 1
##   ..$ margin       : 'margin' num [1:4] 0points 0points 6points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.title.position       : chr "panel"
##  $ plot.subtitle             :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 15
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : num 1
##   ..$ margin       : 'margin' num [1:4] 0points 0points 6points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption              :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 15
##   ..$ hjust        : num 1
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : num 1
##   ..$ margin       : 'margin' num [1:4] 6points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption.position     : chr "panel"
##  $ plot.tag                  :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.tag.position         : chr "topleft"
##  $ plot.margin               : 'margin' num [1:4] 6points 6points 6points 6points
##   ..- attr(*, "unit")= int 8
##  $ strip.background          :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ linewidth    : 'rel' num 2
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ strip.background.x        : NULL
##  $ strip.background.y        : NULL
##  $ strip.clip                : chr "inherit"
##  $ strip.placement           : chr "inside"
##  $ strip.text                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey10"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 4.8points 4.8points 4.8points 4.8points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.x              : NULL
##  $ strip.text.x.bottom       : NULL
##  $ strip.text.x.top          : NULL
##  $ strip.text.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.y.left         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.y.right        : NULL
##  $ strip.switch.pad.grid     : 'simpleUnit' num 3points
##   ..- attr(*, "unit")= int 8
##  $ strip.switch.pad.wrap     : 'simpleUnit' num 3points
##   ..- attr(*, "unit")= int 8
##  - attr(*, "class")= chr "theme"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE

Model1 <- lm(Prevalence ~ MDA_Year)
plot(Model1)

summary(Model1)

## 
## Call:
## lm(formula = Prevalence ~ MDA_Year)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3050 -0.3042 -0.3002  0.6960  0.7137 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2814282  0.3065613   0.918    0.371
## MDA_Year    0.0008146  0.0125837   0.065    0.949
## 
## Residual standard error: 0.483 on 18 degrees of freedom
## Multiple R-squared:  0.0002327,  Adjusted R-squared:  -0.05531 
## F-statistic: 0.00419 on 1 and 18 DF,  p-value: 0.9491

###############*
coxph <- coxph(Surv(MDA_Year, Prevalence) ~ Zone, method="breslow") #, data=Surv_data)

## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.

summary(coxph)

## Call:
## coxph(formula = Surv(MDA_Year, Prevalence) ~ Zone, method = "breslow")
## 
##   n= 20, number of events= 6 
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)
## ZoneSheka -1.952e+01  3.319e-09  1.178e+04 -0.002    0.999
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## ZoneSheka 3.319e-09  301334179         0       Inf
## 
## Concordance= 0.641  (se = 0.061 )
## Likelihood ratio test= 3.67  on 1 df,   p=0.06
## Wald test            = 0  on 1 df,   p=1
## Score (logrank) test = 2.16  on 1 df,   p=0.1

coxph1 <- coxph(Surv(MDA_Year, Prevalence) ~ 1, method="breslow") #, data=Surv_data)
summary(coxph1)

## Call:  coxph(formula = Surv(MDA_Year, Prevalence) ~ 1, method = "breslow")
## 
## Null model
##   log likelihood= -15.24899 
##   n= 20

###############*
exponential <- survreg(Surv(MDA_Year, Prevalence) ~ Zone, dist="exponential") #, data=Surv_data)
summary(exponential)

## 
## Call:
## survreg(formula = Surv(MDA_Year, Prevalence) ~ Zone, dist = "exponential")
##                Value Std. Error    z      p
## (Intercept) 3.99e+00   4.08e-01 9.77 <2e-16
## ZoneSheka   2.03e+01   1.62e+04 0.00      1
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -29.9   Loglik(intercept only)= -32
##  Chisq= 4.1 on 1 degrees of freedom, p= 0.043 
## Number of Newton-Raphson Iterations: 20 
## n= 20

#plot(sfitby_Districts, xlab="Time", ylab="Survival")
exponential2 <- survreg(Surv(MDA_Year, Prevalence) ~ Zone, dist="exponential") #, data=Surv_data)
summary(exponential2)

## 
## Call:
## survreg(formula = Surv(MDA_Year, Prevalence) ~ Zone, dist = "exponential")
##                Value Std. Error    z      p
## (Intercept) 3.99e+00   4.08e-01 9.77 <2e-16
## ZoneSheka   2.03e+01   1.62e+04 0.00      1
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -29.9   Loglik(intercept only)= -32
##  Chisq= 4.1 on 1 degrees of freedom, p= 0.043 
## Number of Newton-Raphson Iterations: 20 
## n= 20

#sfitby_admin <- ggsurvplot(
 # Surv_data, risk.table = TRUE, legend = "top",
 # ggtheme = theme_bw()
#)


####*
#sfitby_Districts <- survfit(coxph(Surv(MDA_Year, Prevalence) ~ Zone), type="aalen") #, data=Surv_data)
#summary(sfitby_Districts)
#plot(sfitby_Districts, xlab="Time", ylab="Transmission probability")
 #ggtitle("Figure 4: Kaplan_Meier Survival plot")

#Conclusion and recommendation
#The baseline epidemiological mapping results have showed high (hyper and meso) level of transmission in Keffa and Sheka zones. Based on these, the MDAi have been implemented since 2001. However, the coverage were not uniform throughout the 20 MDA years. Whereas, the program monitoring results indicates significant reduction of transmission from the baseline or midterm evaluation, there is evidence of active low level transmission. Therefore, continuing MDA with high coverage is warranted.

##Community Knowledge assessment in Keffa and Sheka zones and Guragae zones about onchocerciasis program. 

library(tidyverse)
knowledge <- read.csv("C:/Users/EPHI/Desktop/SF_Dec23_all/Knowledge .csv", header=TRUE)
summary(knowledge)

##   Variables         Guragae.zone......n.271 Keffa.and.Sheka.zone......n.256
##  Length:8           Min.   : 5.00           Min.   :24.00                  
##  Class :character   1st Qu.:12.75           1st Qu.:26.75                  
##  Mode  :character   Median :15.50           Median :37.50                  
##                     Mean   :18.88           Mean   :40.62                  
##                     3rd Qu.:26.00           3rd Qu.:44.75                  
##                     Max.   :38.00           Max.   :78.00

ggplot(knowledge, aes(Keffa.and.Sheka.zone......n.256, Variables))+
  geom_point(size=2.5, color="blue")+
  geom_point(aes(x=Guragae.zone......n.271))+ #, color="green"))+
  geom_point(size=2.5, color="red")+
  ylab("Knowledge assessemnt questions")+
  xlab("Percentage of responses(yes)")+ 
  theme_bw()+
   scale_x_continuous(limits = c(1, 80))+
   theme(text = element_text(size = 8.7))+
   theme(text = element_text(size = 12))

  knowledge$Keffa.and.Sheka.zone......n.256 <- as.character(knowledge$Keffa.and.Sheka.zone......n.256,
                 levels = knowledge$Keffa.and.Sheka.zone......n.256[order(knowledge$Keffa.and.Sheka.zone......n.256, decreasing = TRUE)])

###
kaov <- aov(Guragae.zone......n.271 ~ Variables , data = knowledge)
summary(kaov)

##             Df Sum Sq Mean Sq
## Variables    7  960.9   137.3

###
kaov2 <- aov(Keffa.and.Sheka.zone......n.256 ~ Guragae.zone......n.271, data = knowledge)
summary(kaov2)

##                         Df Sum Sq Mean Sq F value Pr(>F)
## Guragae.zone......n.271  1  117.9   117.9   0.329  0.587
## Residuals                6 2153.9   359.0

##Community attitude assessment in Keffa and Sheka zones and Guragae zones about onchocerciasis program. 

library(tidyverse)
library(forcats)
Attitude_k <- read.csv("C:/Users/EPHI/Desktop/SF_Dec23_all/Attitude_k.csv", header=TRUE)
summary(Attitude_k)

##       sno         Questions         Keffa.and.Sheka.zones..n.256
##  Min.   : 1.00   Length:10          Min.   : 6.00               
##  1st Qu.: 3.25   Class :character   1st Qu.:67.25               
##  Median : 5.50   Mode  :character   Median :79.50               
##  Mean   : 5.50                      Mean   :69.40               
##  3rd Qu.: 7.75                      3rd Qu.:82.25               
##  Max.   :10.00                      Max.   :96.00               
##  Guragae.zone..n.271
##  Min.   : 1.00      
##  1st Qu.:27.25      
##  Median :49.50      
##  Mean   :47.00      
##  3rd Qu.:68.50      
##  Max.   :88.00

#view(Attitude_k)
ggplot(Attitude_k, aes(Keffa.and.Sheka.zones..n.256, Questions))+
  geom_point(size=2.5, color="blue")+
  geom_point(aes(x=Guragae.zone..n.271))+ #, color="green"))+
  geom_point(size=2.5, color="red")+
  ylab("Attitude assessemnt questions")+
  xlab("Percentage of responses(yes)")+ 
    theme_bw()+
   scale_x_continuous(limits = c(1, 100))+
   theme(text = element_text(size = 12))+
  annotate("text", label = "P = 0.02", x=11, y=1)+
  coord_cartesian()+
  theme(text = element_text(size = 12))

Attitude_k$Keffa.and.Sheka.zones..n.256 <- as.character(Attitude_k$Keffa.and.Sheka.zones..n.256,
                 levels = Attitude_k$Keffa.and.Sheka.zones..n.256[order(Attitude_k$Keffa.and.Sheka.zones..n.256, decreasing = TRUE)])

###
Aaov <- aov(Guragae.zone..n.271 ~ Questions , data = Attitude_k)
summary(Aaov)

##             Df Sum Sq Mean Sq
## Questions    9   6818   757.6

###
Aaov2 <- aov(Keffa.and.Sheka.zones..n.256 ~ Guragae.zone..n.271, data = Attitude_k)
summary(Aaov2)

##                     Df Sum Sq Mean Sq F value Pr(>F)  
## Guragae.zone..n.271  1   3141  3141.4   8.259 0.0207 *
## Residuals            8   3043   380.4                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##Community Practice assessment in Keffa and Sheka zones and Guragae zones about onchocerciasis program. 
library(tidyverse)
library(forcats)
Practicek <- read.csv("C:/Users/EPHI/Desktop/SF_Dec23_all/Practicek.csv", header=TRUE)
summary(Practicek)

##       Sno     Questions         Keffa.and.Sheka.zones..n.256.
##  Min.   :1   Length:9           Min.   : 13                  
##  1st Qu.:3   Class :character   1st Qu.: 17                  
##  Median :5   Mode  :character   Median : 44                  
##  Mean   :5                      Mean   : 50                  
##  3rd Qu.:7                      3rd Qu.: 82                  
##  Max.   :9                      Max.   :100                  
##                                                              
##  Guragae.zone..n.271.
##  Min.   :39.00       
##  1st Qu.:53.25       
##  Median :67.50       
##  Mean   :67.50       
##  3rd Qu.:81.75       
##  Max.   :96.00       
##  NA's   :7

colnames(Practicek)

## [1] "Sno"                           "Questions"                    
## [3] "Keffa.and.Sheka.zones..n.256." "Guragae.zone..n.271."

row.names(Practicek)

## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9"

#Reordering
questions <- c("Have you received ivermectin in your life time?","Have you missed any MDAi since it started?","Did you attend the most reecent MDAi?","Why do you miss MDAi? A)Absenteeism?","B) Fear of side effects?","C) Drug shortage","D) MDAi overlap with farming season", "Do you spend most of your time near the river?","Have you experianced intense biting?")
Keffa_Sheka <- c(100,18,82,44,17,13,16,98,62)
Gurage <- c(0,0,0,0,0,0,0,96,39)
Practiceksg <-data_frame(questions, Keffa_Sheka, Gurage)
ggplot(data=Practiceksg,aes(x=Keffa_Sheka,y=forcats::fct_rev(forcats::fct_relevel(questions, "Have you received ivermectin in your life time?","Have you missed any MDAi since it started?","Did you attend the most reecent MDAi?","Why do you miss MDAi? A)Absenteeism?","B) Fear of side effects?","C) Drug shortage","D) MDAi overlap with farming season", "Do you spend most of your time near the river?","Have you experianced intense biting?"))))+
  geom_point(size=2.5, color="blue")+
  geom_point(aes(Gurage))+
  geom_point(size=2.5, color="red")+
  ylab("Practice assessemnt questions")+
  xlab("Percentage of responses(yes)")+ 
  theme_bw()+
  #coord_polar()+
   #scale_x_continuous(limits = c(1, 80))+
   #theme(text = element_text(size = 8.7))+
  annotate("text", label = "P = 0.021", x=78, y=1)+
  scale_x_continuous(limits = c(10, 100))+
  theme(text = element_text(size = 12), order(decreasing = F))

## Warning: Removed 7 rows containing missing values (`geom_point()`).

###
#paov <- aov(Gurage ~ Questions , data = Practiceksg)
#summary(paov)
###
paov2 <- aov(Keffa_Sheka ~ Gurage, data = Practiceksg)
summary(paov2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## Gurage       1   2958    2958   2.693  0.145
## Residuals    7   7688    1098

### PCA of simulium flies in Ethiopia (not preferred way)
library(pegas)

## Loading required package: ape

## 
## Attaching package: 'ape'

## The following object is masked from 'package:ggpubr':
## 
##     rotate

## The following object is masked from 'package:dplyr':
## 
##     where

## 
## Attaching package: 'pegas'

## The following object is masked from 'package:ape':
## 
##     mst

library(adegenet)

## Loading required package: ade4

## Registered S3 method overwritten by 'ade4':
##   method      from 
##   print.amova pegas

## 
## Attaching package: 'ade4'

## The following object is masked from 'package:pegas':
## 
##     amova

## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()

## 
## Attaching package: 'adegenet'

## The following object is masked from 'package:survival':
## 
##     strata

library(viridisLite)
library(viridis)
    popfile<-c("Jima",
                "NG",
                "Aware",
                "Beles",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Block 4",
                 "Daroo",
                 "Daroo",
                 "Daroo",
                 "Dorm09",
                 "Dorm09",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Godguadit",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Kisha",
                 "Seshi",
                 "Worgo",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu",
                 "Wudi Gemzu")
    popfile <- as.factor(popfile)
    ploidy<-2    
    mycol<-c("blue",
             "red",
             "pink",
             "lightblue",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "black",
             "lightgreen",
             "lightgreen",
             "lightgreen",
             "darkgreen",
             "darkgreen",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "yellow",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "#543855",
             "pink",
             "red",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen",
             "darkgreen")
    
     VCFfile <- "D:/All187/sindew.rmi2.qual.sd.msg85.nos.recode.vcf"    
    # read vcf file and convert it into a format that the programs can use
    read.vcf(VCFfile,from=1,to=200000)->map2023.vcf

## File apparently not yet accessed:
## Scanning file D:/All187/sindew.rmi2.qual.sd.msg85.nos.recode.vcf 
## 
 38.60706 / 38.60706 Mb
 38.60706 / 38.60706 Mb
## Done.
## 
Reading 100 / 200000 loci
Reading 200 / 200000 loci
Reading 300 / 200000 loci
Reading 400 / 200000 loci
Reading 500 / 200000 loci
Reading 600 / 200000 loci
Reading 700 / 200000 loci
Reading 800 / 200000 loci
Reading 900 / 200000 loci
Reading 1000 / 200000 loci
Reading 1100 / 200000 loci
Reading 1200 / 200000 loci
Reading 1300 / 200000 loci
Reading 1400 / 200000 loci
Reading 1500 / 200000 loci
Reading 1600 / 200000 loci
Reading 1700 / 200000 loci
Reading 1800 / 200000 loci
Reading 1900 / 200000 loci
Reading 2000 / 200000 loci
Reading 2100 / 200000 loci
Reading 2200 / 200000 loci
Reading 2300 / 200000 loci
Reading 2400 / 200000 loci
Reading 2500 / 200000 loci
Reading 2600 / 200000 loci
Reading 2700 / 200000 loci
Reading 2800 / 200000 loci
Reading 2900 / 200000 loci
Reading 3000 / 200000 loci
Reading 3100 / 200000 loci
Reading 3200 / 200000 loci
Reading 3300 / 200000 loci
Reading 3400 / 200000 loci
Reading 3500 / 200000 loci
Reading 3600 / 200000 loci
Reading 3700 / 200000 loci
Reading 3800 / 200000 loci
Reading 3900 / 200000 loci
Reading 4000 / 200000 loci
Reading 4100 / 200000 loci
Reading 4200 / 200000 loci
Reading 4300 / 200000 loci
Reading 4400 / 200000 loci
Reading 4500 / 200000 loci
Reading 4600 / 200000 loci
Reading 4700 / 200000 loci
Reading 4800 / 200000 loci
Reading 4900 / 200000 loci
Reading 5000 / 200000 loci
Reading 5100 / 200000 loci
Reading 5200 / 200000 loci
Reading 5300 / 200000 loci
Reading 5400 / 200000 loci
Reading 5500 / 200000 loci
Reading 5600 / 200000 loci
Reading 5700 / 200000 loci
Reading 5800 / 200000 loci
Reading 5900 / 200000 loci
Reading 6000 / 200000 loci
Reading 6100 / 200000 loci
Reading 6200 / 200000 loci
Reading 6300 / 200000 loci
Reading 6400 / 200000 loci
Reading 6500 / 200000 loci
Reading 6600 / 200000 loci
Reading 6700 / 200000 loci
Reading 6800 / 200000 loci
Reading 6900 / 200000 loci
Reading 7000 / 200000 loci
Reading 7100 / 200000 loci
Reading 7200 / 200000 loci
Reading 7300 / 200000 loci
Reading 7400 / 200000 loci
Reading 7500 / 200000 loci
Reading 7600 / 200000 loci
Reading 7700 / 200000 loci
Reading 7800 / 200000 loci
Reading 7900 / 200000 loci
Reading 8000 / 200000 loci
Reading 8100 / 200000 loci
Reading 8200 / 200000 loci
Reading 8300 / 200000 loci
Reading 8400 / 200000 loci
Reading 8500 / 200000 loci
Reading 8600 / 200000 loci
Reading 8614 / 8614 loci.
## Done.

    loci2genind(map2023.vcf,ploidy=ploidy)->map2023.gi

## Warning in adegenet::df2genind(as.matrix(x[, attr(x, "locicol"), drop =
## FALSE]), : duplicate labels detected for some loci; using generic labels

    map2023.X<-tab(map2023.gi,NA.method="mean")             
    map2023.pca<-dudi.pca(map2023.X,center=TRUE,scale=FALSE,nf=200,scannf=FALSE)
    summary(map2023.pca)

## Class: pca dudi
## Call: dudi.pca(df = map2023.X, center = TRUE, scale = FALSE, scannf = FALSE, 
##     nf = 200)
## 
## Total inertia: 2644
## 
## Eigenvalues:
##     Ax1     Ax2     Ax3     Ax4     Ax5 
##  351.92  302.50  130.10   50.76   36.61 
## 
## Projected inertia (%):
##     Ax1     Ax2     Ax3     Ax4     Ax5 
##  13.309  11.440   4.920   1.920   1.385 
## 
## Cumulative projected inertia (%):
##     Ax1   Ax1:2   Ax1:3   Ax1:4   Ax1:5 
##   13.31   24.75   29.67   31.59   32.97 
## 
## (Only 5 dimensions (out of 102) are shown)

   A <- s.class(map2023.pca$li,fac=popfile,col=transp(mycol,.6),clab=1,pch=19,grid=0,cpoint=1.25,cstar=0,cellipse=1)

# PoolTest R package practice example analysis
#install.packages("PoolTestR")
#install.packages("StanHeaders", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
#install.packages("rstan", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
#install.packages("devtools") #you can skip this if you already have devtools installed
#devtools::install_github("AngusMcLure/PoolTestR")
library(PoolTestR)
nrow(SimpleExampleData)

## [1] 1152

head(SimpleExampleData)

##   NumInPool  Site Village Year Region Result
## 1         1 A-1-1     A-1    0      A      0
## 2         5 A-1-1     A-1    0      A      1
## 3        10 A-1-1     A-1    0      A      1
## 4         1 A-1-2     A-1    0      A      0
## 5         5 A-1-2     A-1    0      A      0
## 6        10 A-1-2     A-1    0      A      1

PrevWholeDataset <- PoolPrev(SimpleExampleData,Result,NumInPool)
PrevWholeDataset

## # A tibble: 1 × 9
##   PrevMLE  CILow CIHigh PrevBayes CrILow CrIHigh ProbAbsent NumberOfPools
##     <dbl>  <dbl>  <dbl>     <dbl>  <dbl>   <dbl> <lgl>              <int>
## 1  0.0572 0.0510 0.0639    0.0574 0.0508  0.0640 NA                  1152
## # ℹ 1 more variable: NumberPositive <dbl>

PrevByRegion <- PoolPrev(SimpleExampleData, Result, NumInPool, Region)
PrevByRegion

## # A tibble: 4 × 10
##   Region PrevMLE   CILow CIHigh PrevBayes  CrILow CrIHigh ProbAbsent
##   <fct>    <dbl>   <dbl>  <dbl>     <dbl>   <dbl>   <dbl> <lgl>     
## 1 A       0.108  0.0900  0.128     0.108  0.0900   0.127  NA        
## 2 B       0.0353 0.0263  0.0460    0.0354 0.0261   0.0456 NA        
## 3 C       0.0108 0.00637 0.0170    0.0112 0.00663  0.0170 NA        
## 4 D       0.0978 0.0809  0.117     0.0980 0.0808   0.117  NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

PrevByVillage <- PoolPrev(SimpleExampleData, Result, NumInPool, Village)
PrevByVillage

## # A tibble: 16 × 10
##    Village PrevMLE    CILow CIHigh PrevBayes   CrILow CrIHigh ProbAbsent
##    <chr>     <dbl>    <dbl>  <dbl>     <dbl>    <dbl>   <dbl> <lgl>     
##  1 A-1     0.120   0.0838   0.165    0.121   0.0840    0.165  NA        
##  2 A-2     0.0946  0.0637   0.133    0.0965  0.0639    0.133  NA        
##  3 A-3     0.171   0.122    0.231    0.173   0.125     0.229  NA        
##  4 A-4     0.0690  0.0444   0.101    0.0700  0.0453    0.0994 NA        
##  5 B-1     0.0253  0.0122   0.0451   0.0264  0.0122    0.0455 NA        
##  6 B-2     0.0459  0.0266   0.0725   0.0467  0.0266    0.0717 NA        
##  7 B-3     0.0257  0.0124   0.0460   0.0266  0.0122    0.0449 NA        
##  8 B-4     0.0458  0.0265   0.0723   0.0472  0.0272    0.0723 NA        
##  9 C-1     0.00533 0.000889 0.0164   0.00651 0.000958  0.0168 NA        
## 10 C-2     0.0224  0.0103   0.0414   0.0241  0.0109    0.0423 NA        
## 11 C-3     0.00805 0.00201  0.0207   0.00921 0.00231   0.0212 NA        
## 12 C-4     0.00805 0.00201  0.0207   0.00921 0.00236   0.0209 NA        
## 13 D-1     0.0656  0.0412   0.0978   0.0668  0.0408    0.0975 NA        
## 14 D-2     0.157   0.112    0.212    0.157   0.112     0.208  NA        
## 15 D-3     0.112   0.0777   0.155    0.113   0.0779    0.156  NA        
## 16 D-4     0.0720  0.0458   0.106    0.0738  0.0463    0.106  NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

PrevByYear <- PoolPrev(SimpleExampleData, Result, NumInPool, Year)
PrevByYear

## # A tibble: 3 × 10
##    Year PrevMLE  CILow CIHigh PrevBayes CrILow CrIHigh ProbAbsent NumberOfPools
##   <int>   <dbl>  <dbl>  <dbl>     <dbl>  <dbl>   <dbl> <lgl>              <int>
## 1     0  0.0680 0.0564 0.0810    0.0682 0.0563  0.0803 NA                   384
## 2     1  0.0483 0.0388 0.0591    0.0483 0.0393  0.0583 NA                   384
## 3     2  0.0560 0.0457 0.0677    0.0562 0.0455  0.0676 NA                   384
## # ℹ 1 more variable: NumberPositive <dbl>

PrevByRegionYear <- PoolPrev(SimpleExampleData, Result, NumInPool, Region, Year)
PrevByRegionYear

## # A tibble: 12 × 11
##    Region  Year PrevMLE    CILow CIHigh PrevBayes   CrILow CrIHigh ProbAbsent
##    <fct>  <int>   <dbl>    <dbl>  <dbl>     <dbl>    <dbl>   <dbl> <lgl>     
##  1 A          0 0.140   0.104    0.182    0.141   0.104     0.181  NA        
##  2 A          1 0.0890  0.0628   0.121    0.0896  0.0625    0.120  NA        
##  3 A          2 0.0994  0.0716   0.133    0.101   0.0715    0.133  NA        
##  4 B          0 0.0306  0.0173   0.0491   0.0316  0.0177    0.0495 NA        
##  5 B          1 0.0302  0.0171   0.0486   0.0308  0.0172    0.0475 NA        
##  6 B          2 0.0455  0.0286   0.0679   0.0465  0.0289    0.0677 NA        
##  7 C          0 0.0145  0.00626  0.0279   0.0154  0.00684   0.0279 NA        
##  8 C          1 0.00396 0.000659 0.0122   0.00475 0.000760  0.0120 NA        
##  9 C          2 0.0143  0.00617  0.0275   0.0151  0.00639   0.0273 NA        
## 10 D          0 0.128   0.0940   0.168    0.129   0.0949    0.169  NA        
## 11 D          1 0.0911  0.0642   0.124    0.0923  0.0661    0.125  NA        
## 12 D          2 0.0786  0.0545   0.109    0.0800  0.0556    0.109  NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

Model <- PoolReg(Result ~ Year + Region, SimpleExampleData, NumInPool)

## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package

## [1] "Model has no group/random effects. Using a fixed effect model (glm)"

summary(Model)

## 
## Call:
## stats::glm(formula = formula, family = stats::binomial(PoolLink(data[[poolSize]])), 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6144  -0.5969  -0.4249   0.7962   3.0556  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.97190    0.12559 -15.701  < 2e-16 ***
## Year        -0.13874    0.07716  -1.798   0.0722 .  
## RegionB     -1.20591    0.17890  -6.741 1.58e-11 ***
## RegionC     -2.40943    0.27082  -8.897  < 2e-16 ***
## RegionD     -0.11001    0.14466  -0.760   0.4470    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1304.34  on 1151  degrees of freedom
## Residual deviance:  990.58  on 1147  degrees of freedom
## AIC: 1000.6
## 
## Number of Fisher Scoring iterations: 6

getPrevalence(Model)

## $PopulationEffects
##    Year Region    Estimate       CILow     CIHigh
## 1     0      A 0.122185192 0.098117601 0.15116688
## 2     1      A 0.108067606 0.090367998 0.12874323
## 3     2      A 0.095403918 0.075702500 0.11956932
## 4     0      B 0.040009389 0.029328475 0.05436226
## 5     1      B 0.035007943 0.026435952 0.04622747
## 6     2      B 0.030611775 0.022154455 0.04215843
## 7     0      C 0.012354197 0.007441581 0.02044312
## 8     1      C 0.010771050 0.006606406 0.01751478
## 9     2      C 0.009388849 0.005605506 0.01568542
## 10    0      D 0.110867448 0.088550302 0.13795780
## 11    1      D 0.097911684 0.081385883 0.11736440
## 12    2      D 0.086322918 0.068098184 0.10885531

PrevByHier <- HierPoolPrev(SimpleExampleData, Result, NumInPool,
                           c("Village","Site"))
PrevByHier

## # A tibble: 1 × 5
##   PrevBayes CrILow CrIHigh NumberOfPools NumberPositive
##       <dbl>  <dbl>   <dbl>         <int>          <dbl>
## 1    0.0486 0.0262  0.0827          1152            292

## Sindew M., 2023: Pool screening analysis of Simulium fly xenomonitoring data and prevalence estimation 

library(PoolTestR)
pool_data <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/PoolScreening_Data.xlsx.csv", header = TRUE)
head(pool_data)

##   S.no  Zone District Year npool Result
## 1    1 Kaffa    Gimbo 2018   200      0
## 2    2 Kaffa    Gimbo 2018   200      0
## 3    3 Kaffa    Gimbo 2018   200      0
## 4    4 Kaffa    Gimbo 2018   200      0
## 5    5 Kaffa    Gimbo 2018   200      0
## 6    6 Kaffa    Gimbo 2018   200      0

summary(pool_data)

##       S.no            Zone             District              Year     
##  Min.   :  1.00   Length:218         Length:218         Min.   :2018  
##  1st Qu.: 55.25   Class :character   Class :character   1st Qu.:2018  
##  Median :109.50   Mode  :character   Mode  :character   Median :2019  
##  Mean   :109.50                                         Mean   :2019  
##  3rd Qu.:163.75                                         3rd Qu.:2019  
##  Max.   :218.00                                         Max.   :2021  
##      npool         Result       
##  Min.   :190   Min.   :0.00000  
##  1st Qu.:200   1st Qu.:0.00000  
##  Median :200   Median :0.00000  
##  Mean   :200   Mean   :0.02294  
##  3rd Qu.:200   3rd Qu.:0.00000  
##  Max.   :200   Max.   :1.00000

PrevalenceWholeDataset <- PoolPrev(pool_data, Result, npool)
PrevalenceWholeDataset

## # A tibble: 1 × 9
##    PrevMLE     CILow   CIHigh PrevBayes  CrILow CrIHigh ProbAbsent NumberOfPools
##      <dbl>     <dbl>    <dbl>     <dbl>   <dbl>   <dbl> <lgl>              <int>
## 1 0.000116 0.0000416 0.000249  0.000126 4.10e-5 2.55e-4 NA                   218
## # ℹ 1 more variable: NumberPositive <dbl>

PrevalenceDistrict <- PoolPrev(pool_data, Result, npool, District)

## Warning: There were 5 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 3 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 3 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

PrevalenceDistrict

## # A tibble: 7 × 10
##   District          PrevMLE   CILow  CIHigh PrevBayes  CrILow CrIHigh ProbAbsent
##   <chr>               <dbl>   <dbl>   <dbl>     <dbl>   <dbl>   <dbl> <lgl>     
## 1 "Andracha and Ye… 0       0       3.43e-4 0.0000967 0       3.82e-4 NA        
## 2 "Bita and Chena " 1.27e-4 7.22e-6 5.57e-4 0.000186  1.11e-5 5.76e-4 NA        
## 3 "Bitta and Shish… 0       0       4.36e-4 0.000111  0       4.46e-4 NA        
## 4 "Gewata"          9.90e-5 5.65e-6 4.36e-4 0.000152  1.08e-5 4.71e-4 NA        
## 5 "Gimbo"           5.27e-4 1.31e-4 1.37e-3 0.000610  1.31e-4 1.42e-3 NA        
## 6 "Masha"           0       0       6.00e-4 0.000152  0       6.05e-4 NA        
## 7 "Saylem, Gesha &… 0       0       3.10e-4 0.0000811 0       3.07e-4 NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

PrevalenceZone <- PoolPrev(pool_data, Result, npool, Zone)

## Warning: There were 2 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

PrevalenceZone

## # A tibble: 2 × 10
##   Zone   PrevMLE     CILow   CIHigh PrevBayes    CrILow  CrIHigh ProbAbsent
##   <chr>    <dbl>     <dbl>    <dbl>     <dbl>     <dbl>    <dbl> <lgl>     
## 1 Kaffa 0.000125 0.0000449 0.000269  0.000137 0.0000432 0.000275 NA        
## 2 Sheka 0        0         0.000600  0.000147 0         0.000599 NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

PrevalenceYear <- PoolPrev(pool_data, Result, npool, Year)

## Warning: There were 7 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

PrevalenceYear

## # A tibble: 3 × 10
##    Year   PrevMLE      CILow   CIHigh PrevBayes     CrILow  CrIHigh ProbAbsent
##   <int>     <dbl>      <dbl>    <dbl>     <dbl>      <dbl>    <dbl> <lgl>     
## 1  2018 0.000211  0.0000654  0.000489 0.000233  0.0000723  0.000475 NA        
## 2  2019 0.0000741 0.00000423 0.000326 0.000108  0.00000699 0.000335 NA        
## 3  2021 0         0          0.000181 0.0000465 0          0.000176 NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

PrevalenceYearZone <- PoolPrev(pool_data, Result, npool, Year, Zone)

## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 4 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

PrevalenceYearZone

## # A tibble: 4 × 11
##    Year Zone    PrevMLE      CILow   CIHigh PrevBayes  CrILow CrIHigh ProbAbsent
##   <int> <chr>     <dbl>      <dbl>    <dbl>     <dbl>   <dbl>   <dbl> <lgl>     
## 1  2018 Kaffa 0.000253  0.0000786  0.000588 0.000285  8.35e-5 6.04e-4 NA        
## 2  2018 Sheka 0         0          0.000600 0.000151  0       5.88e-4 NA        
## 3  2019 Kaffa 0.0000741 0.00000423 0.000326 0.000113  9.36e-6 3.59e-4 NA        
## 4  2021 Kaffa 0         0          0.000181 0.0000474 0       1.82e-4 NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

PrevalenceDistrictYear <- PoolPrev(pool_data, Result, npool, District, Year)

## Warning: There were 3 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 4 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

PrevalenceDistrictYear

## # A tibble: 7 × 11
##   District     Year PrevMLE   CILow  CIHigh PrevBayes  CrILow CrIHigh ProbAbsent
##   <chr>       <int>   <dbl>   <dbl>   <dbl>     <dbl>   <dbl>   <dbl> <lgl>     
## 1 "Andracha …  2019 0       0       3.43e-4 0.0000908 0       3.61e-4 NA        
## 2 "Bita and …  2019 1.27e-4 7.22e-6 5.57e-4 0.000189  1.41e-5 5.75e-4 NA        
## 3 "Bitta and…  2021 0       0       4.36e-4 0.000115  0       4.23e-4 NA        
## 4 "Gewata"     2018 9.90e-5 5.65e-6 4.36e-4 0.000149  9.72e-6 4.71e-4 NA        
## 5 "Gimbo"      2018 5.27e-4 1.31e-4 1.37e-3 0.000618  1.45e-4 1.45e-3 NA        
## 6 "Masha"      2018 0       0       6.00e-4 0.000150  0       5.68e-4 NA        
## 7 "Saylem, G…  2021 0       0       3.10e-4 0.0000810 0       3.10e-4 NA        
## # ℹ 2 more variables: NumberOfPools <int>, NumberPositive <dbl>

Model <- PoolReg(Result ~ Year + Zone, pool_data, npool)

## [1] "Model has no group/random effects. Using a fixed effect model (glm)"

summary(Model)

## 
## Call:
## stats::glm(formula = formula, family = stats::binomial(PoolLink(data[[poolSize]])), 
##     data = data)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.32169  -0.32169  -0.16074  -0.04013   2.95146  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2792.108   1963.798   1.422    0.155
## Year          -1.388      0.973  -1.426    0.154
## ZoneSheka    -16.515   2570.993  -0.006    0.995
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47.635  on 217  degrees of freedom
## Residual deviance: 42.494  on 215  degrees of freedom
## AIC: 48.494
## 
## Number of Fisher Scoring iterations: 18

plot(Model)

getPrevalence(Model)

## $PopulationEffects
##   Year  Zone     Estimate        CILow       CIHigh
## 1 2018 Kaffa 2.586705e-04 9.886594e-05 0.0006766045
## 2 2018 Sheka 1.739217e-11 0.000000e+00 1.0000000000
## 3 2019 Kaffa 6.458969e-05 1.101128e-05 0.0003787700
## 4 2021 Kaffa 4.025812e-06 1.744359e-08 0.0009282633

PrevByHier <- HierPoolPrev(pool_data, Result, npool,
                           c("Zone","District"))

## Warning: There were 936 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess

## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess

PrevByHier

## # A tibble: 1 × 5
##   PrevBayes     CrILow CrIHigh NumberOfPools NumberPositive
##       <dbl>      <dbl>   <dbl>         <int>          <dbl>
## 1     0.121 0.00000998   0.924           218              5

PrevByHierYr <- HierPoolPrev(pool_data, Result, npool,
                           c("Zone","District"), Year)

## Warning: There were 372 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 401 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

## Warning: There were 419 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

PrevByHierYr

## # A tibble: 3 × 6
##    Year PrevBayes    CrILow CrIHigh NumberOfPools NumberPositive
##   <int>     <dbl>     <dbl>   <dbl>         <int>          <dbl>
## 1  2018     0.153 0.0000205   0.935            97              4
## 2  2019     0.274 0.0000295   0.982            68              1
## 3  2021     0.384 0.0000294   0.992            53              0

### inset
library(raster)

## Loading required package: sp

## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, were retired in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.

## 
## Attaching package: 'raster'

## The following object is masked from 'package:plotly':
## 
##     select

## The following object is masked from 'package:dplyr':
## 
##     select

library(ggplot2)
library(ggthemes)
library(geodata)

## Loading required package: terra

## terra 1.7.39

## 
## Attaching package: 'terra'

## The following objects are masked from 'package:ape':
## 
##     rotate, trans, zoom

## The following object is masked from 'package:ggpubr':
## 
##     rotate

## The following object is masked from 'package:janitor':
## 
##     crosstab

## The following object is masked from 'package:patchwork':
## 
##     area

## The following object is masked from 'package:tidyr':
## 
##     extract

library(maps)

## 
## Attaching package: 'maps'

## The following object is masked from 'package:viridis':
## 
##     unemp

## The following object is masked from 'package:mclust':
## 
##     map

## The following object is masked from 'package:purrr':
## 
##     map

library(sf)

## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE

map(regions = "Ethiopia")

## PCA of simulium flies in Ethiopia
library(pegas)
library(adegenet)
library(viridisLite)
library(viridis)
library(ape)
library(tidyverse)
library(densityClust)
VCFfile <- "D:/All187/sindew.rmi3.qual.sd.msg85.nos.v2.recode.vcf"
Population100 <- read.csv("C:/Users/EPHI/Desktop/LTU_Lab Work 2023/Bioinformatics and R files/Population100.csv")
##
#haplotypes <- haplotype(VCFfile)
sequence1 <- read.loci(VCFfile)
#sequence2 <- read.genepop(VCFfile)
#plot.haploGen(VCFfile)


##
sim_pop <- Population100$Location
sim_pop

##   [1] "Daroo"      "Daroo"      "Daroo"      "Dorm09"     "Dorm09"    
##   [6] "Sashi"      "Sashi"      "Beles "     "BL4"        "BL4"       
##  [11] "BL4"        "BL4"        "BL4"        "BL4"        "BL4"       
##  [16] "BL4"        "BL4"        "BL4"        "BL4"        "BL4"       
##  [21] "BL4"        "BL4"        "BL4"        "BL4"        "Godguwadit"
##  [26] "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit"
##  [31] "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit"
##  [36] "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit"
##  [41] "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit"
##  [46] "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit" "Godguwadit"
##  [51] "Godguwadit" "Godguwadit" "Godguwadit" "Kisha"      "Kisha"     
##  [56] "Kisha"      "Kisha"      "Kisha"      "Kisha"      "Kisha"     
##  [61] "Kisha"      "Kisha"      "Kisha"      "Kisha"      "Kisha"     
##  [66] "Kisha"      "Kisha"      "Kisha"      "Kisha"      "Kisha"     
##  [71] "Kisha"      "Kisha"      "Kisha"      "Kisha"      "Worshi"    
##  [76] "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu" 
##  [81] "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu" 
##  [86] "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu" 
##  [91] "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu" 
##  [96] "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu"  "Wudigemzu"

mycol <- viridis(9)
# mycol <- magma(9)
    read.vcf(VCFfile)->map2023.vcf

## File apparently not yet accessed:
## Scanning file D:/All187/sindew.rmi3.qual.sd.msg85.nos.v2.recode.vcf 
## 
 37.3867 / 37.3867 Mb
 37.3867 / 37.3867 Mb
## Done.
## 
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1300 / 10000 loci
Reading 1400 / 10000 loci
Reading 1500 / 10000 loci
Reading 1600 / 10000 loci
Reading 1700 / 10000 loci
Reading 1800 / 10000 loci
Reading 1900 / 10000 loci
Reading 2000 / 10000 loci
Reading 2100 / 10000 loci
Reading 2200 / 10000 loci
Reading 2300 / 10000 loci
Reading 2400 / 10000 loci
Reading 2500 / 10000 loci
Reading 2600 / 10000 loci
Reading 2700 / 10000 loci
Reading 2800 / 10000 loci
Reading 2900 / 10000 loci
Reading 3000 / 10000 loci
Reading 3100 / 10000 loci
Reading 3200 / 10000 loci
Reading 3300 / 10000 loci
Reading 3400 / 10000 loci
Reading 3500 / 10000 loci
Reading 3600 / 10000 loci
Reading 3700 / 10000 loci
Reading 3800 / 10000 loci
Reading 3900 / 10000 loci
Reading 4000 / 10000 loci
Reading 4100 / 10000 loci
Reading 4200 / 10000 loci
Reading 4300 / 10000 loci
Reading 4400 / 10000 loci
Reading 4500 / 10000 loci
Reading 4600 / 10000 loci
Reading 4700 / 10000 loci
Reading 4800 / 10000 loci
Reading 4900 / 10000 loci
Reading 5000 / 10000 loci
Reading 5100 / 10000 loci
Reading 5200 / 10000 loci
Reading 5300 / 10000 loci
Reading 5400 / 10000 loci
Reading 5500 / 10000 loci
Reading 5600 / 10000 loci
Reading 5700 / 10000 loci
Reading 5800 / 10000 loci
Reading 5900 / 10000 loci
Reading 6000 / 10000 loci
Reading 6100 / 10000 loci
Reading 6200 / 10000 loci
Reading 6300 / 10000 loci
Reading 6400 / 10000 loci
Reading 6500 / 10000 loci
Reading 6600 / 10000 loci
Reading 6700 / 10000 loci
Reading 6800 / 10000 loci
Reading 6900 / 10000 loci
Reading 7000 / 10000 loci
Reading 7100 / 10000 loci
Reading 7200 / 10000 loci
Reading 7300 / 10000 loci
Reading 7400 / 10000 loci
Reading 7500 / 10000 loci
Reading 7600 / 10000 loci
Reading 7700 / 10000 loci
Reading 7800 / 10000 loci
Reading 7900 / 10000 loci
Reading 8000 / 10000 loci
Reading 8100 / 10000 loci
Reading 8200 / 10000 loci
Reading 8300 / 10000 loci
Reading 8400 / 10000 loci
Reading 8500 / 10000 loci
Reading 8600 / 10000 loci
Reading 8608 / 8608 loci.
## Done.

    popfile <- as.factor(sim_pop)
    ploidy<-2
    loci2genind(map2023.vcf,ploidy=ploidy)->map2023.gi

## Warning in adegenet::df2genind(as.matrix(x[, attr(x, "locicol"), drop =
## FALSE]), : duplicate labels detected for some loci; using generic labels

    map2023.X<-tab(map2023.gi,NA.method="mean")             
    map2023.pca<-dudi.pca(map2023.X,center=TRUE,scale=FALSE,nf=200,scannf=FALSE)
    summary(map2023.pca)

## Class: pca dudi
## Call: dudi.pca(df = map2023.X, center = TRUE, scale = FALSE, scannf = FALSE, 
##     nf = 200)
## 
## Total inertia: 2601
## 
## Eigenvalues:
##     Ax1     Ax2     Ax3     Ax4     Ax5 
##  360.83  247.32  125.46   52.26   37.17 
## 
## Projected inertia (%):
##     Ax1     Ax2     Ax3     Ax4     Ax5 
##  13.875   9.510   4.824   2.010   1.429 
## 
## Cumulative projected inertia (%):
##     Ax1   Ax1:2   Ax1:3   Ax1:4   Ax1:5 
##   13.88   23.39   28.21   30.22   31.65 
## 
## (Only 5 dimensions (out of 99) are shown)

    #view(map2023.X)
   A <- s.class(map2023.pca$li,fac=as.factor(sim_pop),col=transp(mycol,.6),clab=0,pch=19,grid=0,cpoint=1.25,cstar=0,cellipse=1)

## to do DAPC, you need more than one individual per population
   map2023.pop<-as.factor(sim_pop)
   map2023.xval<-xvalDapc(map2023.X,grp=map2023.pop,n.pca.max=300,training.set=.9,result="groupMean",center=TRUE,scale=FALSE,n.pca=NULL,n.rep=100,xval.plot=TRUE)

## Warning in xvalDapc.matrix(map2023.X, grp = map2023.pop, n.pca.max = 300, : 2
## groups have only 1 member: these groups cannot be represented in both training
## and validation sets.

summary(map2023.xval$DAPC)

## $n.dim
## [1] 8
## 
## $n.pop
## [1] 9
## 
## $assign.prop
## [1] 0.56
## 
## $assign.per.pop
##     Beles         BL4      Daroo     Dorm09 Godguwadit      Kisha      Sashi 
##  1.0000000  0.3125000  1.0000000  0.5000000  0.6896552  0.4761905  0.0000000 
##     Worshi  Wudigemzu 
##  1.0000000  0.6000000 
## 
## $prior.grp.size
## 
##     Beles         BL4      Daroo     Dorm09 Godguwadit      Kisha      Sashi 
##          1         16          3          2         29         21          2 
##     Worshi  Wudigemzu 
##          1         25 
## 
## $post.grp.size
## 
##     Beles         BL4      Daroo     Dorm09 Godguwadit      Kisha      Sashi 
##          1          6          3          2         40         18          0 
##     Worshi  Wudigemzu 
##          5         25

## here's one figure
scatter(map2023.xval$DAPC,scree.da=FALSE,legend=1,pch=19,cstar=1,clab=0,cex=1.25,cellipse=1,col=transp(mycol,0.6))

## here's another
library(dplyr)
library(tidyr)
compoplot(map2023.xval$DAPC,col.pal=funky,show.lab=FALSE,posi="topright",cleg = 0.6,bg = transp("white"))

#compoplot(map2023.xval$DAPC,col=mycol,show.lab=FALSE,posi="topright",cleg = 0.6,
#bg = transp("white"), col.pal = funky)
map2023ass<-summary(map2023.xval$DAPC)$assign.per.pop*100
## here's a figure
barplot(map2023ass,xlab="% correct assignment",horiz=TRUE,las=1,col=mycol,xlim=c(0,100))

# Function to get raster pixel coordinates, values, and region names from a shape file and elevation raster map (Ethiopia)

get_raster_coordinates_and_regions <- function(raster_data, shapefile_data) {
  
  # Convert raster data to sf point data frame with longitude and latitude
  raster_points <- rasterToPoints(raster_data) %>% data.frame() 
  
  raster_points_sp <- st_as_sf(raster_points, coords = c("x", "y"), crs = st_crs(shapefile_data)) %>% as("Spatial")

  # Find corresponding region for each point
  points_regions <- sp::over(raster_points_sp, shapefile_data)

  # Add coordinates, values, and NAMES to the data frame
  result_df <- points_regions %>% 
    select(NAME_1, NAME_2, NAME_3) %>%
    mutate(longitude = raster_points[,2], latitude = raster_points[,3], values = raster_points[,1])


  return(result_df)
}

#+++FUNCTION USAGE+++

# Load necessary libraries
library(raster)
library(sf)
library(geodata)
library(tidyverse)
library(sp)

# Set the directory for data
output_dir <- "data/"

# Load raster data
elevation_ETH <- elevation_30s(country = "ETH", path = output_dir)

elevation_ETH_raster <- raster(elevation_ETH)

# Load shapefile data
ethiopia_boundary <- gadm("ETH", level = 3, path = output_dir) %>% st_as_sf() %>% as("Spatial") 

# Example usage of the function with the provided inputs
raster_data <-  elevation_ETH_raster # Replace with your raster file
shapefile_data <- ethiopia_boundary      # Replace with your shapefile
raster_data

## class      : RasterLayer 
## dimensions : 1428, 1836, 2621808  (nrow, ncol, ncell)
## resolution : 0.008333333, 0.008333333  (x, y)
## extent     : 32.8, 48.1, 3.2, 15.1  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : ETH_elv_msk.tif 
## names      : ETH_elv_msk 
## values     : -202, 4459  (min, max)

#output_data <- get_raster_coordinates_and_regions(raster_data, shapefile_data)
#print(output_data)
# Set the directory for data
output_dir <- "data/"

#Load raster data - mean prevalence
elevation_ETH <- elevation_30s(country = "ETH", path = output_dir)

#elevation_ETH_raster <- raster("data/231106_ethiopia_mean_prev.tif")
elevation_ETH_raster <- raster("C:/Users/EPHI/Downloads/231106_ethiopia_mean_prev.tif")

# Load shapefile data
ethiopia_boundary <- gadm("ETH", level = 2, path = output_dir) %>% st_as_sf() %>% as("Spatial") 

plot(elevation_ETH_raster, main = "Mean prevalence of onchocerciasis in Ethiopia")

# Load necessary libraries
library(raster)
library(sf)
library(geodata)
library(tidyverse)
library(sp)
# Set the directory for data
output_dir <- "data/"

# Load raster data - mean prevalence
# elevation_ETH <- elevation_30s(country = "ETH", path = output_dir)

#elevation_ETH_raster <- raster("data/231106_ethiopia_sd_prev.tif")

elevation_ETH_raster <- raster("C:/Users/EPHI/Downloads/231106_ethiopia_sd_prev.tif")
# Load shapefile data
ethiopia_boundary <- gadm("ETH", level = 3, path = output_dir) %>% st_as_sf() %>% as("Spatial") 
plot(ethiopia_boundary)

plot(elevation_ETH_raster, main = "Standard deviation of Onchocerciasis prevalence in Ethiopia")

## Time series analysis scripts for predicting onchocerciasis intervention coverage and its outcomes_SMF-Nov2023
ts (1:6, frequency = 6, start = 1995)

## Time Series:
## Start = c(1995, 1) 
## End = c(1995, 6) 
## Frequency = 6 
## [1] 1 2 3 4 5 6

plot(ts)

library(tidyverse)
coverage1 <- read.csv('C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv', header = TRUE)
#view(coverage1)
ylabel <- c(0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
Bar1 <- ggplot(data = coverage1, aes(Year,Survey_coverage...., fill=District))+
geom_bar(stat = "identity", position = "stack")+
scale_x_continuous(breaks = seq(2014, 2020, by=2))+
scale_y_continuous(breaks = seq(0, 1200, by=100))+
theme_bw()+
  ylab("Survey coverages(%) in Kebeles (sub-districts)")+
  xlab("Community MDAi coverage survey years")+
  annotate("text", label = "Figure 4A.2: Survey coverages(%) by districts p=0.011",x=2015.8, y=940)+
  theme(text  = element_text(size = 10))
 

Bar1+ geom_text(
  aes(label = Survey_coverage....),
  position = position_stack(vjust = 0.5),  # Adjust vertical position of labels
  color = "black",                           # Text color
  size = 3                                   # Text size
)

Bar2<-ggplot(data = coverage1, aes(Year,Reported_coverage...., fill=District))+
geom_bar(stat = "identity", position = "stack")+
scale_x_continuous(breaks = seq(2014, 2020, by=2))+
scale_y_continuous(breaks = seq(0, 1200, by=100))+
  theme_bw()+
  ylab("Reported coverage(%) in kebeles(sub-districts)")+
  xlab("Community MDAi coverage report years")+
  annotate("text", label = "Figure 4A.1: Reported coverages(%) by districts p=0.49",x=2015.9, y=940)+
  theme(text  = element_text(size = 10))

Bar2+ geom_text(
  aes(label = Reported_coverage....),
  position = position_stack(vjust = 0.5),  # Adjust vertical position of labels
  color = "black",                           # Text color
  size = 3                                   # Text size
)

Annova_RC <- aov(Survey_coverage.... ~ Kebele , data = coverage1)

library(tidyverse)
coverage1 <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv", header = TRUE)
Annova_RC <- aov(Survey_coverage.... ~ Kebele , data = coverage1)
Annova_RC <- aov(Survey_coverage.... ~ District , data = coverage1)
Annova_RC <- aov(Survey_coverage.... ~ Year , data = coverage1)
Annova_RC <- aov(Reported_coverage.... ~ Kebele , data = coverage1)
Annova_RC <- aov(Reported_coverage.... ~ Year , data = coverage1)
Annova_RC <- aov(Reported_coverage.... ~ District , data = coverage1)

library(sf)
library(geodata)
output_dir <- "data/"
ethiopia <- gadm("ETH", level = 3, path = output_dir) %>% st_as_sf() %>% as("Spatial") 
plot(ethiopia)

ethiopia2 <- gadm("ETH", level = 2, path = output_dir) %>% st_as_sf() %>% as("Spatial") 
plot(ethiopia2)

#view(ethiopia2)
ethiopia2 <- gadm("ETH", level = 2, path = output_dir) %>% st_as_sf() %>% as("Spatial")

## generating country, sub-country and location maps 
library(sp)
library(tidyverse)
library(geodata)
output_dir <- "data/"
Ethiopia <- gadm("ETH", level = 3, path = output_dir)
#view(Ethiopia)

##EEMS
library(densityClust)
#irisDist <- dist(iris[,1:100])
#irisClust <- densityClust(irisDist, gaussian=TRUE)
#> Distance cutoff calculated to 0.2767655
#plot(irisClust) # Inspect clustering attributes to define thresholds

#irisClust <- findClusters(irisClust, rho=2, delta=2)
#plotMDS(irisClust)
#View (iris)

library(densityClust)
mapDist <- dist(map2023.X[, 0:100])
#mapDist <- dist(map2023.X[, 0:100])
mapClust <- densityClust(mapDist, gaussian=TRUE)

## Distance cutoff calculated to 1.59099

#> Distance cutoff calculated to 0.2767655
plot(mapClust) # Inspect clustering attributes to define thresholds

mapClust <- findClusters(mapClust, rho=2, delta=2)
plotMDS(mapClust)

#View (map2023.X)
#split(map2023.X[,5], mapClust$clusters)

## Reported and survey coverage (%) heat map and correlation and interactive figures.  
library(tidyverse)
library(plotly)
coverage1 <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv", header = TRUE)
## Select data and generate data frame for analysis.
Survey_year<-coverage1$Year
Survey_value<-coverage1$Survey_coverage....
Report_value<-coverage1$Reported_coverage....
datak <- data.frame(Survey_year,Report_value,Survey_value)
data1=as.matrix(datak)
head(data1)

##      Survey_year Report_value Survey_value
## [1,]        2014           81           88
## [2,]        2014           83           85
## [3,]        2014           84           87
## [4,]        2014           88           96
## [5,]        2016           84           74
## [6,]        2016           77           67

heatmap(data1,cexCol=0.8,cexRow=0.8,scale='column',Colv =NA,Rowv=NA,ylab='Figure 4A.3. Observations (1-33)')

plot_ly(z=data1,colorscale="gray", type = "heatmap")

q <- qplot(Report_value, Survey_value, data = datak, color=Survey_year)+facet_wrap(Survey_year)

## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot(q)

ggplotly(q)

### generating lollipop in R 
library(tidyverse)
library(hrbrthemes)

## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.

##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and

##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow

library(plotly)
#view(coverage1)
K<-ggplot(coverage1)+
  geom_point(aes(x=District, y=Reported_coverage...., color="green"))+
  geom_point(aes(x=District, y=Survey_coverage...., color= "red"))+
  geom_segment(aes(x=District,xend=District, y=Survey_coverage...., yend=Reported_coverage....))+
  coord_flip()+
  theme_ipsum()+
  scale_fill_hue(labels = c("Survey coverage(%)", "Reported coverage(%)"))+
  xlab("MDAi coverage monitoring districts")+ ylab("Figure 4A.1.Reported (green dot) and surveyed (red dot) coverage(%) report; 2014-2020")+    
  geom_hline(yintercept = 80)+
      annotate("text", label = "Survey (%) by districts
           p=0.011",x=6, y=70)+
  annotate("text", label = "Report (%) by districts
           p=0.49",x=4.5, y=70)+
  theme(text  = element_text(size = 9)) 
plot(K)

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

  ggplotly(K)

library(tidyverse)
library(ggcorrplot)
library(igraph)

## 
## Attaching package: 'igraph'

## The following object is masked from 'package:densityClust':
## 
##     clusters

## The following objects are masked from 'package:terra':
## 
##     blocks, compare, union

## The following object is masked from 'package:raster':
## 
##     union

## The following object is masked from 'package:pegas':
## 
##     mst

## The following objects are masked from 'package:ape':
## 
##     degree, edges, mst, ring

## The following object is masked from 'package:plotly':
## 
##     groups

## The following objects are masked from 'package:lubridate':
## 
##     %--%, union

## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union

## The following objects are masked from 'package:purrr':
## 
##     compose, simplify

## The following object is masked from 'package:tidyr':
## 
##     crossing

## The following object is masked from 'package:tibble':
## 
##     as_data_frame

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

#attach(iris)
coverage1 <- read.csv("C:/Users/EPHI/Desktop/Sindew 2021/My LTU File_2022/Keffa Sheka Review Papaer 2022/Coverage_Keffa_Sheka_R.csv", header = TRUE)
#view(coverage1)
Survey_year<-coverage1$Year
Survey_coverage<-coverage1$Survey_coverage....
Report_coverage<-coverage1$Reported_coverage....
#data <- data.frame(Survey_year,Report_value,Survey_value)
data <- data.frame(Report_coverage,Survey_coverage)
Coverage_data=as.matrix(data)
#head(data1)
#view(data1)
heatmap(Coverage_data,cexCol=0.8,cexRow = 0.8,scale = 'column', Colv =NA, Rowv = NA, ylab='Figure 4A.2 Observations (1-33)')

heatmap(Coverage_data,scale = 'non', cexCol=0.8,cexRow = 0.8, ylab='Figure 4A.2 Observations (1-33)')

#install.packages(gplots)
library("gplots")

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

heatmap.2(Coverage_data, scale = "none", col = bluered(100), 
          trace = "none", density.info = "none", cexCol=0.8,cexRow = 0.8)

#BiocManager::install("ComplexHeatmap")
#install.packages(ComplexHeatmap)
library(ComplexHeatmap)

## Loading required package: grid

## 
## Attaching package: 'grid'

## The following object is masked from 'package:terra':
## 
##     depth

## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================

## 
## Attaching package: 'ComplexHeatmap'

## The following object is masked from 'package:terra':
## 
##     draw

## The following object is masked from 'package:plotly':
## 
##     add_heatmap

#view(data1)
Heatmap(Coverage_data, 
        name = "Coverage(%)", #title of legend
        column_title = "Survey types", row_title = "Coverages(%)(2014-2020)") # Text size for row names

densityHeatmap(scale(Coverage_data))

 #legend(x="right", legend=c("min", "max"))#,fill=heat.colors)
#or(data1)
cor=round(cor(data1),1)
cor

##              Survey_year Report_value Survey_value
## Survey_year          1.0          0.1          0.1
## Report_value         0.1          1.0          0.4
## Survey_value         0.1          0.4          1.0

ggcorrplot(cor)

ggcorrplot(cor, hc.order = TRUE, type = "upper", title = "Correlogram MDAi coverage data", legend.title="pearson \n corr", lab = TRUE, lab_size = 2, ggtheme = theme_dark)

p.mat<-cor_pmat(data1)
p.mat

##              Survey_year Report_value Survey_value
## Survey_year     0.000000   0.44854799   0.46203298
## Report_value    0.448548   0.00000000   0.01858767
## Survey_value    0.462033   0.01858767   0.00000000

ggcorrplot(cor, hc.order = TRUE,method="square", type = "upper", title = "Correlogram MDAi coverage data", legend.title="pearson \n corr", lab = TRUE, p.mat = p.mat, lab_size = 2)

ggcorrplot(cor, method="square",
           hc.order=TRUE,
           outline.color = "white",
           type = "upper", p.mat = p.mat, ggtheme = theme_dark)#xlab="Figure 4A.2  heatmap")

### Part DD: social network analysis in R (Tutorial (Not Related)