Load Libraries

library(rvest)
## Loading required package: xml2
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter()         masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()
library(ggsci)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************

Load Dataset

# Make the course folder the working directory so the relative CSV file names
# passed to read.csv() resolve.
# NOTE(review): this absolute path is machine-specific; the script only runs
# as written on a machine where this folder exists.
setwd("C:/Users/Valued Customer/Desktop/Lovebug/Montgomery College/DATA 110/Week 3")
# Read the original CSV into `sw_le`.
sw_le <- read.csv("SafeWaterLifeExpectancy.csv")

Load Renamed Datasets

# Load the copy of the data whose column names were changed in the CSV itself
sw_le2 <- read.csv("SafeWaterLifeExpectancy_colrename.csv")
# Load the copy that additionally has a continent column
sw_le3 <- read.csv("SafeWaterLifeExpectancy_colrename_continent.csv")
# Preview the first rows of the continent-augmented data
head(sw_le3)
##   Country.Name Perc..Pop..Safe.Water Average.Life.Expectancy..years. Continent
## 1       Uganda               6.43900                        59.50876    Africa
## 2     Ethiopia              10.53542                        65.00829    Africa
## 3      Nigeria              19.40221                        52.97793    Africa
## 4     Cambodia              24.09879                        68.47205      Asia
## 5        Nepal              26.75171                        69.86985      Asia
## 6        Ghana              26.86492                        62.40724    Africa
# Number of rows and columns in the continent-augmented data
dim(sw_le3)
## [1] 81  4
# Per-column summary (quantiles for numeric columns, length/class for text)
summary(sw_le3)
##  Country.Name       Perc..Pop..Safe.Water Average.Life.Expectancy..years.
##  Length:81          Min.   :  6.439       Min.   :52.98                  
##  Class :character   1st Qu.: 68.870       1st Qu.:72.22                  
##  Mode  :character   Median : 91.694       Median :76.64                  
##                     Mean   : 79.403       Mean   :75.68                  
##                     3rd Qu.: 98.024       3rd Qu.:81.39                  
##                     Max.   :100.000       Max.   :83.84                  
##   Continent        
##  Length:81         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Descriptive statistics for the numeric columns only. psych::describe() tries
# to coerce every column to numbers, so passing the character columns
# (country name, continent) produced coercion warnings and rows of NaN/Inf.
describe(sw_le3[vapply(sw_le3, is.numeric, logical(1))])
##                                 vars  n  mean    sd median trimmed   mad   min
## Perc..Pop..Safe.Water              1 81 79.40 24.87  91.69   83.84 10.68  6.44
## Average.Life.Expectancy..years.    2 81 75.68  6.55  76.64   76.51  7.04 52.98
##                                    max range  skew kurtosis   se
## Perc..Pop..Safe.Water           100.00 93.56 -1.30     0.57 2.76
## Average.Life.Expectancy..years.  83.84 30.87 -1.26     1.90 0.73

Look at the structure of the data

# Show each column's type together with its first few values
str(sw_le3)
## 'data.frame':    81 obs. of  4 variables:
##  $ Country.Name                   : chr  "Uganda" "Ethiopia" "Nigeria" "Cambodia" ...
##  $ Perc..Pop..Safe.Water          : num  6.44 10.54 19.4 24.1 26.75 ...
##  $ Average.Life.Expectancy..years.: num  59.5 65 53 68.5 69.9 ...
##  $ Continent                      : chr  "Africa" "Africa" "Africa" "Asia" ...

Rename Columns

# Swap the CSV headers for short names. sw_le3 carries the same three columns
# as sw_le2 followed by the continent.
names(sw_le2) <- c("country", "wateraccess", "lifeexpectancy")
names(sw_le3) <- c(names(sw_le2), "continent")

Plots

# Stacked histogram of clean-water access in 5-point bins, one fill per country
p1 <- ggplot(data = sw_le3, mapping = aes(x = wateraccess, fill = country)) +
  geom_histogram(binwidth = 5, position = "stack") +
  ggtitle("Clean Water Access by Country")
ggplotly(p1)
# Same layout for life expectancy, in 5-year bins
p2 <- ggplot(data = sw_le3, mapping = aes(x = lifeexpectancy, fill = country)) +
  geom_histogram(binwidth = 5, position = "stack") +
  ggtitle("Life Expectancy by Country")

ggplotly(p2)

Clean Water Access - Bottom 10 countries

# The ten countries with the lowest clean-water access, ordered from lowest up.
# Selecting by rank instead of a hand-picked cutoff (`wateraccess < 45`) keeps
# this a true "bottom 10" if the data changes; `with_ties = FALSE` guarantees
# exactly ten rows.
cw_bottom10 <- sw_le2 %>%
  slice_min(wateraccess, n = 10, with_ties = FALSE)
cw_bottom10
##        country wateraccess lifeexpectancy
## 1       Uganda     6.43900       59.50876
## 2     Ethiopia    10.53542       65.00829
## 3      Nigeria    19.40221       52.97793
## 4     Cambodia    24.09879       68.47205
## 5        Nepal    26.75171       69.86985
## 6        Ghana    26.86492       62.40724
## 7       Bhutan    34.15480       69.80712
## 8     Pakistan    35.63796       66.33217
## 9  Congo, Rep.    37.00161       64.09073
## 10      Mexico    42.61278       76.88137
# Bottom ten by clean-water access from the continent-augmented data, chosen by
# rank rather than a hard-coded cutoff so the group always has ten rows.
cw_bottom10_2 <- sw_le3 %>%
  slice_min(wateraccess, n = 10, with_ties = FALSE)
# Overlaid, half-transparent histogram of water access for the bottom group,
# with white bar outlines and a "Country" legend
p3 <- ggplot(
  data = cw_bottom10,
  mapping = aes(x = wateraccess, fill = country)
) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    colour = "white",
    alpha = 0.5
  ) +
  scale_fill_discrete(name = "Country")
p3

# Stacked histogram for the bottom group using narrower 2-point bins
p4 <- ggplot(
  data = cw_bottom10,
  mapping = aes(x = wateraccess, fill = country)
) +
  geom_histogram(binwidth = 2, position = "stack") +
  ggtitle("Clean Water Access Bottom 10 Countries")
ggplotly(p4)
# Bubble scatter of every country: water access vs. life expectancy, point size
# following life expectancy and one colour per country
p5 <- ggplot(data = sw_le2) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = lifeexpectancy,
      colour = country
    ),
    alpha = 0.72
  ) +
  labs(
    title = "Life Expect. by Water Access",
    x = "Clean Water Access (% Pop.)",
    y = "Life Expectancy (years)"
  )
ggplotly(p5)

There are too many countries to tell apart in a single plot, so the bottom 10 and top 10 are plotted separately.

# Bubble scatter restricted to the bottom group
p6 <- ggplot(data = cw_bottom10) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = lifeexpectancy,
      colour = country
    ),
    alpha = 0.72
  ) +
  labs(title = "Life Expectancy by Water Access")
ggplotly(p6)
# Countries with the highest clean-water access, taken as those above 98.7%.
# NOTE(review): the printed result below has 12 rows, not 10, so the "top 10"
# name is approximate; confirm whether exactly ten countries are wanted.
cw_top10<- sw_le2 %>% 
  filter(wateraccess > 98.7)
# Print the selection
cw_top10
##          country wateraccess lifeexpectancy
## 1        Bahrain    98.74000       76.86520
## 2        Ireland    98.87529       81.50244
## 3         Greece    98.89577       81.58780
## 4  United States    99.02272       78.74146
## 5        Germany    99.21542       81.09024
## 6         Israel    99.39000       82.05122
## 7         Cyprus    99.60259       80.29105
## 8          Malta    99.94286       81.94634
## 9    Netherlands    99.95510       81.70732
## 10        Kuwait   100.00000       74.74683
## 11 Liechtenstein   100.00000       82.07317
## 12   New Zealand   100.00000       81.45683
# Bubble scatter restricted to the top group
p7 <- ggplot(data = cw_top10) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = lifeexpectancy,
      colour = country
    ),
    alpha = 0.72
  ) +
  labs(title = "Life Expectancy by Water Access")
ggplotly(p7)

Overall Plot with Regression Line

# Scatter of all countries with a least-squares line and its confidence band.
# `TRUE` is written out because `T` is an ordinary variable that can be
# reassigned; the method is named as a string for readability.
sw_le2 %>%
  ggplot(aes(wateraccess, lifeexpectancy)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE)
## `geom_smooth()` using formula 'y ~ x'

# Keep only the two extremes: water access below 50% or above 98.7%.
# NOTE(review): the printed result below has 13 rows under 50% and 12 above
# 98.7% (25 total), so this is wider than ten countries at each end; confirm
# the cutoffs are intended.
cw_top_bottom <-sw_le2 %>% 
  filter(wateraccess<50|wateraccess>98.7)
# Print the combined selection
cw_top_bottom
##          country wateraccess lifeexpectancy
## 1         Uganda     6.43900       59.50876
## 2       Ethiopia    10.53542       65.00829
## 3        Nigeria    19.40221       52.97793
## 4       Cambodia    24.09879       68.47205
## 5          Nepal    26.75171       69.86985
## 6          Ghana    26.86492       62.40724
## 7         Bhutan    34.15480       69.80712
## 8       Pakistan    35.63796       66.33217
## 9    Congo, Rep.    37.00161       64.09073
## 10        Mexico    42.61278       76.88137
## 11 Cote d'Ivoire    45.80163       53.07824
## 12    Tajikistan    47.40355       71.00712
## 13       Lebanon    47.52393       79.49824
## 14       Bahrain    98.74000       76.86520
## 15       Ireland    98.87529       81.50244
## 16        Greece    98.89577       81.58780
## 17 United States    99.02272       78.74146
## 18       Germany    99.21542       81.09024
## 19        Israel    99.39000       82.05122
## 20        Cyprus    99.60259       80.29105
## 21         Malta    99.94286       81.94634
## 22   Netherlands    99.95510       81.70732
## 23        Kuwait   100.00000       74.74683
## 24 Liechtenstein   100.00000       82.07317
## 25   New Zealand   100.00000       81.45683

Top and Bottom 10 Countries Clean Water Access Combined

# Bubble scatter of the combined low/high groups; point size follows water
# access here
p8 <- ggplot(data = cw_top_bottom) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = wateraccess,
      colour = country
    ),
    alpha = 0.72
  ) +
  labs(title = "Life Expectancy by Water Access")
ggplotly(p8)
# Base-graphics box plots of the two measures side by side. The boxes are
# named so the axis does not fall back to "1" and "2". Note that the two
# measures share one axis but use different units (percent vs. years).
boxplot(
  sw_le2$wateraccess, sw_le2$lifeexpectancy,
  names = c("Clean Water Access (% Pop.)", "Life Expectancy (years)")
)

# Box plot of life expectancy for the bottom group. x is continuous, so the
# rows are put in a single explicit group (`group = 1`); without it ggplot2
# warns "Continuous x aesthetic -- did you forget aes(group=...)?".
plot1 <- ggplot(
  cw_bottom10,
  aes(x = wateraccess, y = lifeexpectancy, group = 1)
) +
  geom_boxplot() +
  ggtitle("Bottom 10 Water Access") +
  ylab("Life Expectancy (years)") +
  xlab("Clean Water Access (% Pop.)")

# Same box plot for the top group
plot2 <- ggplot(
  cw_top10,
  aes(x = wateraccess, y = lifeexpectancy, group = 1)
) +
  geom_boxplot() +
  ggtitle("Top 10 Water Access") +
  ylab("Life Expectancy (years)") +
  xlab("Clean Water Access (% Pop.)")

# Place the two plots side by side, labelled A and B
plot_grid(plot1, plot2, labels = "AUTO")

# Same two box plots, calling plot_grid() through an explicit cowplot:: prefix.
# This made no visible difference, presumably because the unqualified
# plot_grid() already resolves to cowplot.
# `group = 1` puts all rows in one explicit group, which avoids ggplot2's
# "Continuous x aesthetic" warning for a continuous x.
plot1 <- ggplot(
  cw_bottom10,
  aes(x = wateraccess, y = lifeexpectancy, group = 1)
) +
  geom_boxplot() +
  ggtitle("Bottom 10 Water Access") +
  ylab("Life Expectancy (years)") +
  xlab("Clean Water Access (% Pop.)")

plot2 <- ggplot(
  cw_top10,
  aes(x = wateraccess, y = lifeexpectancy, group = 1)
) +
  geom_boxplot() +
  ggtitle("Top 10 Water Access") +
  ylab("Life Expectancy (years)") +
  xlab("Clean Water Access (% Pop.)")

cowplot::plot_grid(plot1, plot2, labels = "AUTO")

# NOTE(review): evaluated on its own, this only builds and prints a theme
# object (the listing that follows); it is not added to any plot here.
theme(plot.title = element_text(hjust = 0.5))
## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE
# Scatter of the combined low/high groups with a least-squares line, its
# confidence band, and a centred title. `TRUE` is written out because `T` is
# an ordinary variable that can be reassigned.
cw_top_bottom %>%
  ggplot(aes(wateraccess, lifeexpectancy)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Life Expectancy by Clean Water Access") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Clean Water Access (% Pop.)") +
  ylab("Life Expectancy")
## `geom_smooth()` using formula 'y ~ x'

Exploring the middle

# Mid-range countries: water access strictly between 70% and 90%
cw_middle <- filter(sw_le2, wateraccess > 70, wateraccess < 90)
cw_middle
##                   country wateraccess lifeexpectancy
## 1                Colombia    71.12262       74.15620
## 2              Azerbaijan    71.51716       71.84520
## 3                 Georgia    72.97048       72.96612
## 4                 Ecuador    74.36078       76.08151
## 5                  Russia    75.51010       70.90854
## 6                 Hungary    81.54177       70.90854
## 7                 Estonia    81.69058       77.13171
## 8                  Latvia    81.93607       74.12439
## 9          Macedonia, FYR    83.48981       75.52432
## 10           Turkmenistan    86.14747       67.64105
## 11                Romania    87.77910       67.64105
## 12                 Serbia    88.14379       75.48780
## 13                   Oman    88.50000       77.12183
## 14 Bosnia and Herzegovina    88.57549       76.64029
## 15             Montenegro    89.70820       76.86432
## 16             Costa Rica    89.80124       79.60890
# Bubble scatter of the mid-range countries; point size follows water access
p9 <- ggplot(data = cw_middle) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = wateraccess,
      colour = country
    ),
    alpha = 0.72
  ) +
  labs(title = "Life Expectancy by Water Access")
ggplotly(p9)
# Scatter of the mid-range countries with a least-squares line, its confidence
# band, and a centred title. `TRUE` is written out because `T` is an ordinary
# variable that can be reassigned.
cw_middle %>%
  ggplot(aes(wateraccess, lifeexpectancy)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Life Expectancy by Clean Water Access") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Clean Water Access (% Pop.)") +
  ylab("Life Expectancy")
## `geom_smooth()` using formula 'y ~ x'

Statistics

# Linear model on the bottom group: water access is the response and life
# expectancy the predictor.
# NOTE(review): the plots put life expectancy on the y-axis; confirm this
# response/predictor direction is the intended one.
m1 <- lm(formula = wateraccess ~ lifeexpectancy, data = cw_bottom10)
summary(m1)
## 
## Call:
## lm(formula = wateraccess ~ lifeexpectancy, data = cw_bottom10)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.235  -5.199   3.451   6.129  12.240 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    -45.6814    32.9682  -1.386   0.2033  
## lifeexpectancy   1.0991     0.5008   2.195   0.0595 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.778 on 8 degrees of freedom
## Multiple R-squared:  0.3758, Adjusted R-squared:  0.2978 
## F-statistic: 4.816 on 1 and 8 DF,  p-value: 0.0595
# Same model fitted to the top group
m2 <- lm(formula = wateraccess ~ lifeexpectancy, data = cw_top10)
summary(m2)
## 
## Call:
## lm(formula = wateraccess ~ lifeexpectancy, data = cw_top10)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64583 -0.45755  0.00614  0.46096  0.66550 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    97.52346    5.41611   18.01 5.98e-09 ***
## lifeexpectancy  0.02423    0.06739    0.36    0.727    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5264 on 10 degrees of freedom
## Multiple R-squared:  0.01276,    Adjusted R-squared:  -0.08596 
## F-statistic: 0.1293 on 1 and 10 DF,  p-value: 0.7267
# Same model fitted to the mid-range group
m3 <- lm(formula = wateraccess ~ lifeexpectancy, data = cw_middle)
summary(m3)
## 
## Call:
## lm(formula = wateraccess ~ lifeexpectancy, data = cw_middle)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9823  -5.9208   0.8609   5.3109   8.7943 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     46.5913    38.0029   1.226    0.240
## lifeexpectancy   0.4789     0.5127   0.934    0.366
## 
## Residual standard error: 6.894 on 14 degrees of freedom
## Multiple R-squared:  0.05866,    Adjusted R-squared:  -0.008582 
## F-statistic: 0.8724 on 1 and 14 DF,  p-value: 0.3661
# Same model fitted to every country in sw_le2
m4 <- lm(formula = wateraccess ~ lifeexpectancy, data = sw_le2)
summary(m4)
## 
## Call:
## lm(formula = wateraccess ~ lifeexpectancy, data = sw_le2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.107  -6.213   2.114   7.393  32.846 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -143.0949    20.5176  -6.974 8.37e-10 ***
## lifeexpectancy    2.9400     0.2701  10.884  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.83 on 79 degrees of freedom
## Multiple R-squared:  0.5999, Adjusted R-squared:  0.5949 
## F-statistic: 118.5 on 1 and 79 DF,  p-value: < 2.2e-16
# Same model fitted to sw_le3; the printed estimates match those of the
# sw_le2 fit
m5 <- lm(formula = wateraccess ~ lifeexpectancy, data = sw_le3)
summary(m5)
## 
## Call:
## lm(formula = wateraccess ~ lifeexpectancy, data = sw_le3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.107  -6.213   2.114   7.393  32.846 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -143.0949    20.5176  -6.974 8.37e-10 ***
## lifeexpectancy    2.9400     0.2701  10.884  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.83 on 79 degrees of freedom
## Multiple R-squared:  0.5999, Adjusted R-squared:  0.5949 
## F-statistic: 118.5 on 1 and 79 DF,  p-value: < 2.2e-16
# Association between water access and life expectancy, overall and within
# each subset. Both variables are continuous, so Pearson's correlation test is
# used. chisq.test() is meant for categorical counts: given two numeric
# vectors it cross-tabulates every distinct value, which is why it reported
# thousands of degrees of freedom and warned that the approximation may be
# incorrect.
cor.test(sw_le2$wateraccess, sw_le2$lifeexpectancy)
cor.test(cw_top10$wateraccess, cw_top10$lifeexpectancy)
cor.test(cw_bottom10$wateraccess, cw_bottom10$lifeexpectancy)
cor.test(cw_middle$wateraccess, cw_middle$lifeexpectancy)

DS Plot

library("dslabs")
# Switch to the dslabs default plot theme
ds_theme_set()
# Scatter with one colour per country over a dashed grey reference line
# (geom_abline() without slope/intercept)
ggplot(sw_le2, aes(x = wateraccess, y = lifeexpectancy, label = country)) +
  geom_abline(linetype = 2, colour = "darkgrey") +
  geom_point(aes(colour = country), size = 3) +
  labs(
    title = "Clean water access & Life expectancy",
    x = "Clean Water Access ",
    y = "Life Expectancy"
  ) +
  scale_colour_discrete(name = "country")

# Re-apply the dslabs default theme, then draw the bare per-country scatter
ds_theme_set()
ggplot(sw_le2, aes(x = wateraccess, y = lifeexpectancy, label = country)) +
  geom_point(mapping = aes(colour = country))

# Use the dslabs theme built on theme_bw, with bold titles
ds_theme_set(new = "theme_bw", args = NULL, base_size = 11, bold_title = TRUE)
# Scatter of life expectancy against water access, coloured by continent.
# `guides` is not an aesthetic, so it has been dropped from aes(); ggplot2 only
# warned about it and ignored it.
# NOTE(review): `size = country` maps a discrete variable to point size, and
# scale_fill_brewer() has no fill mapping to act on in this plot.
p11 <- sw_le3 %>%
  ggplot() +
  geom_point(
    aes(x = wateraccess, y = lifeexpectancy, size = country, col = continent),
    alpha = 0.5
  ) +
  ggtitle("Life Expect. by Water Access") +
  xlab("Clean Water Access (% Pop.)") +
  ylab("Life Expectancy (years)") +
  scale_fill_brewer()
ggplotly(p11)
## Warning: Using size for a discrete variable is not advised.
# Scatter with point size following water access and fill mapped to continent
p12 <- ggplot(data = sw_le3) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = wateraccess,
      fill = continent
    ),
    alpha = 0.5
  ) +
  labs(
    title = "Life Expect. by Water Access",
    x = "Clean Water Access (% Pop.)",
    y = "Life Expectancy (years)"
  ) +
  scale_fill_brewer()

ggplotly(p12)
# Same scatter with a default geom_abline() layer added
p12a <- ggplot(data = sw_le3) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = wateraccess,
      fill = continent
    ),
    alpha = 0.5
  ) +
  geom_abline() +
  labs(
    title = "Life Expect. by Water Access",
    x = "Clean Water Access (% Pop.)",
    y = "Life Expectancy (years)"
  ) +
  scale_fill_brewer()

ggplotly(p12a)
# Scatter on fixed 0-100 axes with a reference line.
# geom_abline() has no `method` parameter: `method = m5` was ignored with a
# warning and the default line (slope 1, intercept 0) was drawn. The stray
# argument is removed, so the plot itself is unchanged.
# NOTE(review): if a fitted regression line was intended, that needs
# geom_smooth(method = "lm") with x/y aesthetics instead.
p13 <- sw_le3 %>%
  ggplot() +
  geom_point(
    aes(x = wateraccess, y = lifeexpectancy, size = country, fill = continent),
    alpha = 0.5
  ) +
  geom_abline() +
  lims(x = c(0, 100), y = c(0, 100)) +
  ggtitle("Life Expect. by Water Access") +
  xlab("Clean Water Access (% Pop.)") +
  ylab("Life Expectancy (years)") +
  scale_fill_brewer()
ggplotly(p13)
## Warning: Using size for a discrete variable is not advised.
# Scatter on fixed 0-100 axes with a second, jittered copy of the points
# layered on top
p13a <- ggplot(data = sw_le3) +
  geom_point(
    mapping = aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = country,
      fill = continent
    ),
    alpha = 0.5
  ) +
  geom_jitter(mapping = aes(x = wateraccess, y = lifeexpectancy)) +
  lims(x = c(0, 100), y = c(0, 100)) +
  labs(
    title = "Life Expect. by Water Access",
    x = "Clean Water Access (% Pop.)",
    y = "Life Expectancy (years)"
  ) +
  scale_fill_brewer()


ggplotly(p13a)
## Warning: Using size for a discrete variable is not advised.

Final Plot for Analysis:

# Scatter on fixed 0-100 axes with a thin black smoother (geom_smooth's
# automatically chosen method, no confidence band) and a reference line.
# geom_abline() has no `method` parameter: `method = lm` was ignored with a
# warning and the default line (slope 1, intercept 0) was drawn. The stray
# argument is removed, so the plot itself is unchanged.
# NOTE(review): if a fitted regression line was intended, that needs
# geom_smooth(method = "lm") instead.
p13b <- sw_le3 %>%
  ggplot() +
  geom_point(
    aes(x = wateraccess, y = lifeexpectancy, size = country, fill = continent),
    alpha = 0.5
  ) +
  geom_smooth(
    aes(x = wateraccess, y = lifeexpectancy),
    se = FALSE, lwd = 0.5, col = "black"
  ) +
  geom_abline() +
  lims(x = c(0, 100), y = c(0, 100)) +
  ggtitle("Life Expect. by Water Access") +
  xlab("Clean Water Access (% Pop.)") +
  ylab("Life Expectancy (years)") +
  scale_fill_brewer()
ggplotly(p13b)
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Final Plot for Analysis Refined and Zoomed In:

# Legend styling to add to a plot: bold black 10 pt legend title, legend placed
# at the bottom.
# The trailing `+ theme_replace(legend.title = "Continent")` has been removed:
# theme_replace() modifies the session-wide theme rather than yielding elements
# to add to this object, and `legend.title` takes an element such as
# element_text(), not a label string. A legend's label is set on the plot
# itself through labs().
legendtheme <- theme(
  legend.title = element_text(color = "black", size = 10, face = "bold"),
  legend.position = "bottom"
)
                 
# guide_legend(title=waiver())
# text='<b> Continent </b>'
# fig <- fig %>% layout(legend=list(title=list(text='<b> Trend </b>')))
# %+replace%
# Refined final plot: points sized by water access and filled by continent, a
# thin black smoother, a reference line, a centred title, and a custom hover
# label built in the `text` aesthetic.
# - `text` is not a ggplot2 aesthetic (hence the warning below); it is passed
#   through so ggplotly(tooltip = "text") can show it on hover.
# - geom_abline() has no `method` parameter: `method = lm` was ignored with a
#   warning and the default line (slope 1, intercept 0) was drawn, so the
#   argument is removed without changing the plot.
# - The legend label is set with `fill = "Continent"` because fill is the
#   aesthetic mapped to continent; `col` is not mapped in this plot and
#   `guide_legend` is not an aesthetic, so those labels had no effect.
p14 <- sw_le3 %>%
  ggplot() +
  legendtheme +
  geom_point(
    aes(
      x = wateraccess,
      y = lifeexpectancy,
      size = wateraccess,
      fill = continent,
      text = paste("Country:", country, '</br>', '</br>Clean Water Access:', wateraccess, '</br>Life Expectancy:', lifeexpectancy)
    ),
    alpha = 0.5
  ) +
  geom_smooth(
    aes(x = wateraccess, y = lifeexpectancy),
    se = FALSE, lwd = 0.5, col = "black"
  ) +
  geom_abline() +
  ggtitle("Life Expectancy & Clean Water Access") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Clean Water Access (% Pop.)",
    y = "Life Expectancy (years)",
    fill = "Continent"
  ) +
  scale_fill_brewer()
## Warning: Ignoring unknown aesthetics: text
ggplotly(p14, tooltip = "text")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'