# Sanity check before joining: collect the rows of IMR_long that have no
# counterpart in TFR_long. The keys are the same three columns the
# left_join() that builds IMR_TFR matches on, so a count of zero means that
# join leaves no IMR_long row unmatched.
miss <- anti_join(
  IMR_long,
  TFR_long,
  by = c("country_name", "country_code", "year")
)
nrow(miss)
## [1] 0
# Attach the TFR_long columns to every row of IMR_long, matching on country
# name, country code and year. Rows of IMR_long without a match are kept.
IMR_TFR <- left_join(
  x = IMR_long,
  y = TFR_long,
  by = c("country_name", "country_code", "year")
)
# Print a compact, column-by-column overview of the joined table.
glimpse(IMR_TFR)
## Rows: 16,226
## Columns: 5
## $ country_name <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Ar…
## $ country_code <chr> "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "…
## $ year <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 196…
## $ IMR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TFR <dbl> 4.820, 4.655, 4.471, 4.271, 4.059, 3.842, 3.625, 3.417, 3…
Original Graph
# Baseline scatter plot: one small point per row of IMR_TFR, IMR on the
# x-axis and TFR on the y-axis, with country_name supplied as the group
# aesthetic.
g <- ggplot(
  data = IMR_TFR,
  mapping = aes(x = IMR, y = TFR, group = country_name)
) +
  geom_point(size = 0.1)
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(g)
Improvement 1: Adding a smooth curve
# Same scatter plot as the baseline, without the group aesthetic, plus a
# smoothed trend line. geom_smooth() is left at its defaults, so ggplot2
# chooses the smoothing method and formula itself.
g1 <- ggplot(data = IMR_TFR, mapping = aes(x = IMR, y = TFR)) +
  geom_point(size = 0.1) +
  geom_smooth()
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(g1)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 4428 rows containing non-finite values (`stat_smooth()`).
## Warning: The following aesthetics were dropped during statistical transformation:
## x_plotlyDomain, y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Improvement 2: Indicating highest TFR
# Keep the row of IMR_TFR with the largest TFR value; with_ties = FALSE
# guarantees a single row even if several rows share that maximum.
# NOTE(review): the object is named topTMR, but it is selected by TFR.
topTMR <- slice_max(IMR_TFR, order_by = TFR, n = 1, with_ties = FALSE)
# Show the maximum TFR value itself.
topTMR$TFR
## [1] 8.864
# Scatter plot of TFR against IMR with the largest TFR value marked by a
# dashed horizontal line and a text label naming the value and its country.
g2 <- IMR_TFR %>%
  ggplot(aes(x = IMR, y = TFR)) +
  geom_point(size = 0.1) +
  geom_hline(yintercept = topTMR$TFR, linetype = "dashed") +
  annotate(
    "text",
    x = 80,
    # Place the label slightly above the dashed line so the two do not overlap.
    y = topTMR$TFR + 0.3,
    # paste() already separates its arguments with a single space, so the
    # pieces carry no trailing spaces of their own. The label says TFR because
    # that is the column the value comes from.
    label = paste(
      "Max TFR =", topTMR$TFR,
      "Country:", topTMR$country_name
    ),
    size = 3,
    color = "black"
  )
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(g2)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the plotly package.
## Please report the issue at <https://github.com/plotly/plotly.R/issues>.
Improvement 3: Changing the theme
# Same scatter plot of TFR against IMR, restyled with the Economist theme
# from the ggthemes package (called with an explicit namespace).
g3 <- ggplot(data = IMR_TFR, mapping = aes(x = IMR, y = TFR)) +
  geom_point(size = 0.1) +
  ggthemes::theme_economist()
# Convert the static ggplot object into an interactive plotly widget.
ggplotly(g3)