Forecast South African GDPpc in 2030

options(
  digits = 3
)

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.3     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

## Loading required package: weights

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: assertthat

## 
## Attaching package: 'assertthat'

## The following object is masked from 'package:tibble':
## 
##     has_name

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following object is masked from 'package:Hmisc':
## 
##     describe

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## Loading required package: metafor

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## 
## Loading the 'metafor' package (version 3.0-2). For an
## introduction to the package please type: help(metafor)

## Loading required package: rlang

## 
## Attaching package: 'rlang'

## The following object is masked from 'package:magrittr':
## 
##     set_names

## The following object is masked from 'package:assertthat':
## 
##     has_name

## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice

## 
## Attaching package: 'kirkegaard'

## The following object is masked from 'package:rlang':
## 
##     is_logical

## The following object is masked from 'package:psych':
## 
##     rescale

## The following object is masked from 'package:assertthat':
## 
##     are_equal

## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric

## The following object is masked from 'package:base':
## 
##     +

library(rms)

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## 
## Attaching package: 'rms'

## The following object is masked from 'package:metafor':
## 
##     vif

theme_set(theme_bw())

#data from world bank
d = read_csv("data/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_2765238.csv", skip = 3) %>% 
  df_legalize_names()

## New names:
## * `` -> ...66

## Rows: 266 Columns: 66

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Country Name, Country Code, Indicator Name, Indicator Code
## dbl (31): 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, ...
## lgl (31): 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ...

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#pull SA data
SA = filter(d, Country_Code == "ZAF") %>% 
  select(x1960:x2020) %>% 
  pivot_longer(cols = everything(), names_to = "year") %>% 
  mutate(
    year = str_replace(year, "x", "") %>% as.numeric()
  )

#fit linear model
(fit_ols = ols(value ~ year, data = SA))

## Frequencies of Missing Values Due to Each Variable
## value  year 
##    30     0 
## 
## Linear Regression Model
##  
##  ols(formula = value ~ year, data = SA)
##  
##  
##                    Model Likelihood    Discrimination    
##                          Ratio Test           Indexes    
##  Obs        31    LR chi2     48.97    R2       0.794    
##  sigma614.9497    d.f.            1    R2 adj   0.787    
##  d.f.       29    Pr(> chi2) 0.0000    g     1392.470    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -1742.1  -376.7  -115.3   532.1  1004.5 
##  
##  
##            Coef         S.E.       t      Pr(>|t|)
##  Intercept -250490.7823 24758.9626 -10.12 <0.0001 
##  year          130.5441    12.3485  10.57 <0.0001 
##

#predict values
preds = data.frame(year = 1990:2030)
preds = bind_cols(
  preds,
  rms:::predict.ols(fit_ols, newdata = preds, conf.int = .95) %>% as.data.frame() %>% rename(value = linear.predictors)
) %>% as_tibble()

preds

#merge data and plot
bind_rows(
  SA %>% select(year, value) %>% mutate(type = "observed"),
  preds %>% mutate(type = "linear model")
) %>% 
  filter(year %in% seq(1990, 2030)) %>% 
  ggplot(aes(year, value, color = type)) +
  geom_point() +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = .1) +
  geom_line() +
  ylab("GDPpc (2017 USD)") +
  ggtitle(
    "GDP per capita forecast for South Africa for 2030",
    subtitle = "Data from World Bank, prediction interval 95%"
  )

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf