options(
digits = 3
)
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.3 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: assertthat
##
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
##
## has_name
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## Loading required package: psych
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## Loading required package: metafor
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading the 'metafor' package (version 3.0-2). For an
## introduction to the package please type: help(metafor)
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
##
## set_names
## The following object is masked from 'package:assertthat':
##
## has_name
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
##
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:rlang':
##
## is_logical
## The following object is masked from 'package:psych':
##
## rescale
## The following object is masked from 'package:assertthat':
##
## are_equal
## The following objects are masked from 'package:purrr':
##
## is_logical, is_numeric
## The following object is masked from 'package:base':
##
## +
library(rms)
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'rms'
## The following object is masked from 'package:metafor':
##
## vif
theme_set(theme_bw())
#data from world bank
d = read_csv("data/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_2765238.csv", skip = 3) %>%
df_legalize_names()
## New names:
## * `` -> ...66
## Rows: 266 Columns: 66
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country Name, Country Code, Indicator Name, Indicator Code
## dbl (31): 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, ...
## lgl (31): 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#pull SA data
SA = filter(d, Country_Code == "ZAF") %>%
select(x1960:x2020) %>%
pivot_longer(cols = everything(), names_to = "year") %>%
mutate(
year = str_replace(year, "x", "") %>% as.numeric()
)
#fit linear model
(fit_ols = ols(value ~ year, data = SA))
## Frequencies of Missing Values Due to Each Variable
## value year
## 30 0
##
## Linear Regression Model
##
## ols(formula = value ~ year, data = SA)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 31 LR chi2 48.97 R2 0.794
## sigma614.9497 d.f. 1 R2 adj 0.787
## d.f. 29 Pr(> chi2) 0.0000 g 1392.470
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1742.1 -376.7 -115.3 532.1 1004.5
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -250490.7823 24758.9626 -10.12 <0.0001
## year 130.5441 12.3485 10.57 <0.0001
##
#predict values
preds = data.frame(year = 1990:2030)
preds = bind_cols(
preds,
rms:::predict.ols(fit_ols, newdata = preds, conf.int = .95) %>% as.data.frame() %>% rename(value = linear.predictors)
) %>% as_tibble()
preds
#merge data and plot
bind_rows(
SA %>% select(year, value) %>% mutate(type = "observed"),
preds %>% mutate(type = "linear model")
) %>%
filter(year %in% seq(1990, 2030)) %>%
ggplot(aes(year, value, color = type)) +
geom_point() +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = .1) +
geom_line() +
ylab("GDPpc (2017 USD)") +
ggtitle(
"GDP per capita forecast for South Africa for 2030",
subtitle = "Data from World Bank, prediction interval 95%"
)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf
