# Load the Penn World Table data.
pwt <- read.csv("pwt71.csv")

# Build pwt.ss (subset): one row per country (isocode) for 1985-2010.
#   n   = continuously compounded population growth rate per year,
#         i.e. log(POP_last / POP_first) divided by the years elapsed.
#   inv = average investment as a percentage of GDP (ki), divided by 100 so
#         it is on the same order of magnitude as the other variables.
#   y   = income per capita (in purchasing power terms) relative to the
#         United States, taken from the last year in the window.
#
# Rows are sorted by year before summarising so that first()/last() really
# pick the earliest and latest observation regardless of the CSV row order.
# The growth rate is divided by the elapsed time (last(year) - first(year)),
# not by the number of observations n(): 26 annual rows span only 25 years.
# NOTE(review): any model output recorded further down in this document was
# produced with the earlier n() divisor; re-run the script to refresh it.
pwt.ss <- pwt %>%
  filter(between(year, 1985, 2010)) %>%
  arrange(isocode, year) %>%
  group_by(isocode) %>%
  summarise(
    n = log(last(POP) / first(POP)) / (last(year) - first(year)),
    inv = mean(ki) / 100,
    y = last(y)
  ) %>%
  filter(!is.na(inv))
# Population growth rate (vertical axis) against investment share (horizontal).
ggplot(data = pwt.ss, mapping = aes(x = inv, y = n)) +
  geom_point()
# Income per capita (vertical axis) against investment share (horizontal).
ggplot(data = pwt.ss, mapping = aes(x = inv, y = y)) +
  geom_point()
# Income per capita (vertical axis) against population growth rate (horizontal).
ggplot(data = pwt.ss, mapping = aes(x = n, y = y)) +
  geom_point()
Basic Linear Solow Model:
# Take the dataset generated in part one and add the regression columns:
# logY, logS, g, delta and logngd, where logngd = log(n + g + delta).
# The values for delta and g are taken from the assumptions used in
# 'Contributions to the Empirics of Economic Growth (1992)' by Mankiw, Romer
# and Weil.
pwt.ss <- pwt.ss %>%
  mutate(logY = log(y), logS = log(inv)) %>%
  mutate(g = 0.03, delta = 0.02) %>%
  mutate(logngd = log(n + g + delta))
# Fit the linear model: log income regressed on log investment share and
# log(n + g + delta), then print the fit summary.
linmod <- lm(
  formula = logY ~ logS + logngd,
  data = pwt.ss
)
summary(linmod)
##
## Call:
## lm(formula = logY ~ logS + logngd, data = pwt.ss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.58394 -0.74338 -0.00635 0.64977 3.06547
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.9330 1.5663 -5.703 5.73e-08 ***
## logS 1.0019 0.1928 5.198 6.24e-07 ***
## logngd -4.8203 0.5439 -8.863 1.67e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.002 on 156 degrees of freedom
## Multiple R-squared: 0.4467, Adjusted R-squared: 0.4396
## F-statistic: 62.98 on 2 and 156 DF, p-value: < 2.2e-16
The non-linear Solow model with previously used variables:
# Non-linear least squares fit with a single parameter alpha, which enters
# both regressors through the same coefficient alpha / (1 - alpha).
# - start: the iteration begins from alpha = 0.4.
# - warnOnly = TRUE: a convergence failure is reported as a warning instead
#   of an error (spelled TRUE rather than T, because T can be reassigned).
# - subset: rows with a missing logS are excluded from the fit.
nonlinmod <- nls(
  logY ~ (alpha/(1-alpha))*logS - (alpha/(1-alpha))*logngd,
  data = pwt.ss,
  start = list(alpha = 0.4),
  control = nls.control(warnOnly = TRUE),
  subset = !is.na(logS)
)
summary(nonlinmod)
##
## Formula: logY ~ (alpha/(1 - alpha)) * logS - (alpha/(1 - alpha)) * logngd
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 0.683072 0.007119 95.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.152 on 158 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 6.132e-10
# The same non-linear model with an added intercept term A0.
# - start: the iteration begins from alpha = 0.4 and A0 = 1.
# - warnOnly = TRUE: a convergence failure is reported as a warning instead
#   of an error (spelled TRUE rather than T, because T can be reassigned).
# - subset: rows with a missing logS are excluded from the fit.
nonlinmodA0 <- nls(
  logY ~ A0 + (alpha/(1-alpha))*logS - (alpha/(1-alpha))*logngd,
  data = pwt.ss,
  start = list(alpha = 0.4, A0 = 1),
  control = nls.control(warnOnly = TRUE),
  subset = !is.na(logS)
)
summary(nonlinmodA0)
##
## Formula: logY ~ A0 + (alpha/(1 - alpha)) * logS - (alpha/(1 - alpha)) *
## logngd
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 0.61202 0.02847 21.50 < 2e-16 ***
## A0 0.79943 0.24373 3.28 0.00128 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.118 on 157 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 1.671e-07
# Add the model's predicted value of log(y) to the table.
# newdata is passed explicitly so that predict() returns exactly one value per
# row of pwt.ss (NA where an input is missing). Without newdata, predict()
# returns fitted values only for the rows that entered the fit, which cannot
# be lined up with the table if nls() dropped any row.
pwt.ss <- pwt.ss %>%
  mutate(ypredicted = predict(nonlinmodA0, newdata = pwt.ss))
# Observed logY (horizontal axis) against the model's predicted value
# (vertical axis), with a fitted straight line overlaid.
ggplot(data = pwt.ss, mapping = aes(x = logY, y = ypredicted)) +
  geom_point() +
  geom_smooth(method = "lm")
The plot of \(y\) vs. the predicted value of \(y\) shows that there is a strong correlation between the model’s predictions and its actual results.
# Residuals: observed logY minus the model's predicted value.
pwt.ss <- mutate(pwt.ss, residual = logY - ypredicted)
# Histogram of the residuals.
ggplot(data = pwt.ss, mapping = aes(x = residual)) +
  geom_histogram()
# Residuals against predicted values, with a horizontal reference line at 0.
ggplot(data = pwt.ss, mapping = aes(x = ypredicted, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0)
The scatterplot seems random enough to indicate that the residuals and the fitted values are uncorrelated.
# Standard deviation of the residuals, ignoring missing values.
sigma <- sd(pwt.ss[["residual"]], na.rm = TRUE)
# Value recorded by the author: Standard Deviation is 1.14681921927611
# Simulate a "new" version of each country: draw a log-normal value whose
# log has mean ypredicted and standard deviation sigma. This is the same
# draw as exponentiating a normal variate with those parameters.
pwt.ss <- pwt.ss %>%
  mutate(Newcountry = rlnorm(n(), meanlog = ypredicted, sdlog = sigma))
# Histogram of actual GDP per capita relative to the US (column y).
ggplot(data = pwt.ss, mapping = aes(x = y)) +
  geom_histogram()
# Histogram of the simulated "new countries" (column Newcountry).
ggplot(data = pwt.ss, mapping = aes(x = Newcountry)) +
  geom_histogram()
The “new countries” and the actual GDP histograms have a very similar look, with most of the data on the right-hand side in both. The y vs. predicted y plot shows a symmetrical diagonal distribution around the line, and the residual plot seems sufficiently random. Taken together, this means that with some certainty we can say the model is likely a good fit.