#Import dplyr and ggplot libraries
library(dplyr); library(ggplot2)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Load Penn World Table data
pwt <- read.csv ("pwt71.csv")
#Filter out data before 1985
pwt2 <- pwt %>% filter (year>=1985)
#Group data by country
pwt3 <- pwt2 %>% group_by(isocode) %>%
#Generate average of investment for each country 1985 to 2010
summarise (s = mean(ki),
#Calculate average population growth where t=25.
n = ((last(POP)/first(POP))^(1/25)-1),
#Calculate PPP GDP per capita relative to U.S. in 2010 (G-K estimate of PPP GDP used)
y_obs = last(y),
#Calculate n+g+d assuming d=0.05 & g=0.016
ngd = n+0.068)
#Convert ngd to an index number so as to have the same units as y_obs and s
pwt3$ngd <- pwt3$ngd*100
In “pwt3”:
pwt3 %>% ggplot(aes(x = s, y = ngd)) + geom_point()
## Warning: Removed 31 rows containing missing values (geom_point).
#Calculate logs of y, s, and n+g+d
pwt3$lnngd <- log(pwt3$ngd)
pwt3$lny <- log(pwt3$y_obs)
pwt3$lns <- log(pwt3$s)
#Perform first regression
Log_Linear_Solow <- lm(lny ~ lns + lnngd, data = pwt3)
#Check β estimate
summary(Log_Linear_Solow)
##
## Call:
## lm(formula = lny ~ lns + lnngd, data = pwt3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.59644 -0.73697 0.01147 0.64173 3.06365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.2646 1.6345 7.504 4.43e-12 ***
## lns 0.9975 0.1925 5.183 6.68e-07 ***
## lnngd -5.9339 0.6660 -8.910 1.26e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1 on 156 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.4487, Adjusted R-squared: 0.4416
## F-statistic: 63.48 on 2 and 156 DF, p-value: < 2.2e-16
#Perform second regression
nonlinmod <- nls(formula = lny ~ log(A) + (alpha/(1-alpha))*lns - (alpha/(1-alpha))*lnngd, data = pwt3, start = list(alpha = 0.4, A = 1), control = nls.control(warnOnly = T), subset = !is.na(lns))
#Check β estimate
summary(nonlinmod)
##
## Formula: lny ~ log(A) + (alpha/(1 - alpha)) * lns - (alpha/(1 - alpha)) *
## lnngd
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 0.60950 0.03007 20.267 < 2e-16 ***
## A 3.37702 0.69943 4.828 3.24e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.135 on 157 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 5.331e-09
Make a Scatter Plot of lny vs lny_predicted
Mutate wasn’t functioning properly (or more likely being used improperly), so a workaround has been used here involving generating a vector for lny_predicted, then adding that as a column in the model.
First a new table (pwt4) is created by removing all rows containing NA values from pwt3. Secondly, the vector lny_predicted is created for the remaining countries. Thirdly, that vector is added to the data frame pwt4 as a column. Lastly a scatter plot is created of lny vs lny_predicted.
#crop rows with NAs out of table
pwt4 <- na.omit(pwt3)
#Generate predicted values of y from second (nonlinear) model
lny_predicted <- predict(nonlinmod)
#Add predicted y values to data frame
pwt4$lny_predicted <- lny_predicted
#Make scatter plot of y_predicted vs lny
pwt4 %>% ggplot(aes(x = lny_predicted, y = lny )) +
geom_point()
The model seems to fit the data passably, but not exceptionally well.
Generate a histogram of the errors from the model
Once more, mutate was causing problems, so a workaround has been used. The error term is generated by making a vector of y_obs, then generating a new column (error) in the table pwt of y_obs - exp(lny_predicted). The term exp(lny_predicted) is used to obtain the predicted y value as an index number.
#Generate error variable, error = y_obs-exp(lny_predicted)
#Make vector of y_obs
y_obs <- pwt4$y_obs
#Add error column to pwt4
pwt4$error <- y_obs - exp(lny_predicted)
#Generate a histogram of the errors
hist(pwt4$error, xlim = c(-50, 50), breaks = 70)
Generate a Scatter Plot of y_obs vs y_predicted
This is done by generating y_predicted as a column in pwt4, then generating a scatter plot with a fitted line.
#Generate y_predicted by taking exp of lny_predicted
pwt4$y_predicted <- exp(lny_predicted)
#Generate a plot of y_obs vs y_predicted with a fit line to ilustrate errors
ggplot(pwt4, aes(x=y_predicted, y=y_obs)) +
geom_point(shape=1) +
geom_smooth(method=lm)
There were errors ocurring when attempting to generate a ‘new’ country in the next section, and time limitations mean that this is as far as we made it with the tutorial work.