###Part One - Summaries
# Build one row per country (isocode) for the years 1985-2010 with:
#   n - log of (last POP / first POP) divided by the number of yearly
#       observations for that country (population growth),
#   i - mean of `ki` (investment),
#   y - `y` in the country's latest year in the window (income).
# Countries whose mean `ki` is NA are dropped.
# NOTE(review): dividing by n() uses the observation count (e.g. 26 if every
# year 1985-2010 is present) rather than the number of elapsed years
# (n() - 1). Confirm which is intended before changing it - the regression
# output further down depends on this value.
pwt <- read.csv("pwt71_wo_country_names_wo_g_vars.csv")
pwt2 <- pwt %>%
  filter(between(year, 1985, 2010)) %>%
  # first()/last() are positional, so sort by year to guarantee they pick the
  # earliest and latest observation even if the file is not already ordered.
  arrange(year) %>%
  group_by(isocode) %>%
  summarise(
    n = log(last(POP) / first(POP)) / n(),
    i = mean(ki),
    y = last(y)
  ) %>%
  filter(!is.na(i))
# Population growth by country: one point per isocode, overprinted with the
# isocode as a text label. Each distinct value of n gets its own label
# colour; the legend and the x-axis tick labels are hidden.
ggplot(pwt2, aes(x = isocode, y = n, label = isocode)) +
  geom_point() +
  geom_text(aes(colour = factor(n)), size = 4) +
  scale_colour_discrete(l = 40) +
  theme(
    axis.text.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "cornsilk3", colour = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )
# Mean investment (i) by country: one point per isocode, overprinted with
# the isocode as a text label. Each distinct value of i gets its own label
# colour; the legend and the x-axis tick labels are hidden.
ggplot(pwt2, aes(x = isocode, y = i, label = isocode)) +
  geom_point() +
  geom_text(aes(colour = factor(i)), size = 4) +
  scale_colour_discrete(l = 40) +
  theme(
    axis.text.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "aliceblue", colour = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )
# Plot for income (y) by country: one point per isocode, overprinted with
# the isocode as a text label. Each distinct value of y gets its own label
# colour; the legend and the x-axis tick labels are hidden.
ggplot(pwt2, aes(x = isocode, y = y, label = isocode)) +
  geom_point() +
  geom_text(aes(colour = factor(y)), size = 4) +
  scale_colour_discrete(l = 40) +
  theme(
    axis.text.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "bisque2", colour = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )
###Part Two - Regressions
# Regression variables taken from the per-country summary: log investment,
# population growth and log income.
i <- log(pwt2$i)
n <- pwt2$n
y <- log(pwt2$y)
# Constants 0.05 and 0.015 added to n before taking logs (presumably the
# depreciation and technology growth rates - confirm). The vectors are sized
# from the data rather than a hard-coded 159, so the script keeps working
# if the number of countries in pwt2 changes.
d <- rep(0.05, length(n))
g <- rep(0.015, length(n))
dep <- log(n + d + g)
# Unrestricted OLS of log income on log investment and log(n + d + g).
LinMod1 <- lm(y ~ i + dep)
summary(LinMod1)
##
## Call:
## lm(formula = y ~ i + dep)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.59339 -0.73828 0.00462 0.64332 3.06431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.4544 1.6930 -9.128 3.40e-16 ***
## i 0.9984 0.1925 5.186 6.59e-07 ***
## dep -5.9801 0.6719 -8.900 1.33e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.001 on 156 degrees of freedom
## Multiple R-squared: 0.4483, Adjusted R-squared: 0.4412
## F-statistic: 63.38 on 2 and 156 DF, p-value: < 2.2e-16
# Estimating model to find estimated income shares for capital and labour.
# Nonlinear least squares with a single parameter a: both regressors share
# the coefficient a / (1 - a) and there is no intercept.
# warnOnly = TRUE makes nls() return the fit with a warning instead of
# stopping when it hits a termination problem. TRUE is spelled out because
# T is an ordinary variable that can be reassigned.
# NOTE(review): i and dep enter with the same sign here, while the
# unrestricted fit (LinMod1) estimates coefficients of opposite sign for
# them. Confirm whether dep should enter with a minus sign; the formula is
# left unchanged so that it still matches the printed output below.
LinMod2 <- nls(
  y ~ (a/(1 - a)) * i + (a/(1 - a)) * dep,
  start = list(a = 0.4),
  control = nls.control(warnOnly = TRUE)
)
summary(LinMod2)
##
## Formula: y ~ (a/(1 - a)) * i + (a/(1 - a)) * dep
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 0.77245 0.01114 69.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.879 on 158 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 8.588e-08
# Estimating model with intercept.
# Same restricted specification as the no-intercept fit, plus a free
# constant A0: both regressors share the coefficient a / (1 - a).
# warnOnly = TRUE makes nls() return the fit with a warning instead of
# stopping when it hits a termination problem. TRUE is spelled out because
# T is an ordinary variable that can be reassigned.
# NOTE(review): i and dep enter with the same sign here, while the
# unrestricted fit (LinMod1) estimates coefficients of opposite sign for
# them. Confirm whether dep should enter with a minus sign; the formula is
# left unchanged so that it still matches the printed output below.
LinMod3 <- nls(
  y ~ A0 + (a/(1 - a)) * i + (a/(1 - a)) * dep,
  start = list(a = 0.4, A0 = 1),
  control = nls.control(warnOnly = TRUE)
)
summary(LinMod3)
##
## Formula: y ~ A0 + (a/(1 - a)) * i + (a/(1 - a)) * dep
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 0.43813 0.07868 5.569 1.09e-07 ***
## A0 2.26062 0.17259 13.098 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.303 on 157 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 6.247e-09
# Create variable Y_hat: fitted income in levels from LinMod3.
# The estimates are read from the fitted model instead of being retyped
# from the printed summary, so they are not rounded and cannot go stale if
# the data change. Vector lengths follow the data rather than a hard-coded
# 159.
a <- rep(coef(LinMod3)[["a"]], length(i))
A0 <- rep(coef(LinMod3)[["A0"]], length(i))
# y was logged before fitting, so exponentiate to return to levels.
PredY <- exp(A0 + (a/(1 - a)) * i + (a/(1 - a)) * dep)
# Attach the fitted values to the per-country summary as column FittedY.
pwt3 <- mutate(ungroup(pwt2), FittedY = PredY)
# Plot fitted vs. actual: observed income (y) on the x-axis against the
# fitted value (FittedY) on the y-axis, one point per country.
ggplot(pwt3, aes(x = y, y = FittedY)) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "cadetblue2", colour = "black"),
    axis.line = element_line(colour = "black")
  )
The model does not fit the data well: there is minimal correlation between the fitted values of y and the observed values.
# Add variable 'residuals': observed income minus fitted income. This is
# the usual sign convention (the one residuals() uses for fitted models),
# so a positive residual means the model under-predicts that country.
pwt3 <- pwt3 %>%
  mutate(residuals = y - FittedY)
# Plot histogram of the residuals.
Residuals <- pwt3$residuals
hist(Residuals)