### Part One - Summaries

# Build one summary row per country (isocode) from the 1985-2010 observations:
#   n - log of (last POP / first POP), divided by the number of rows in the window
#   i - mean of the investment column `ki`
#   y - value of `y` in the last observation of the window
# Countries whose mean `ki` is NA are dropped at the end.
pwt <- read.csv("pwt71_wo_country_names_wo_g_vars.csv")
pwt2 <- pwt %>%
  filter(between(year, 1985, 2010)) %>%
  # first()/last() are positional, so sort by year to guarantee they pick the
  # earliest and latest observation whatever the row order of the CSV is.
  arrange(year) %>%
  group_by(isocode) %>%
  summarise(
    # NOTE(review): n() counts observations, not elapsed years; a full
    # 1985-2010 panel has 26 rows but 25 yearly intervals - confirm that
    # dividing by n() rather than n() - 1 is intended.
    n = log(last(POP) / first(POP)) / n(),
    i = mean(ki),
    y = last(y)
  ) %>%
  filter(!is.na(i))

# Plot population growth (n) per country.
# Each country is drawn as a point with its isocode as a text label; labels are
# coloured per distinct value of n, and the legend and x-axis text are hidden.
ggplot(pwt2, aes(x = isocode, y = n, label = isocode)) +
  geom_point() +
  geom_text(aes(colour = factor(n)), size = 4) +
  scale_colour_discrete(l = 40) +
  theme(
    axis.text.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "cornsilk3", colour = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )

# Plot mean investment (i) per country.
# Each country is drawn as a point with its isocode as a text label; labels are
# coloured per distinct value of i, and the legend and x-axis text are hidden.
ggplot(pwt2, aes(x = isocode, y = i, label = isocode)) +
  geom_point() +
  geom_text(aes(colour = factor(i)), size = 4) +
  scale_colour_discrete(l = 40) +
  theme(
    axis.text.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "aliceblue", colour = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )

# Plot end-of-window income (y) per country.
# Each country is drawn as a point with its isocode as a text label; labels are
# coloured per distinct value of y, and the legend and x-axis text are hidden.
ggplot(pwt2, aes(x = isocode, y = y, label = isocode)) +
  geom_point() +
  geom_text(aes(colour = factor(y)), size = 4) +
  scale_colour_discrete(l = 40) +
  theme(
    axis.text.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "bisque2", colour = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )

### Part Two - Regressions

# Regression variables, one element per row of pwt2:
#   i   - log of the mean investment column
#   n   - population growth rate, used as-is
#   y   - log of income
#   dep - log(n + d + g)
i <- log(pwt2$i)
n <- pwt2$n
y <- log(pwt2$y)
# d and g are constants shared by every country.
# NOTE(review): presumably the depreciation rate and the technology growth
# rate (d + g = 0.065) - confirm.
# They are sized from the data instead of a hard-coded 159 so the script keeps
# working if the number of countries in pwt2 changes.
d <- rep(0.05, length(n))
g <- rep(0.015, length(n))
dep <- log(n + d + g)

# Unrestricted linear fit: i and dep each get their own coefficient,
# plus an intercept.
LinMod1 <- lm(formula = y ~ i + dep)
summary(LinMod1)
## 
## Call:
## lm(formula = y ~ i + dep)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.59339 -0.73828  0.00462  0.64332  3.06431 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15.4544     1.6930  -9.128 3.40e-16 ***
## i             0.9984     0.1925   5.186 6.59e-07 ***
## dep          -5.9801     0.6719  -8.900 1.33e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.001 on 156 degrees of freedom
## Multiple R-squared:  0.4483, Adjusted R-squared:  0.4412 
## F-statistic: 63.38 on 2 and 156 DF,  p-value: < 2.2e-16
# Estimate the model to find the implied income shares for capital and labour.
# A single parameter `a` is fitted by non-linear least squares; both regressors
# share the coefficient a / (1 - a) and there is no intercept.
# NOTE(review): dep enters with a '+' sign here, while the unrestricted fit
# (LinMod1) estimates a negative coefficient on dep - confirm the sign of the
# dep term is intended.
LinMod2 <- nls(
  y ~ (a / (1 - a)) * i + (a / (1 - a)) * dep,
  start = list(a = 0.4),
  # warnOnly: return the fit with a warning instead of stopping on
  # non-convergence. Spelled TRUE because T is an ordinary, reassignable name.
  control = nls.control(warnOnly = TRUE)
)

summary(LinMod2)
## 
## Formula: y ~ (a/(1 - a)) * i + (a/(1 - a)) * dep
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  0.77245    0.01114   69.34   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.879 on 158 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 8.588e-08
# Estimate the same restricted model with an intercept.
# Two parameters are fitted by non-linear least squares: the intercept A0 and
# `a`, with both regressors sharing the coefficient a / (1 - a).
# NOTE(review): dep enters with a '+' sign here, while the unrestricted fit
# (LinMod1) estimates a negative coefficient on dep - confirm the sign of the
# dep term is intended.
LinMod3 <- nls(
  y ~ A0 + (a / (1 - a)) * i + (a / (1 - a)) * dep,
  start = list(a = 0.4, A0 = 1),
  # warnOnly: return the fit with a warning instead of stopping on
  # non-convergence. Spelled TRUE because T is an ordinary, reassignable name.
  control = nls.control(warnOnly = TRUE)
)

summary(LinMod3)
## 
## Formula: y ~ A0 + (a/(1 - a)) * i + (a/(1 - a)) * dep
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a   0.43813    0.07868   5.569 1.09e-07 ***
## A0  2.26062    0.17259  13.098  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.303 on 157 degrees of freedom
## 
## Number of iterations to convergence: 3 
## Achieved convergence tolerance: 6.247e-09
# Create the fitted-income variable (Y_hat) from the LinMod3 estimates.
# The parameters are read from the fitted model rather than copied by hand
# from its printed summary, so they keep full precision and cannot go stale
# when the data or the model change. They are repeated to the length of the
# regressors instead of a hard-coded 159.
fitted_params <- coef(LinMod3)
a <- rep(fitted_params[["a"]], length(i))
A0 <- rep(fitted_params[["A0"]], length(i))
# The model is fitted on log income, so exponentiate to get back to levels.
PredY <- exp(A0 + (a / (1 - a)) * i + (a / (1 - a)) * dep)


# Attach the fitted income levels to the per-country table as column FittedY.
pwt3 <- mutate(ungroup(pwt2), FittedY = PredY)

# Plot fitted vs. actual income: observed y on the x-axis, FittedY on the
# y-axis, one point per country.
ggplot(pwt3, aes(x = y, y = FittedY)) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(colour = "black"),
    panel.background = element_rect(fill = "cadetblue2", colour = "black"),
    axis.line = element_line(colour = "black")
  )

# The model does not fit the data well: there is minimal correlation between the fitted values of y and the observed values.

# Add the variable 'residuals' to the per-country table.
# A residual is observed minus fitted (the same convention as stats::residuals()),
# so a positive value means the model under-predicts that country's income.
pwt3 <- pwt3 %>%
  mutate(residuals = y - FittedY)

# Histogram of the residuals column.
# The vector keeps the name Residuals because hist() uses it for the default
# title and x-axis label.
Residuals <- pwt3[["residuals"]]
hist(Residuals)