Homework week 5

Group 5

Nicholas Russell, Matthew Kourlinins, Nicola Simpson, Jack Nguyen

# Load the libraries used in this analysis (dplyr for data manipulation, ggplot2 for plotting)

library(dplyr); library(ggplot2)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load pwt71.csv (presumably the Penn World Table 7.1 -- confirm) from the
# current working directory into the data frame `pwt`.

pwt <- read.csv(file = "pwt71.csv")

# Build one row per country (isocode) from the observations for 1985 onwards:
#   n   - log of (last POP / first POP) in the window, divided by the number
#         of observations, i.e. an average population growth rate
#   Inv - mean of ki divided by 100 (ki is presumably a percentage share --
#         confirm against the data documentation)
#   y   - y in the country's most recent year (2010 according to the
#         original author's note)
# first()/last() are ordered by year explicitly so the result does not depend
# on the row order of the CSV.
# NOTE(review): the growth rate divides by n() (number of observations), not
# by the number of year-to-year intervals (n() - 1). Left unchanged so the
# estimates stay comparable with the recorded results -- confirm intent.
# Countries whose mean ki is NA (any missing ki in the window) are dropped.

pwt.ss <- pwt %>%
  filter(year >= 1985) %>%
  group_by(isocode) %>%
  summarise(
    n = log(last(POP, order_by = year) / first(POP, order_by = year)) / n(),
    Inv = mean(ki) / 100,
    y = last(y, order_by = year)
  ) %>%
  filter(!is.na(Inv))

# Exploratory scatter plot of population growth (n) against the investment
# share (Inv) in pwt.ss. Written with ggplot() + geom_point() because qplot()
# is deprecated as of ggplot2 3.4.0.

ggplot(pwt.ss, aes(x = Inv, y = n)) +
  geom_point()

# Density plot of the investment share (Inv) across the countries in pwt.ss.
ggplot(data = pwt.ss, mapping = aes(x = Inv)) +
  geom_density()

# Constants added to population growth before taking logs (presumably the
# rate of technological progress and the depreciation rate of a Solow-type
# model -- confirm).

g <- 0.03
delta <- 0.02

# Add the logged regression variables to the country-level data:
#   lnS   = log(Inv)
#   lnY   = log(y)
#   lnNGD = log(n + g + delta)
pwt.ss2 <- mutate(
  pwt.ss,
  lnS = log(Inv),
  lnY = log(y),
  lnNGD = log(n + g + delta)
)

# Unrestricted linear regression of lnY on lnS and lnNGD, fitted by lm().

linear_model <- lm(
  formula = lnY ~ lnS + lnNGD,
  data = pwt.ss2
)

# Print the coefficient table and fit statistics of the linear model.

summary(linear_model)

## 
## Call:
## lm(formula = lnY ~ lnS + lnNGD, data = pwt.ss2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.58394 -0.74338 -0.00635  0.64977  3.06547 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.9330     1.5663  -5.703 5.73e-08 ***
## lnS           1.0019     0.1928   5.198 6.24e-07 ***
## lnNGD        -4.8203     0.5439  -8.863 1.67e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.002 on 156 degrees of freedom
## Multiple R-squared:  0.4467, Adjusted R-squared:  0.4396 
## F-statistic: 62.98 on 2 and 156 DF,  p-value: < 2.2e-16

# Restricted non-linear model fitted by nls(): lnS and lnNGD share the single
# coefficient alpha / (1 - alpha) with opposite signs, and there is no
# intercept. alpha = 0.4 is only the starting value for the search.

nonlinear_model <- nls(
  formula = lnY ~ (alpha/(1-alpha)) * lnS - (alpha/(1-alpha)) * lnNGD,
  data = pwt.ss2,
  start = list(alpha = 0.4)
)

# Print the estimate of alpha and the convergence information.

summary(nonlinear_model)

## 
## Formula: lnY ~ (alpha/(1 - alpha)) * lnS - (alpha/(1 - alpha)) * lnNGD
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## alpha 0.683072   0.007119   95.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.152 on 158 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 6.132e-10

# Second non-linear model: adds a level term log(A) to the restricted
# specification. A = 1 and alpha = 0.4 are starting values for the nls()
# search, not fixed values. warnOnly = TRUE makes nls() return the fit with a
# warning instead of stopping with an error if convergence fails.
# NOTE: despite its name, linear_model2 is fitted with nls(); the name is kept
# in case other code refers to it.
linear_model2 <- nls(
  lnY ~ log(A) + (alpha / (1 - alpha)) * lnS - (alpha / (1 - alpha)) * lnNGD,
  data = pwt.ss2,
  start = list(A = 1, alpha = 0.4),
  control = nls.control(warnOnly = TRUE)
)

# Print the estimates of A and alpha and the convergence information.
summary(linear_model2)

## 
## Formula: lnY ~ log(A) + (alpha/(1 - alpha)) * lnS - (alpha/(1 - alpha)) * 
##     lnNGD
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## A      2.22427    0.54212   4.103 6.54e-05 ***
## alpha  0.61202    0.02847  21.499  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.118 on 157 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 1.125e-08