#load the library and import the data set

library(readxl)
# Read the GNP workbook into `gnp` and print it for inspection.
# NOTE(review): this absolute path points at a user-specific Downloads
# folder; adjust it before running the script on another machine.
gnp <- read_excel("C:\\Users\\user\\Downloads\\GNP.xlsx")
gnp
## # A tibble: 12 x 5
##    country     birth_rate GNP_p.c. growth income_ratio
##    <chr>            <dbl>    <dbl>  <dbl>        <dbl>
##  1 Brazil              30     2200    5.1          9.5
##  2 Colombia            29     1380    3.2          6.8
##  3 Costa Rica          30     1430    3            4.6
##  4 India               35      260    1.4          3.1
##  5 Mexico              36     2250    3.8          5  
##  6 Peru                36     1170    1            8.7
##  7 Philippines         34      790    2.8          3.8
##  8 Senegal             48      430   -0.3          6.4
##  9 South Korea         24     1700    6.9          2.7
## 10 Sri Lanka           27      300    2.5          2.3
## 11 Taiwan              21     1170    6.2          3.8
## 12 Thailand            30      770    4.6          3.3

#———– Problem A ———– #

## Determine the relationship between birth rate and growth. 
## Find the value of R. Interpret the result. 

# Scatter plot of birth rate (y) against growth (x) with a loess smoother.
# Explicit axis labels replace the default deparsed expressions
# ("gnp$growth" / "gnp$birth_rate") that would otherwise label the axes.
scatter.smooth(
  x = gnp$growth,
  y = gnp$birth_rate,
  xlab = "Growth",
  ylab = "Birth rate",
  main = "Scatter Plot"
)

# Correlation coefficient r between birth rate and growth
# (cor() defaults to the Pearson method).
cor(gnp$birth_rate, gnp$growth)
## [1] -0.8235457
# Based on the value of r, which is -0.8235457, the birth rate and growth rate 
# have a strong negative linear relationship. This means that as the growth 
# rate increases, the birth rate tends to decrease. 

#———– Problem B ———– #

## State the simple linear regression equation for 
## the income ratio (IR), for GNP and for growth.

# For income ratio:

# Fit the simple linear regression of birth rate on income ratio,
# then auto-print the fitted call and its coefficients.
linearMod_income_ratio <- lm(formula = birth_rate ~ income_ratio, data = gnp)
linearMod_income_ratio
## 
## Call:
## lm(formula = birth_rate ~ income_ratio, data = gnp)
## 
## Coefficients:
##  (Intercept)  income_ratio  
##       26.443         1.045
# The simple linear regression equation for 
# the income ratio (IR) is Y(hat) = 26.44 + 1.05X 
# or birth_rate = 26.44 + 1.05 * income_ratio

# For GNP:

# Fit the simple linear regression of birth rate on GNP per capita,
# then auto-print the fitted call and its coefficients.
linearMod_gnp <- lm(formula = birth_rate ~ GNP_p.c., data = gnp)
linearMod_gnp
## 
## Call:
## lm(formula = birth_rate ~ GNP_p.c., data = gnp)
## 
## Coefficients:
## (Intercept)     GNP_p.c.  
##   34.720760    -0.002646
# The simple linear regression equation for 
# the GNP is Y(hat) = 34.72 - 0.0026X 
# or birth_rate = 34.72 - 0.0026 * GNP
# (the slope is small but not zero, so it is not rounded to 0.00)

# For growth: 

# Fit the simple linear regression of birth rate on growth,
# then auto-print the fitted call and its coefficients.
linearMod_growth <- lm(formula = birth_rate ~ growth, data = gnp)
linearMod_growth
## 
## Call:
## lm(formula = birth_rate ~ growth, data = gnp)
## 
## Coefficients:
## (Intercept)       growth  
##       40.71        -2.70
# The simple linear regression equation for 
# the growth is Y(hat) = 40.71 - 2.70X 
# or birth_rate = 40.71 - 2.70 * growth 

#———– Problem C ———– #

## Find the value of beta sub 0 and beta sub 1 for the regression of the birth rate on the growth.

# Regress birth rate on growth; the printed coefficients are the
# intercept (beta sub 0) and the slope on growth (beta sub 1).
linearMod <- lm(formula = birth_rate ~ growth, data = gnp)
linearMod
## 
## Call:
## lm(formula = birth_rate ~ growth, data = gnp)
## 
## Coefficients:
## (Intercept)       growth  
##       40.71        -2.70
# Full model summary: coefficient estimates with standard errors,
# t statistics and p-values, plus the overall fit statistics.
summary(object = linearMod)
## 
## Call:
## lm(formula = birth_rate ~ growth, data = gnp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9617 -2.7017 -0.5417  2.2034  6.4783 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  40.7117     2.3031  17.677 7.15e-09 ***
## growth       -2.7000     0.5881  -4.591 0.000994 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.132 on 10 degrees of freedom
## Multiple R-squared:  0.6782, Adjusted R-squared:  0.6461 
## F-statistic: 21.08 on 1 and 10 DF,  p-value: 0.0009936
# The beta sub 0 is 40.71
# The beta sub 1 is -2.70

#———– Problem D ———– #

## Interpret the slope and intercept for the regression of 
##the birth rate on the growth.

# The slope is -2.70 and the intercept is 40.71 for the regression 
# of the birth rate on the growth rate. The slope means that for every 
# 1-unit increase in growth, the predicted birth rate decreases by 2.70. 
# The intercept means that when growth is 0, the predicted birth rate is 40.71. 

#———– Problem E ———– #

## Calculate the R^2 value for the regression of the birth rate on the growth. Interpret the result.

# Model summary; the "Multiple R-squared" line reports the R^2 value.
summary(object = linearMod)
## 
## Call:
## lm(formula = birth_rate ~ growth, data = gnp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9617 -2.7017 -0.5417  2.2034  6.4783 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  40.7117     2.3031  17.677 7.15e-09 ***
## growth       -2.7000     0.5881  -4.591 0.000994 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.132 on 10 degrees of freedom
## Multiple R-squared:  0.6782, Adjusted R-squared:  0.6461 
## F-statistic: 21.08 on 1 and 10 DF,  p-value: 0.0009936
# The r^2 is 0.6782. This means that 67.82 percent of the variation 
# in birth rate is explained by its linear relationship with growth rate.