Problem set

There is a data set ‘countries.csv’ that contains information on 60 countries of the world and their characteristics:

Data set “Countries”
variable label possible_range
country country name NA
GDP.ppp.pc.us GDP PPP per capita in US dollars 0+
Int.users.per.100 Internet users per 100 0-100
Status Freedom House status (free, partly free, or unfree) Free, Partly free, Unfree
democracy binary Przeworski index of democracy (democracy or dictatorship) Yes, No
P4 Polity IV index (ordinal scale from -10, autocracy, to +10, full democracy) [-10; 10]
# Load the countries data; the first row of the CSV holds the column names.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
df <- read.csv("countries.csv", header = TRUE)

# Store the Polity IV score as a factor so each score is treated as a
# separate category rather than as a number.
df$P4 <- as.factor(df$P4)

# Drop columns 1, 8 and 9 by position, leaving the six columns labelled below.
# NOTE(review): positional indices depend on the CSV layout -- confirm which
# columns these are before reusing this with another version of the file.
df <- df[, -c(1, 8, 9)]

# One human-readable label per remaining column, in column order.
labs <- c("Country name",
          "GDP per capita, PPP",
          "Internet per 100",
          "Freedom House Index",
          "Democracy-Dictatorship",
          "Polity IV")
library(sjlabelled)
df <- set_label(df, label = labs)

# Print a codebook-style overview (name, label, values) of the data frame.
library(sjPlot)
view_df(df, verbose = FALSE)
Data frame: df
ID Name Label Values Value Labels
1 country Country name Algeria
Argentina
Armenia
Australia
Azerbaijan
Bahrain
Belarus
Brazil
Chile
China
Colombia
Cyprus
Ecuador
Egypt
Estonia
<… truncated>
2 GDP.ppp.pc.us GDP per capita, PPP range: 1535.5-122609.4
3 Int.users.per.100 Internet per 100 range: 7.1-92.9
4 Status Freedom House Index Free
Partly free
Unfree
5 democracy Democracy-Dictatorship No
Yes
6 P4 Polity IV -10
-9
-7
-6
-4
-3
-2
2
3
4
5
6
7
8
9
<… truncated>
# Descriptive statistics for every column of df.
# In the output, rows marked with * are non-numeric columns (here the factors
# country, Status, democracy and P4); their statistics appear to be computed
# on integer level codes (e.g. P4* runs 1-16, not -10..10), so only n is
# meaningful for those rows.
library(psych)
describe(df)
##                   vars  n     mean       sd   median  trimmed      mad
## country*             1 60    30.50    17.46    30.50    30.50    22.24
## GDP.ppp.pc.us        2 57 23234.14 21823.75 16457.09 19795.13 13988.32
## Int.users.per.100    3 58    49.26    23.42    48.70    49.17    28.32
## Status*              4 60     1.83     0.81     2.00     1.79     1.48
## democracy*           5 58     1.60     0.49     2.00     1.62     0.00
## P4*                  6 56    11.25     4.86    13.00    11.74     4.45
##                       min       max     range  skew kurtosis      se
## country*             1.00     60.00     59.00  0.00    -1.26    2.25
## GDP.ppp.pc.us     1535.48 122609.41 121073.93  2.18     6.18 2890.63
## Int.users.per.100    7.10     92.86     85.76  0.03    -0.96    3.08
## Status*              1.00      3.00      2.00  0.30    -1.43    0.10
## democracy*           1.00      2.00      1.00 -0.41    -1.86    0.06
## P4*                  1.00     16.00     15.00 -0.74    -0.89    0.65

0. Scatterplot and correlation

# Scatterplot of Internet users (y) against GDP per capita (x), using the
# formula interface so the variables are looked up in df.
plot(Int.users.per.100 ~ GDP.ppp.pc.us, data = df)

# Pearson correlation test between the two variables. Only pairs with both
# values present are used (the output reports df = 55, i.e. 57 pairs).
cor.test(df$Int.users.per.100, df$GDP.ppp.pc.us)
## 
##  Pearson's product-moment correlation
## 
## data:  df$Int.users.per.100 and df$GDP.ppp.pc.us
## t = 6.8129, df = 55, p-value = 7.637e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5049674 0.7966539
## sample estimates:
##       cor 
## 0.6765198

1. Regress Internet users on GDP PPP (natural scales)

# Simple linear regression of Internet users per 100 on GDP per capita, both
# on their original scales. The slope is the expected change in Internet
# users per 100 for a one-dollar increase in GDP per capita.
# lm() drops rows with a missing value in either variable.
regr <- lm(Int.users.per.100 ~ GDP.ppp.pc.us, data = df)
summary(regr)
## 
## Call:
## lm(formula = Int.users.per.100 ~ GDP.ppp.pc.us, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.410 -10.327   0.806  12.070  28.745 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.307e+01  3.357e+00   9.850 9.57e-14 ***
## GDP.ppp.pc.us 7.205e-04  1.058e-04   6.813 7.64e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.27 on 55 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.4577, Adjusted R-squared:  0.4478 
## F-statistic: 46.42 on 1 and 55 DF,  p-value: 7.637e-09
# ANOVA table for regr: splits the response sum of squares into the part
# explained by GDP and the residual part, with the corresponding F test.
anova(regr)
## Analysis of Variance Table
## 
## Response: Int.users.per.100
##               Df Sum Sq Mean Sq F value    Pr(>F)    
## GDP.ppp.pc.us  1  13846 13846.1  46.416 7.637e-09 ***
## Residuals     55  16407   298.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot() on an lm fit draws the standard regression diagnostic plots.
plot(regr)

# sjPlot plot of the fitted model -- presumably the coefficient estimates
# with confidence intervals; confirm against the sjPlot documentation.
plot_model(regr)

# Look up the country name (column 1) for rows 40, 47 and 21 -- presumably
# the observations labelled in the diagnostic plots; verify against the plots.
df[40,1]
## [1] Qatar
## 60 Levels: Algeria Argentina Armenia Australia Azerbaijan ... Zimbabwe
df[47,1]
## [1] South Korea
## 60 Levels: Algeria Argentina Armenia Australia Azerbaijan ... Zimbabwe
df[21,1]
## [1] Iraq
## 60 Levels: Algeria Argentina Armenia Australia Azerbaijan ... Zimbabwe

2. Regress Internet users on log GDP PPP

# Same regression with the predictor on the natural-log scale (log() is the
# natural logarithm by default). The slope is the expected change in Internet
# users per 100 for a one-unit increase in log GDP per capita.
regr2 <- lm(Int.users.per.100 ~ log(GDP.ppp.pc.us), data = df)
summary(regr2)
## 
## Call:
## lm(formula = Int.users.per.100 ~ log(GDP.ppp.pc.us), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.436  -7.716   0.998   8.193  22.431 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -151.951     18.049  -8.419 1.82e-11 ***
## log(GDP.ppp.pc.us)   20.865      1.858  11.229 7.46e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.92 on 55 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.6963, Adjusted R-squared:  0.6908 
## F-statistic: 126.1 on 1 and 55 DF,  p-value: 7.458e-16
# ANOVA table for regr2: explained vs. residual sum of squares and F test
# for the log-GDP model.
anova(regr2)
## Analysis of Variance Table
## 
## Response: Int.users.per.100
##                    Df Sum Sq Mean Sq F value    Pr(>F)    
## log(GDP.ppp.pc.us)  1  21065 21064.9   126.1 7.458e-16 ***
## Residuals          55   9188   167.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Standard diagnostic plots for the log-GDP model.
plot(regr2)

# sjPlot plot of the fitted model -- presumably the coefficient estimates
# with confidence intervals; confirm against the sjPlot documentation.
plot_model(regr2)

3. Regress Internet users on GDP PPP (standardized scales)

# Regression with both variables converted to z-scores via scale().
# NOTE(review): scale() standardizes each column using all of its own
# non-missing values, and only afterwards does lm() drop rows where either
# variable is missing (3 rows here). Within the fitted sample the variables
# are therefore not exactly mean 0 / sd 1, which is why the intercept is not
# exactly 0 and the slope (0.671) differs slightly from the Pearson
# correlation (0.677). To make the slope equal r, standardize after
# restricting to complete cases.
regr3 <- lm(scale(Int.users.per.100) ~ scale(GDP.ppp.pc.us), data = df)
summary(regr3)
## 
## Call:
## lm(formula = scale(Int.users.per.100) ~ scale(GDP.ppp.pc.us), 
##     data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23789 -0.44097  0.03443  0.51539  1.22740 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.02359    0.09768   0.242     0.81    
## scale(GDP.ppp.pc.us)  0.67142    0.09855   6.813 7.64e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7375 on 55 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.4577, Adjusted R-squared:  0.4478 
## F-statistic: 46.42 on 1 and 55 DF,  p-value: 7.637e-09
# ANOVA table for regr3. The F statistic and p-value match the natural-scale
# model (46.416, 7.637e-09); only the sums of squares are rescaled.
anova(regr3)
## Analysis of Variance Table
## 
## Response: scale(Int.users.per.100)
##                      Df Sum Sq Mean Sq F value    Pr(>F)    
## scale(GDP.ppp.pc.us)  1 25.245 25.2454  46.416 7.637e-09 ***
## Residuals            55 29.914  0.5439                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Standard diagnostic plots for the standardized model.
plot(regr3)

# sjPlot plot of the fitted model -- presumably the coefficient estimates
# with confidence intervals; confirm against the sjPlot documentation.
plot_model(regr3)