The data set ‘countries.csv’ contains information on 60 countries of the world and their characteristics:
variable | label | possible_range |
---|---|---|
country | country name | NA |
GDP.ppp.pc.us | GDP PPP per capita in US dollars | 0+ |
Int.users.per.100 | Internet users per 100 people | 0-100 |
Status | Freedom House status (free, partly free, or unfree) | Free, Partly free, Unfree |
democracy | binary Przeworski index of democracy (democracy or dictatorship) | Yes, No |
P4 | Polity IV index (ordinal scale from -10, autocracy, to +10, full democracy) | [-10; 10] |
# Load the country-level data; the path is relative to the working directory.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
df <- read.csv("countries.csv", header = TRUE)

# Store the Polity IV score as a factor (one level per observed score).
df$P4 <- as.factor(df$P4)

# Drop columns 1, 8 and 9 by position, leaving the six variables labelled below.
df <- df[, -c(1, 8, 9)]

# Variable labels, one per remaining column, in column order.
labs <- c(
  "Country name",
  "GDP per capita, PPP",
  "Internet per 100",
  "Freedom House Index",
  "Democracy-Dictatorship",
  "Polity IV"
)

# Attach the labels to the data frame's columns.
library(sjlabelled)
df <- set_label(df, label = labs)

# Render an overview table of the variables, their labels and their values.
library(sjPlot)
view_df(df, verbose = FALSE)
ID | Name | Label | Values | Value Labels |
---|---|---|---|---|
1 | country | Country name |
Algeria Argentina Armenia Australia Azerbaijan Bahrain Belarus Brazil Chile China Colombia Cyprus Ecuador Egypt Estonia <… truncated> |
|
2 | GDP.ppp.pc.us | GDP per capita, PPP | range: 1535.5-122609.4 | |
3 | Int.users.per.100 | Internet per 100 | range: 7.1-92.9 | |
4 | Status | Freedom House Index |
Free Partly free Unfree |
|
5 | democracy | Democracy-Dictatorship |
No Yes |
|
6 | P4 | Polity IV |
-10 -9 -7 -6 -4 -3 -2 2 3 4 5 6 7 8 9 <… truncated> |
# Descriptive statistics for every column of df.
# Rows marked with * in the output are non-numeric columns that describe()
# converted to numeric codes, so their mean/sd summarise level codes rather
# than the underlying values.
library(psych)
describe(df)
## vars n mean sd median trimmed mad
## country* 1 60 30.50 17.46 30.50 30.50 22.24
## GDP.ppp.pc.us 2 57 23234.14 21823.75 16457.09 19795.13 13988.32
## Int.users.per.100 3 58 49.26 23.42 48.70 49.17 28.32
## Status* 4 60 1.83 0.81 2.00 1.79 1.48
## democracy* 5 58 1.60 0.49 2.00 1.62 0.00
## P4* 6 56 11.25 4.86 13.00 11.74 4.45
## min max range skew kurtosis se
## country* 1.00 60.00 59.00 0.00 -1.26 2.25
## GDP.ppp.pc.us 1535.48 122609.41 121073.93 2.18 6.18 2890.63
## Int.users.per.100 7.10 92.86 85.76 0.03 -0.96 3.08
## Status* 1.00 3.00 2.00 0.30 -1.43 0.10
## democracy* 1.00 2.00 1.00 -0.41 -1.86 0.06
## P4* 1.00 16.00 15.00 -0.74 -0.89 0.65
# Scatterplot of internet users per 100 (y axis) against GDP per capita, PPP (x axis).
plot(Int.users.per.100 ~ GDP.ppp.pc.us, data = df)
# Pearson product-moment correlation test between the same two variables.
cor.test(df$Int.users.per.100, df$GDP.ppp.pc.us)
##
## Pearson's product-moment correlation
##
## data: df$Int.users.per.100 and df$GDP.ppp.pc.us
## t = 6.8129, df = 55, p-value = 7.637e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5049674 0.7966539
## sample estimates:
## cor
## 0.6765198
# Simple linear regression of internet users per 100 on GDP per capita
# (predictor on its raw US-dollar scale, hence the very small slope).
regr <- lm(Int.users.per.100 ~ GDP.ppp.pc.us, data = df)
# Residual summary, coefficient table and fit statistics for the model.
summary(regr)
##
## Call:
## lm(formula = Int.users.per.100 ~ GDP.ppp.pc.us, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.410 -10.327 0.806 12.070 28.745
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.307e+01 3.357e+00 9.850 9.57e-14 ***
## GDP.ppp.pc.us 7.205e-04 1.058e-04 6.813 7.64e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.27 on 55 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.4577, Adjusted R-squared: 0.4478
## F-statistic: 46.42 on 1 and 55 DF, p-value: 7.637e-09
# ANOVA table for regr: sums of squares for the GDP term and the residuals.
anova(regr)
## Analysis of Variance Table
##
## Response: Int.users.per.100
## Df Sum Sq Mean Sq F value Pr(>F)
## GDP.ppp.pc.us 1 13846 13846.1 46.416 7.637e-09 ***
## Residuals 55 16407 298.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Base-R diagnostic plots for the fitted lm object.
plot(regr)
# sjPlot visualisation of the model, using plot_model()'s default settings.
plot_model(regr)
# Country name stored in row 40 (row chosen by position;
# presumably a point flagged in the diagnostic plots — TODO confirm).
df[40, "country"]
## [1] Qatar
## 60 Levels: Algeria Argentina Armenia Australia Azerbaijan ... Zimbabwe
# Country name stored in row 47 (row chosen by position;
# presumably a point flagged in the diagnostic plots — TODO confirm).
df[47, "country"]
## [1] South Korea
## 60 Levels: Algeria Argentina Armenia Australia Azerbaijan ... Zimbabwe
# Country name stored in row 21 (row chosen by position;
# presumably a point flagged in the diagnostic plots — TODO confirm).
df[21, "country"]
## [1] Iraq
## 60 Levels: Algeria Argentina Armenia Australia Azerbaijan ... Zimbabwe
# Same regression with GDP per capita on the natural-log scale (level-log model):
# the slope is the change in internet users per 100 for a one-unit increase
# in log(GDP), i.e. roughly slope/100 per 1% increase in GDP per capita.
regr2 <- lm(Int.users.per.100 ~ log(GDP.ppp.pc.us), data = df)
# Residual summary, coefficient table and fit statistics for the log model.
summary(regr2)
##
## Call:
## lm(formula = Int.users.per.100 ~ log(GDP.ppp.pc.us), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.436 -7.716 0.998 8.193 22.431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -151.951 18.049 -8.419 1.82e-11 ***
## log(GDP.ppp.pc.us) 20.865 1.858 11.229 7.46e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.92 on 55 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.6963, Adjusted R-squared: 0.6908
## F-statistic: 126.1 on 1 and 55 DF, p-value: 7.458e-16
# ANOVA table for regr2: sums of squares for the log(GDP) term and the residuals.
anova(regr2)
## Analysis of Variance Table
##
## Response: Int.users.per.100
## Df Sum Sq Mean Sq F value Pr(>F)
## log(GDP.ppp.pc.us) 1 21065 21064.9 126.1 7.458e-16 ***
## Residuals 55 9188 167.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Base-R diagnostic plots for the log-GDP model.
plot(regr2)
# sjPlot visualisation of the model, using plot_model()'s default settings.
plot_model(regr2)
# Regression on standardised (z-scored) versions of both variables.
# NOTE(review): scale() here centres and scales each column over all of its own
# non-missing values, and lm() then drops the rows with a missing value in
# either variable. The fitted sample is therefore not exactly mean 0 / sd 1,
# which is why the intercept is not exactly 0 and the slope differs slightly
# from the Pearson correlation reported by cor.test(). To obtain the exact
# standardised coefficient, standardise after restricting to complete cases.
regr3 <- lm(scale(Int.users.per.100) ~ scale(GDP.ppp.pc.us), data = df)
# Residual summary, coefficient table and fit statistics for the standardised model.
summary(regr3)
##
## Call:
## lm(formula = scale(Int.users.per.100) ~ scale(GDP.ppp.pc.us),
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.23789 -0.44097 0.03443 0.51539 1.22740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02359 0.09768 0.242 0.81
## scale(GDP.ppp.pc.us) 0.67142 0.09855 6.813 7.64e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7375 on 55 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.4577, Adjusted R-squared: 0.4478
## F-statistic: 46.42 on 1 and 55 DF, p-value: 7.637e-09
# ANOVA table for regr3: sums of squares for the scaled GDP term and the residuals.
anova(regr3)
## Analysis of Variance Table
##
## Response: scale(Int.users.per.100)
## Df Sum Sq Mean Sq F value Pr(>F)
## scale(GDP.ppp.pc.us) 1 25.245 25.2454 46.416 7.637e-09 ***
## Residuals 55 29.914 0.5439
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Base-R diagnostic plots for the standardised model.
plot(regr3)
# sjPlot visualisation of the model, using plot_model()'s default settings.
plot_model(regr3)