library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(corrplot)
## corrplot 0.92 loaded
library(knitr)
# Read the raw data set from Data.csv; readr reports the column types it
# guessed in the message that follows.
women_entp <- read_csv("Data.csv")
## Rows: 50 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): Country, Level of development, European Union Membership, Currency,...
## dbl (5): Number, Women Entrepreneurship Index, Entrepreneurship Index, Infla...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(women_entp)
}
# Per-column summary statistics of the raw data.
summary(women_entp)
## Number Country Level of development
## Min. : 1.00 Length:50 Length:50
## 1st Qu.:14.25 Class :character Class :character
## Median :29.00 Mode :character Mode :character
## Mean :29.46
## 3rd Qu.:43.75
## Max. :60.00
## European Union Membership Currency Women Entrepreneurship Index
## Length:50 Length:50 Min. :25.30
## Class :character Class :character 1st Qu.:36.23
## Mode :character Mode :character Median :44.05
## Mean :47.72
## 3rd Qu.:59.48
## Max. :74.80
## Entrepreneurship Index Inflation rate Inflation Type
## Min. :24.80 Min. :-2.250 Length:50
## 1st Qu.:31.80 1st Qu.:-0.450 Class :character
## Median :42.35 Median : 0.600 Mode :character
## Mean :46.80 Mean : 2.652
## 3rd Qu.:65.20 3rd Qu.: 3.650
## Max. :77.60 Max. :26.500
## Female Labor Force Participation Rate
## Min. :13.00
## 1st Qu.:55.90
## Median :61.05
## Mean :58.55
## 3rd Qu.:67.55
## Max. :82.30
# Show the structure of the data: column names, types and first values.
str(women_entp)
## spec_tbl_df [50 x 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Number : num [1:50] 16 22 55 8 35 44 57 17 51 53 ...
## $ Country : chr [1:50] "El Salvador" "Greece" "Switzerland" "Bosnia and Herzegovina" ...
## $ Level of development : chr [1:50] "Developing" "Developed" "Developed" "Developing" ...
## $ European Union Membership : chr [1:50] "Not Member" "Member" "Not Member" "Not Member" ...
## $ Currency : chr [1:50] "National Currency" "Euro" "National Currency" "National Currency" ...
## $ Women Entrepreneurship Index : num [1:50] 29.9 43 63.7 31.6 58.5 57.7 36.6 55.4 55.9 52.5 ...
## $ Entrepreneurship Index : num [1:50] 29.6 42 68.6 28.9 54.6 47.4 32.1 60.2 53.1 49.6 ...
## $ Inflation rate : num [1:50] -2.25 -1.7 -1.1 -1 -0.9 -0.9 -0.9 -0.88 -0.5 -0.5 ...
## $ Inflation Type : chr [1:50] "Deflation" "Deflation" "Deflation" "Deflation" ...
## $ Female Labor Force Participation Rate: num [1:50] 55.7 42.5 74.7 51.9 66.5 56.6 62 68.5 61 52.7 ...
## - attr(*, "spec")=
## .. cols(
## .. Number = col_double(),
## .. Country = col_character(),
## .. `Level of development` = col_character(),
## .. `European Union Membership` = col_character(),
## .. Currency = col_character(),
## .. `Women Entrepreneurship Index` = col_double(),
## .. `Entrepreneurship Index` = col_double(),
## .. `Inflation rate` = col_double(),
## .. `Inflation Type` = col_character(),
## .. `Female Labor Force Participation Rate` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Convert four of the character columns to factors in one pass.
factor_cols <- c(
  "Level of development",
  "European Union Membership",
  "Country",
  "Currency"
)
women_entp[factor_cols] <- lapply(women_entp[factor_cols], as.factor)
Multicollinearity:
# Baseline model: Women Entrepreneurship Index regressed on the three numeric
# predictors. The fitted object is stored under the name `lm` (the same name
# as the fitting function); the name is kept because later lines refer to it.
lm <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Inflation rate` + `Female Labor Force Participation Rate`,
  data = women_entp
)
summary(lm)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Inflation rate` + `Female Labor Force Participation Rate`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1599 -3.4630 -0.5734 2.7374 9.0744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.24174 3.62596 1.446 0.1551
## `Entrepreneurship Index` 0.74512 0.05268 14.145 <2e-16
## `Inflation rate` -0.29493 0.14791 -1.994 0.0521
## `Female Labor Force Participation Rate` 0.14328 0.05627 2.546 0.0143
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Inflation rate` .
## `Female Labor Force Participation Rate` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.168 on 46 degrees of freedom
## Multiple R-squared: 0.8789, Adjusted R-squared: 0.871
## F-statistic: 111.3 on 3 and 46 DF, p-value: < 2.2e-16
# Variance inflation factor for each predictor of the baseline model.
vif(lm)
## `Entrepreneurship Index` `Inflation rate`
## 1.311407 1.176966
## `Female Labor Force Participation Rate`
## 1.137753
Because all VIF values are below 4, the three independent variables are not highly correlated with each other.
Data Scaling:
# Summary statistics for the four numeric analysis columns (positions 6-8
# and 10: both indices, inflation rate and female labor force participation).
summary(women_entp[, c(6:8, 10)])
## Women Entrepreneurship Index Entrepreneurship Index Inflation rate
## Min. :25.30 Min. :24.80 Min. :-2.250
## 1st Qu.:36.23 1st Qu.:31.80 1st Qu.:-0.450
## Median :44.05 Median :42.35 Median : 0.600
## Mean :47.72 Mean :46.80 Mean : 2.652
## 3rd Qu.:59.48 3rd Qu.:65.20 3rd Qu.: 3.650
## Max. :74.80 Max. :77.60 Max. :26.500
## Female Labor Force Participation Rate
## Min. :13.00
## 1st Qu.:55.90
## Median :61.05
## Mean :58.55
## 3rd Qu.:67.55
## Max. :82.30
We notice that while Entrepreneurship Index & Female Labor Force Participation Rate range from 0 - 100, inflation rate has a different range
Let’s try to scale this variable to see if we can have a better model:
# Work on a copy so the original inflation values stay untouched.
women_entp_2 <- women_entp
# Standardise the inflation rate (centre and divide by its standard deviation).
women_entp_2$`Inflation rate` <- scale(women_entp_2$`Inflation rate`)
# Same specification as the baseline model, fitted on the scaled copy.
lm_post_scale <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Inflation rate` + `Female Labor Force Participation Rate`,
  data = women_entp_2
)
summary(lm_post_scale)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Inflation rate` + `Female Labor Force Participation Rate`,
## data = women_entp_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1599 -3.4630 -0.5734 2.7374 9.0744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.45970 3.50200 1.273 0.2092
## `Entrepreneurship Index` 0.74512 0.05268 14.145 <2e-16
## `Inflation rate` -1.59724 0.80102 -1.994 0.0521
## `Female Labor Force Participation Rate` 0.14328 0.05627 2.546 0.0143
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Inflation rate` .
## `Female Labor Force Participation Rate` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.168 on 46 degrees of freedom
## Multiple R-squared: 0.8789, Adjusted R-squared: 0.871
## F-statistic: 111.3 on 3 and 46 DF, p-value: < 2.2e-16
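The model fit does not change (identical residuals, t-values and R-squared; only the intercept and the Inflation rate coefficient are rescaled), so there is no need for scaling.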
Normal Distribution tests for continuous data:
# Scatterplot matrix of the four numeric columns (positions 6, 7, 8 and 10).
scatterplotMatrix(women_entp[,c(6,7,8,10)])
# Print the baseline fit again.
summary(lm)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Inflation rate` + `Female Labor Force Participation Rate`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1599 -3.4630 -0.5734 2.7374 9.0744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.24174 3.62596 1.446 0.1551
## `Entrepreneurship Index` 0.74512 0.05268 14.145 <2e-16
## `Inflation rate` -0.29493 0.14791 -1.994 0.0521
## `Female Labor Force Participation Rate` 0.14328 0.05627 2.546 0.0143
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Inflation rate` .
## `Female Labor Force Participation Rate` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.168 on 46 degrees of freedom
## Multiple R-squared: 0.8789, Adjusted R-squared: 0.871
## F-statistic: 111.3 on 3 and 46 DF, p-value: < 2.2e-16
# Refit the baseline specification under the name lm1; formula and data are
# the same as for the object `lm`, so the printed summary is identical.
lm1 <- lm(`Women Entrepreneurship Index` ~ `Entrepreneurship Index`+ `Inflation rate`+`Female Labor Force Participation Rate`, data = women_entp)
summary(lm1)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Inflation rate` + `Female Labor Force Participation Rate`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1599 -3.4630 -0.5734 2.7374 9.0744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.24174 3.62596 1.446 0.1551
## `Entrepreneurship Index` 0.74512 0.05268 14.145 <2e-16
## `Inflation rate` -0.29493 0.14791 -1.994 0.0521
## `Female Labor Force Participation Rate` 0.14328 0.05627 2.546 0.0143
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Inflation rate` .
## `Female Labor Force Participation Rate` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.168 on 46 degrees of freedom
## Multiple R-squared: 0.8789, Adjusted R-squared: 0.871
## F-statistic: 111.3 on 3 and 46 DF, p-value: < 2.2e-16
# Refit of the scaled-inflation model (same call as the earlier
# lm_post_scale assignment); the result is not printed here.
lm_post_scale <- lm(`Women Entrepreneurship Index` ~ `Entrepreneurship Index`+ `Inflation rate`+`Female Labor Force Participation Rate`, data= women_entp_2)
Let’s try to normalize our continuous variables by performing BoxCox Transformation:
Transform women entrepreneurship index:
# Estimate the power-transformation parameter for the response variable.
powerTransform(women_entp$`Women Entrepreneurship Index`)
## Estimated transformation parameter
## women_entp$`Women Entrepreneurship Index`
## 0.3797689
# Keep the estimated power in `a`, then store the transformed response as a
# new column of women_entp.
a <- coef(powerTransform(women_entp$`Women Entrepreneurship Index`))
women_entp$bcWomenEntrepreneurshipIndex<- bcPower(women_entp$`Women Entrepreneurship Index`,a)
Transform Entrepreneurship Index
# Estimate the power-transformation parameter for the Entrepreneurship Index.
powerTransform(women_entp$`Entrepreneurship Index`)
## Estimated transformation parameter
## women_entp$`Entrepreneurship Index`
## 0.0492938
# Keep the estimated power in `b`, then store the transformed index as a new
# column of women_entp.
b <- coef(powerTransform(women_entp$`Entrepreneurship Index`))
women_entp$bcEntrepreneurshipIndex<- bcPower(women_entp$`Entrepreneurship Index`,b)
Transform Female Labor Force Participation Rate:
# Estimate the power-transformation parameter for the participation rate.
powerTransform(women_entp$`Female Labor Force Participation Rate`)
## Estimated transformation parameter
## women_entp$`Female Labor Force Participation Rate`
## 2.611958
# Keep the estimated power in `d` and print the transformed values for
# inspection; they are stored as a column in a later statement.
d <- coef(powerTransform(women_entp$`Female Labor Force Participation Rate`))
bcPower(women_entp$`Female Labor Force Participation Rate`,d)
## [1] 13903.4282 6859.5037 29926.9569 11560.1678 22088.1552 14497.8914
## [7] 18393.9997 23865.6407 17629.1153 12031.4123 17179.7724 16288.0752
## [13] 19579.1648 16446.7084 14034.2050 23144.4533 15314.5438 17328.7581
## [19] 9021.5507 29199.9718 23323.4706 22001.5019 25160.7565 25538.5723
## [25] 15453.3452 24507.9192 20560.3846 16446.7084 22612.4979 310.5072
## [31] 18705.5836 22349.3776 38545.4375 24507.9192 15803.7368 7826.1133
## [37] 19498.7300 5015.7711 28180.4896 24693.3639 726.9825 1749.3938
## [43] 17704.7027 2858.7675 23413.2986 14034.2050 20477.4835 20977.9957
## [49] 17478.5385 9071.5617
# Store the transformed participation rate as a new column of women_entp.
women_entp$bcFemaleLaborForceParticipationRate <- bcPower(women_entp$`Female Labor Force Participation Rate`,d)
Testing the distribution after transformation:
# Q-Q plot of the Box-Cox transformed response. This section checks the
# distribution after transformation, so the transformed column is plotted.
# The column is given by bare name: aes() is evaluated inside `data`, and
# writing women_entp$... there bypasses that lookup.
ggplot(data = women_entp, aes(sample = bcWomenEntrepreneurshipIndex)) +
  stat_qq() +
  stat_qq_line()
# Scatterplot matrix of the three Box-Cox columns, selected by name so the
# plot does not depend on their position in the data frame.
scatterplotMatrix(women_entp[, c(
  "bcWomenEntrepreneurshipIndex",
  "bcEntrepreneurshipIndex",
  "bcFemaleLaborForceParticipationRate"
)])
# Inflation rate was not transformed; look at its raw distribution.
hist(women_entp$`Inflation rate`)
# The data viewer is only available in interactive sessions.
if (interactive()) {
  View(women_entp)
}
# Refit the three-predictor model on the Box-Cox scale (inflation rate is
# used untransformed).
lm1 <- lm(
  bcWomenEntrepreneurshipIndex ~ bcEntrepreneurshipIndex +
    `Inflation rate` + bcFemaleLaborForceParticipationRate,
  data = women_entp
)
summary(lm1)
##
## Call:
## lm(formula = bcWomenEntrepreneurshipIndex ~ bcEntrepreneurshipIndex +
## `Inflation rate` + bcFemaleLaborForceParticipationRate, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.07968 -0.34121 -0.06845 0.28535 1.01660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.405e+00 8.179e-01 -2.941 0.00511 **
## bcEntrepreneurshipIndex 2.594e+00 2.029e-01 12.784 < 2e-16 ***
## `Inflation rate` -2.768e-02 1.474e-02 -1.878 0.06667 .
## bcFemaleLaborForceParticipationRate 1.962e-05 1.002e-05 1.958 0.05627 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.51 on 46 degrees of freedom
## Multiple R-squared: 0.8618, Adjusted R-squared: 0.8528
## F-statistic: 95.6 on 3 and 46 DF, p-value: < 2.2e-16
# Print the untransformed baseline fit again (presumably to compare it with
# the Box-Cox fit lm1 printed just before).
summary(lm)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Inflation rate` + `Female Labor Force Participation Rate`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1599 -3.4630 -0.5734 2.7374 9.0744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.24174 3.62596 1.446 0.1551
## `Entrepreneurship Index` 0.74512 0.05268 14.145 <2e-16
## `Inflation rate` -0.29493 0.14791 -1.994 0.0521
## `Female Labor Force Participation Rate` 0.14328 0.05627 2.546 0.0143
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Inflation rate` .
## `Female Labor Force Participation Rate` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.168 on 46 degrees of freedom
## Multiple R-squared: 0.8789, Adjusted R-squared: 0.871
## F-statistic: 111.3 on 3 and 46 DF, p-value: < 2.2e-16
# Scatterplot of the two indices with one colour/shape and one fitted
# straight line per level of development.
ggplot(
  women_entp,
  aes(
    x = `Women Entrepreneurship Index`,
    y = `Entrepreneurship Index`,
    colour = `Level of development`,
    shape = `Level of development`
  )
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Index by Level of Development",
    x = "Women Entrepreneurship Index",
    y = "Entrepreneurship Index"
  )
## `geom_smooth()` using formula 'y ~ x'
# Scatterplot of the two indices with one colour/shape and one fitted
# straight line per EU membership group.
ggplot(
  women_entp,
  aes(
    x = `Women Entrepreneurship Index`,
    y = `Entrepreneurship Index`,
    colour = `European Union Membership`,
    shape = `European Union Membership`
  )
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Index by European Union Membership",
    x = "Women Entrepreneurship Index",
    y = "Entrepreneurship Index"
  )
## `geom_smooth()` using formula 'y ~ x'
# Box-Cox transformed participation rate by development level. With
# horizontal = TRUE the values run along the x-axis and the groups along the
# y-axis. The x label states that the plotted values are the transformed
# ones, not the raw percentage.
boxplot(
  bcFemaleLaborForceParticipationRate ~ `Level of development`,
  data = women_entp,
  main = "Female Labor Force Participation Rate by Development Level",
  xlab = "Female Labor Force Participation Rate (Box-Cox transformed)",
  ylab = "Level of Development",
  horizontal = TRUE
)
# Vertical boxplot: the groups (EU membership) are on the x-axis and the
# index values on the y-axis, so the labels are assigned accordingly.
boxplot(
  `Women Entrepreneurship Index` ~ `European Union Membership`,
  data = women_entp,
  main = "Women Entrepreneurship Index by European Union Membership",
  xlab = "European Union Membership",
  ylab = "Women Entrepreneurship Index",
  horizontal = FALSE
)
# One bar per country, coloured by level of development. Indexing `colors`
# with the factor uses its integer codes, so the first level gets "Blue" and
# the second level gets "Red".
colors <- c("Blue", "Red")
barplot(
  women_entp$`Entrepreneurship Index`,
  main = "Entrepreneurship Index Rate by Country",
  ylab = "Entrepreneurship Index Rate",
  names.arg = women_entp$Country,
  col = colors[women_entp$`Level of development`],
  horiz = FALSE
)
# `fill` draws the coloured boxes next to the labels; `col` alone only
# colours points or lines and would leave the legend without colour keys.
# The labels come from the factor levels so they stay in step with the bars.
legend(
  x = "topright",
  legend = levels(women_entp$`Level of development`),
  fill = colors
)
# Draw y-axis ticks every 5 units from 0 to 100.
axis(side = 2, at = seq(0, 100, by = 5))
Our potential predictors are: European Union Membership, Level of development, and Inflation Type.
The potential dependent variables are: Women Entrepreneurship Index, Entrepreneurship Index, Inflation rate, and Female Labor Force Participation Rate.
EU Membership vs. Women Entrepreneurship Index
# One-way ANOVA of the Women Entrepreneurship Index on EU membership.
# NOTE(review): both variables are written as women_entp$..., so the `data`
# argument is not needed to resolve them.
eu_aov <- aov(women_entp$`Women Entrepreneurship Index`~ women_entp$`European Union Membership`, data = women_entp)
anova(eu_aov)
## Analysis of Variance Table
##
## Response: women_entp$`Women Entrepreneurship Index`
## Df Sum Sq Mean Sq F value Pr(>F)
## women_entp$`European Union Membership` 1 4115.1 4115.1 32.742 6.644e-07 ***
## Residuals 48 6032.7 125.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD comparison of the group means from eu_aov.
TukeyHSD(eu_aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = women_entp$`Women Entrepreneurship Index` ~ women_entp$`European Union Membership`, data = women_entp)
##
## $`women_entp$`European Union Membership``
## diff lwr upr p adj
## Not Member-Member -18.51833 -25.02531 -12.01135 7e-07
It seems that there is an association between EU Membership and Women Entrepreneurship Index; this could reflect a correlation between a nation's level of development and its Women Entrepreneurship Index.
Level of Development vs Women Entrepreneurship Index
# One-way ANOVA of the Women Entrepreneurship Index on level of development.
development_aov <- aov(women_entp$`Women Entrepreneurship Index`~women_entp$`Level of development`)
anova(development_aov)
## Analysis of Variance Table
##
## Response: women_entp$`Women Entrepreneurship Index`
## Df Sum Sq Mean Sq F value Pr(>F)
## women_entp$`Level of development` 1 7624.8 7624.8 145.06 4.09e-16 ***
## Residuals 48 2523.1 52.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD comparison for development_aov; kept in tukey1 so it can be
# printed and plotted.
tukey1 <- TukeyHSD(development_aov)
tukey1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = women_entp$`Women Entrepreneurship Index` ~ women_entp$`Level of development`)
##
## $`women_entp$`Level of development``
## diff lwr upr p adj
## Developing-Developed -24.71763 -28.84404 -20.59122 0
# Plot the confidence interval for the difference in means held in tukey1.
plot(tukey1)
Looks like there is a difference in Women Entrepreneurship index means between countries with 2 levels of development: “Developed” & “Developing”, thus an association between Women Entrepreneurship index and Level of Development. We will measure this correlation in further tests.
Level of Development vs Female Labor Force Participation Rate
# One-way ANOVA of the (untransformed) female labor force participation rate
# on level of development.
development_labor_aov <- aov(women_entp$`Female Labor Force Participation Rate`~women_entp$`Level of development`)
anova(development_labor_aov)
## Analysis of Variance Table
##
## Response: women_entp$`Female Labor Force Participation Rate`
## Df Sum Sq Mean Sq F value Pr(>F)
## women_entp$`Level of development` 1 1277.0 1277.04 7.3658 0.009206 **
## Residuals 48 8321.9 173.37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD comparison for development_labor_aov; kept in tukey2 so it can
# be printed and plotted.
tukey2 <- TukeyHSD(development_labor_aov)
tukey2
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = women_entp$`Female Labor Force Participation Rate` ~ women_entp$`Level of development`)
##
## $`women_entp$`Level of development``
## diff lwr upr p adj
## Developing-Developed -10.11567 -17.60973 -2.621621 0.009206
# Plot the confidence interval for the difference in means held in tukey2.
plot(tukey2)
An Alternative: Performing T-Test
Level of Development is a factor to predict both Female Labor Force Participation Rate and Women Entrepreneurship Index of a country.
Inflation Type vs. Women Entrepreneurship Index
# Histogram of the inflation rate on the density scale (freq = FALSE). The
# default x-axis is suppressed (xaxt = "n") and replaced by ticks every
# 2 units from -5 to 30.
hist(women_entp$`Inflation rate`, col = "yellowgreen",breaks = 20, freq = FALSE, xaxt="n")
axis(side=1, at=seq(-5,30,by=2))
# One-way ANOVA of the Women Entrepreneurship Index on inflation type.
inflation_aov <- aov(women_entp$`Women Entrepreneurship Index`~women_entp$`Inflation Type`)
anova(inflation_aov)
## Analysis of Variance Table
##
## Response: women_entp$`Women Entrepreneurship Index`
## Df Sum Sq Mean Sq F value Pr(>F)
## women_entp$`Inflation Type` 3 3861.2 1287.06 9.4175 5.776e-05 ***
## Residuals 46 6286.7 136.67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD pairwise comparisons between the inflation types; kept in tukey3
# so it can be printed and plotted.
tukey3 <- TukeyHSD(inflation_aov)
tukey3
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = women_entp$`Women Entrepreneurship Index` ~ women_entp$`Inflation Type`)
##
## $`women_entp$`Inflation Type``
## diff lwr upr
## Galloping Inflation-Deflation -19.761176 -35.614218 -3.908135
## Moderate Inflation-Deflation 4.006192 -6.396859 14.409242
## Walking Inflation-Deflation -15.352288 -28.197807 -2.506768
## Moderate Inflation-Galloping Inflation 23.767368 8.105105 39.429632
## Walking Inflation-Galloping Inflation 4.408889 -12.971860 21.789638
## Walking Inflation-Moderate Inflation -19.358480 -31.967800 -6.749159
## p adj
## Galloping Inflation-Deflation 0.0091848
## Moderate Inflation-Deflation 0.7348986
## Walking Inflation-Deflation 0.0133556
## Moderate Inflation-Galloping Inflation 0.0011007
## Walking Inflation-Galloping Inflation 0.9055981
## Walking Inflation-Moderate Inflation 0.0009509
# Plot the pairwise confidence intervals held in tukey3.
plot(tukey3)
# One-way ANOVA of the (untransformed) female labor force participation rate
# on inflation type.
inflation_aov2 <- aov(women_entp$`Female Labor Force Participation Rate`~women_entp$`Inflation Type`)
anova(inflation_aov2)
## Analysis of Variance Table
##
## Response: women_entp$`Female Labor Force Participation Rate`
## Df Sum Sq Mean Sq F value Pr(>F)
## women_entp$`Inflation Type` 3 934.1 311.36 1.653 0.1903
## Residuals 46 8664.9 188.37
There is an association between Inflation Type and Women Entrepreneurship Index (but not between Inflation Type and Female Labor Force Participation Rate, p = 0.19). However, correlation doesn’t mean causation, so we’re going to test how strong and how reliable the Inflation Type–Women Entrepreneurship Index relationship is by fitting some regression models below.
# Pearson correlation matrix of the four numeric columns (positions 6-8 and
# 10): draw it with corrplot, then print the same matrix.
cmat <- cor(women_entp[, c(6:8, 10)])
corrplot.mixed(cmat)
cmat
## Women Entrepreneurship Index
## Women Entrepreneurship Index 1.0000000
## Entrepreneurship Index 0.9225547
## Inflation rate -0.4531406
## Female Labor Force Participation Rate 0.4443458
## Entrepreneurship Index Inflation rate
## Women Entrepreneurship Index 0.9225547 -0.4531406
## Entrepreneurship Index 1.0000000 -0.3876506
## Inflation rate -0.3876506 1.0000000
## Female Labor Force Participation Rate 0.3478316 -0.1434566
## Female Labor Force Participation Rate
## Women Entrepreneurship Index 0.4443458
## Entrepreneurship Index 0.3478316
## Inflation rate -0.1434566
## Female Labor Force Participation Rate 1.0000000
REGRESSION MODEL:
# Starting regression model: the two numeric predictors only (inflation rate
# is left out). Note that this reuses the name lm1.
lm1 <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Female Labor Force Participation Rate`,
  data = women_entp
)
summary(lm1)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Female Labor Force Participation Rate`, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7238 -3.4104 -0.8712 2.8059 9.7909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.60455 3.48141 0.748 0.4581
## `Entrepreneurship Index` 0.78336 0.05059 15.483 <2e-16
## `Female Labor Force Participation Rate` 0.14440 0.05802 2.489 0.0164
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Female Labor Force Participation Rate` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.33 on 47 degrees of freedom
## Multiple R-squared: 0.8684, Adjusted R-squared: 0.8628
## F-statistic: 155.1 on 2 and 47 DF, p-value: < 2.2e-16
Adding the level of development variable:
# Starting model plus level of development as a categorical predictor.
lm2 <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Female Labor Force Participation Rate` +
    `Level of development`,
  data = women_entp
)
summary(lm2)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Female Labor Force Participation Rate` + `Level of development`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.3672 -2.2971 0.0437 2.4707 10.4389
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.46869 5.03596 3.866 0.000346
## `Entrepreneurship Index` 0.55812 0.06942 8.040 2.58e-10
## `Female Labor Force Participation Rate` 0.11239 0.05055 2.223 0.031161
## `Level of development`Developing -9.26564 2.22310 -4.168 0.000134
##
## (Intercept) ***
## `Entrepreneurship Index` ***
## `Female Labor Force Participation Rate` *
## `Level of development`Developing ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.59 on 46 degrees of freedom
## Multiple R-squared: 0.9045, Adjusted R-squared: 0.8983
## F-statistic: 145.2 on 3 and 46 DF, p-value: < 2.2e-16
# Adjusted R-squared of lm2 (the model with level of development).
summary(lm2)$adj.r.squared
## [1] 0.8982792
# Adjusted R-squared of `lm`, the three-predictor fit that includes
# `Inflation rate`.
# NOTE(review): the model nested in lm2 is lm1 (two numeric predictors);
# confirm which of the two is the intended comparison.
summary(lm)$adj.r.squared
## [1] 0.8710146
Level of development is significant. We should add this to our final model.
Adding EU membership variable:
# Starting model plus EU membership as a categorical predictor.
lm3 <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Female Labor Force Participation Rate` +
    `European Union Membership`,
  data = women_entp
)
summary(lm3)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Female Labor Force Participation Rate` + `European Union Membership`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.216 -2.981 -0.264 2.154 10.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.14727 3.66882 3.038 0.003913
## `Entrepreneurship Index` 0.68201 0.05035 13.547 < 2e-16
## `Female Labor Force Participation Rate` 0.14465 0.05024 2.879 0.006031
## `European Union Membership`Not Member -6.35642 1.55602 -4.085 0.000174
##
## (Intercept) **
## `Entrepreneurship Index` ***
## `Female Labor Force Participation Rate` **
## `European Union Membership`Not Member ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.615 on 46 degrees of freedom
## Multiple R-squared: 0.9035, Adjusted R-squared: 0.8972
## F-statistic: 143.5 on 3 and 46 DF, p-value: < 2.2e-16
EU membership is significant too. We should add it to the model
Adding Inflation type:
# Starting model plus inflation type as a categorical predictor.
lm4 <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Female Labor Force Participation Rate` +
    `Inflation Type`,
  data = women_entp
)
summary(lm4)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Female Labor Force Participation Rate` + `Inflation Type`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7482 -3.8418 -0.1011 2.8111 10.0654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.77186 4.08215 1.659 0.1042
## `Entrepreneurship Index` 0.73536 0.05999 12.258 8.75e-16
## `Female Labor Force Participation Rate` 0.14404 0.05828 2.471 0.0174
## `Inflation Type`Galloping Inflation -5.76335 2.88115 -2.000 0.0517
## `Inflation Type`Moderate Inflation -1.74323 1.79543 -0.971 0.3369
## `Inflation Type`Walking Inflation -3.66927 2.32621 -1.577 0.1219
##
## (Intercept)
## `Entrepreneurship Index` ***
## `Female Labor Force Participation Rate` *
## `Inflation Type`Galloping Inflation .
## `Inflation Type`Moderate Inflation
## `Inflation Type`Walking Inflation
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.221 on 44 degrees of freedom
## Multiple R-squared: 0.8818, Adjusted R-squared: 0.8684
## F-statistic: 65.64 on 5 and 44 DF, p-value: < 2.2e-16
Inflation Type is not statistically significant (no level has p < 0.05). We should not use it as a predictor.
Combining both EU membership and Level of development as predictors in our linear regression model.
# Starting model with both categorical predictors: EU membership and level
# of development.
lm5 <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Female Labor Force Participation Rate` +
    `European Union Membership` +
    `Level of development`,
  data = women_entp
)
summary(lm5)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Female Labor Force Participation Rate` + `European Union Membership` +
## `Level of development`, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3529 -2.2931 -0.0482 1.9733 10.6447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.43777 5.12623 3.402 0.00141
## `Entrepreneurship Index` 0.59229 0.07177 8.253 1.48e-10
## `Female Labor Force Participation Rate` 0.12535 0.05047 2.484 0.01679
## `European Union Membership`Not Member -3.51329 2.24790 -1.563 0.12508
## `Level of development`Developing -5.55556 3.22907 -1.720 0.09221
##
## (Intercept) **
## `Entrepreneurship Index` ***
## `Female Labor Force Participation Rate` *
## `European Union Membership`Not Member
## `Level of development`Developing .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.519 on 45 degrees of freedom
## Multiple R-squared: 0.9094, Adjusted R-squared: 0.9014
## F-statistic: 113 on 4 and 45 DF, p-value: < 2.2e-16
# Draw the standard regression diagnostic plots for lm5.
plot(lm5)
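When combined, neither factor is individually significant at the 5% level (EU membership p = 0.125, level of development p = 0.092), which suggests the two factors carry overlapping information; the adjusted R-squared rises only slightly, to 0.9014.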
# Main effects of the three predictors plus all of their two-way
# interactions (the ^2 expands the sum in parentheses).
lm6 <- lm(
  `Women Entrepreneurship Index` ~ (
    `Entrepreneurship Index` +
      `Female Labor Force Participation Rate` +
      `European Union Membership`
  )^2,
  data = women_entp
)
summary(lm6)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ (`Entrepreneurship Index` +
## `Female Labor Force Participation Rate` + `European Union Membership`)^2,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7071 -2.5883 0.2194 1.8633 9.9920
##
## Coefficients:
## Estimate
## (Intercept) 50.972348
## `Entrepreneurship Index` 0.003225
## `Female Labor Force Participation Rate` -0.374999
## `European Union Membership`Not Member -26.634909
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.008631
## `Entrepreneurship Index`:`European Union Membership`Not Member 0.182944
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.171653
## Std. Error
## (Intercept) 13.989126
## `Entrepreneurship Index` 0.266654
## `Female Labor Force Participation Rate` 0.264830
## `European Union Membership`Not Member 8.371874
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.003683
## `Entrepreneurship Index`:`European Union Membership`Not Member 0.144850
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.190931
## t value
## (Intercept) 3.644
## `Entrepreneurship Index` 0.012
## `Female Labor Force Participation Rate` -1.416
## `European Union Membership`Not Member -3.181
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 2.344
## `Entrepreneurship Index`:`European Union Membership`Not Member 1.263
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.899
## Pr(>|t|)
## (Intercept) 0.000719
## `Entrepreneurship Index` 0.990408
## `Female Labor Force Participation Rate` 0.163978
## `European Union Membership`Not Member 0.002720
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.023787
## `Entrepreneurship Index`:`European Union Membership`Not Member 0.213400
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.373644
##
## (Intercept) ***
## `Entrepreneurship Index`
## `Female Labor Force Participation Rate`
## `European Union Membership`Not Member **
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` *
## `Entrepreneurship Index`:`European Union Membership`Not Member
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.255 on 43 degrees of freedom
## Multiple R-squared: 0.9233, Adjusted R-squared: 0.9126
## F-statistic: 86.24 on 6 and 43 DF, p-value: < 2.2e-16
# Adjusted R-squared of the two-way interaction model lm6.
summary(lm6)$adj.r.squared
## [1] 0.9125688
# Four main effects plus two hand-picked interactions: Entrepreneurship Index
# with the participation rate, and Entrepreneurship Index with level of
# development.
lm7 <- lm(
  `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
    `Female Labor Force Participation Rate` +
    `European Union Membership` +
    `Level of development` +
    `Entrepreneurship Index`:`Female Labor Force Participation Rate` +
    `Entrepreneurship Index`:`Level of development`,
  data = women_entp
)
summary(lm7)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Entrepreneurship Index` +
## `Female Labor Force Participation Rate` + `European Union Membership` +
## `Level of development` + `Entrepreneurship Index`:`Female Labor Force Participation Rate` +
## `Entrepreneurship Index`:`Level of development`, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3795 -3.0860 -0.1211 2.0883 9.6437
##
## Coefficients:
## Estimate
## (Intercept) 33.110133
## `Entrepreneurship Index` 0.219467
## `Female Labor Force Participation Rate` -0.129003
## `European Union Membership`Not Member -4.282898
## `Level of development`Developing -5.380173
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.005944
## `Entrepreneurship Index`:`Level of development`Developing 0.018949
## Std. Error
## (Intercept) 14.482872
## `Entrepreneurship Index` 0.343521
## `Female Labor Force Participation Rate` 0.194817
## `European Union Membership`Not Member 2.305267
## `Level of development`Developing 9.016243
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.004728
## `Entrepreneurship Index`:`Level of development`Developing 0.211059
## t value
## (Intercept) 2.286
## `Entrepreneurship Index` 0.639
## `Female Labor Force Participation Rate` -0.662
## `European Union Membership`Not Member -1.858
## `Level of development`Developing -0.597
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 1.257
## `Entrepreneurship Index`:`Level of development`Developing 0.090
## Pr(>|t|)
## (Intercept) 0.0272 *
## `Entrepreneurship Index` 0.5263
## `Female Labor Force Participation Rate` 0.5114
## `European Union Membership`Not Member 0.0700 .
## `Level of development`Developing 0.5538
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.2155
## `Entrepreneurship Index`:`Level of development`Developing 0.9289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.511 on 43 degrees of freedom
## Multiple R-squared: 0.9138, Adjusted R-squared: 0.9017
## F-statistic: 75.95 on 6 and 43 DF, p-value: < 2.2e-16
# Adjusted R-squared of lm7.
summary(lm7)$adj.r.squared
## [1] 0.9017475
# Coefficient table of lm7 (estimate, standard error, t value, p-value) at
# full printed precision.
summary(lm7)$coefficients
## Estimate
## (Intercept) 33.110132975
## `Entrepreneurship Index` 0.219467275
## `Female Labor Force Participation Rate` -0.129003241
## `European Union Membership`Not Member -4.282897938
## `Level of development`Developing -5.380173313
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.005943545
## `Entrepreneurship Index`:`Level of development`Developing 0.018948823
## Std. Error
## (Intercept) 14.482872017
## `Entrepreneurship Index` 0.343520685
## `Female Labor Force Participation Rate` 0.194816933
## `European Union Membership`Not Member 2.305266824
## `Level of development`Developing 9.016243208
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.004727734
## `Entrepreneurship Index`:`Level of development`Developing 0.211059454
## t value
## (Intercept) 2.28615795
## `Entrepreneurship Index` 0.63887645
## `Female Labor Force Participation Rate` -0.66217674
## `European Union Membership`Not Member -1.85787515
## `Level of development`Developing -0.59672007
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 1.25716566
## `Entrepreneurship Index`:`Level of development`Developing 0.08977955
## Pr(>|t|)
## (Intercept) 0.02723243
## `Entrepreneurship Index` 0.52629196
## `Female Labor Force Participation Rate` 0.51139148
## `European Union Membership`Not Member 0.07004243
## `Level of development`Developing 0.55382324
## `Entrepreneurship Index`:`Female Labor Force Participation Rate` 0.21547935
## `Entrepreneurship Index`:`Level of development`Developing 0.92887929
It seems like lm7 is a suitable model to predict Women Entrepreneurship Index, with R-squared = 0.9138 (adjusted R-squared = 0.9017), although lm6 has a slightly higher adjusted R-squared (0.9126). One of the interaction terms in this model is Entrepreneurship Index:Level of development (Developing). What this means is that, for a developing country, a 1-point increase in the entrepreneurship index is predicted to increase the women entrepreneurship index by an additional 0.0189 compared with a developed country; note that this interaction term is not statistically significant (p = 0.93).
Our Final Model Can Be Represented in Formula as below:
#y = women entrepreneurship index, E= EU membership, L = Level of development, x1 = Entrepreneurship Index, x2= Inflation rate, x3 = Female Labor Force Participation Rate
#y = 3.596 + 0.02617x1 - 0.0007903x2 - 0.0008124x3 - 0.01126E - 0.626L - 0.00003545*x1*x3 - 0.01434*x1*L
CONCLUSION: Our model to predict WEI is good but the result is not very interesting because it’s obvious that entrepreneurship index, naturally, is a factor that is going to impact WEI the most without further analysis.
# Baseline model for the bc-prefixed female labor force participation rate:
# both bc-prefixed entrepreneurship indices plus the inflation rate.
lm_lf <- lm(
  formula = bcFemaleLaborForceParticipationRate ~
    bcWomenEntrepreneurshipIndex + bcEntrepreneurshipIndex + `Inflation rate`,
  data = women_entp
)
summary(lm_lf)
##
## Call:
## lm(formula = bcFemaleLaborForceParticipationRate ~ bcWomenEntrepreneurshipIndex +
## bcEntrepreneurshipIndex + `Inflation rate`, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12971 -5785 1488 4839 16068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2809.3 12600.5 -0.223 0.8246
## bcWomenEntrepreneurshipIndex 3923.2 2003.4 1.958 0.0563 .
## bcEntrepreneurshipIndex -3350.6 6102.1 -0.549 0.5856
## `Inflation rate` 114.6 215.6 0.532 0.5975
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7213 on 46 degrees of freedom
## Multiple R-squared: 0.2196, Adjusted R-squared: 0.1687
## F-statistic: 4.314 on 3 and 46 DF, p-value: 0.009201
Only bcWomenEntrepreneurshipIndex is close to significant (p = 0.056); bcEntrepreneurshipIndex (p = 0.586) and inflation rate (p = 0.598) are not statistically significant. Adjusted R-squared is also low (adj. R^2 = 0.1687). Since inflation rate is not statistically significant, it is a candidate to drop (it is removed in the final model, lm_lf5). First, let’s try adding other factors as predictors to see if adj. R^2 improves.
Adding level of development to the model
# Baseline predictors plus `Level of development`.
lm_lf1 <- lm(
  formula = bcFemaleLaborForceParticipationRate ~
    bcWomenEntrepreneurshipIndex + bcEntrepreneurshipIndex +
    `Inflation rate` + `Level of development`,
  data = women_entp
)
summary(lm_lf1)
##
## Call:
## lm(formula = bcFemaleLaborForceParticipationRate ~ bcWomenEntrepreneurshipIndex +
## bcEntrepreneurshipIndex + `Inflation rate` + `Level of development`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13682 -5568 1117 4814 15888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10068.63 19582.70 -0.514 0.6097
## bcWomenEntrepreneurshipIndex 4443.64 2285.21 1.945 0.0581 .
## bcEntrepreneurshipIndex -2917.81 6217.12 -0.469 0.6411
## `Inflation rate` 88.38 223.97 0.395 0.6950
## `Level of development`Developing 2109.31 4329.67 0.487 0.6285
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7273 on 45 degrees of freedom
## Multiple R-squared: 0.2237, Adjusted R-squared: 0.1547
## F-statistic: 3.241 on 4 and 45 DF, p-value: 0.02025
Level of development is not significant (p = 0.63) and adjusted R-squared falls to 0.1547, so we should drop this variable.
Adding EU membership
# Baseline predictors plus `European Union Membership`.
lm_lf2 <- lm(
  formula = bcFemaleLaborForceParticipationRate ~
    bcWomenEntrepreneurshipIndex + bcEntrepreneurshipIndex +
    `Inflation rate` + `European Union Membership`,
  data = women_entp
)
summary(lm_lf2)
##
## Call:
## lm(formula = bcFemaleLaborForceParticipationRate ~ bcWomenEntrepreneurshipIndex +
## bcEntrepreneurshipIndex + `Inflation rate` + `European Union Membership`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13493 -4589 1093 5110 12037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12859.22 13370.81 -0.962 0.3413
## bcWomenEntrepreneurshipIndex 5475.23 2116.28 2.587 0.0130 *
## bcEntrepreneurshipIndex -4866.29 5993.18 -0.812 0.4211
## `Inflation rate` 41.23 213.41 0.193 0.8477
## `European Union Membership`Not Member 5167.95 2739.21 1.887 0.0657 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7020 on 45 degrees of freedom
## Multiple R-squared: 0.2768, Adjusted R-squared: 0.2125
## F-statistic: 4.306 on 4 and 45 DF, p-value: 0.004926
EU Membership is not significant at the 5% level either (p = 0.066), although adjusted R-squared rises to 0.2125.
Adding Inflation type
# Baseline predictors plus `Inflation Type`.
lm_lf3 <- lm(
  formula = bcFemaleLaborForceParticipationRate ~
    bcWomenEntrepreneurshipIndex + bcEntrepreneurshipIndex +
    `Inflation rate` + `Inflation Type`,
  data = women_entp
)
summary(lm_lf3)
##
## Call:
## lm(formula = bcFemaleLaborForceParticipationRate ~ bcWomenEntrepreneurshipIndex +
## bcEntrepreneurshipIndex + `Inflation rate` + `Inflation Type`,
## data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13668 -5032 1134 3718 15502
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8359.9 14461.7 -0.578 0.5662
## bcWomenEntrepreneurshipIndex 3790.4 2033.8 1.864 0.0692 .
## bcEntrepreneurshipIndex -2134.1 6384.5 -0.334 0.7398
## `Inflation rate` -618.3 511.5 -1.209 0.2333
## `Inflation Type`Galloping Inflation 15803.4 9790.1 1.614 0.1138
## `Inflation Type`Moderate Inflation 2918.6 2586.0 1.129 0.2653
## `Inflation Type`Walking Inflation 4933.6 4583.7 1.076 0.2878
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7203 on 43 degrees of freedom
## Multiple R-squared: 0.2724, Adjusted R-squared: 0.1708
## F-statistic: 2.683 on 6 and 43 DF, p-value: 0.02664
Inflation type is not significant either.
After testing, none of the additional predictors (Level of development, EU membership, Inflation type) nor Inflation rate is significant at the 5% level, so we keep only bcWomenEntrepreneurshipIndex and bcEntrepreneurshipIndex. Let’s proceed to test for an interaction effect between these 2 variables.
Testing for interaction between bcWomenEntrepreneurshipIndex and bcEntrepreneurshipIndex to predict bcFemaleLaborForceParticipationRate:
# Interaction model: `^2` expands to both main effects plus their two-way
# interaction term.
lm_lf4 <- lm(
  formula = bcFemaleLaborForceParticipationRate ~
    (bcWomenEntrepreneurshipIndex + bcEntrepreneurshipIndex)^2,
  data = women_entp
)
summary(lm_lf4)
##
## Call:
## lm(formula = bcFemaleLaborForceParticipationRate ~ (bcWomenEntrepreneurshipIndex +
## bcEntrepreneurshipIndex)^2, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13268.4 -4519.2 -258.8 4486.3 14054.4
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 205723 74599
## bcWomenEntrepreneurshipIndex -20384 8764
## bcEntrepreneurshipIndex -53594 18864
## bcWomenEntrepreneurshipIndex:bcEntrepreneurshipIndex 5784 2064
## t value Pr(>|t|)
## (Intercept) 2.758 0.00832 **
## bcWomenEntrepreneurshipIndex -2.326 0.02449 *
## bcEntrepreneurshipIndex -2.841 0.00668 **
## bcWomenEntrepreneurshipIndex:bcEntrepreneurshipIndex 2.802 0.00741 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6687 on 46 degrees of freedom
## Multiple R-squared: 0.3293, Adjusted R-squared: 0.2855
## F-statistic: 7.527 on 3 and 46 DF, p-value: 0.0003367
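According to the model, the interaction between these 2 variables is statistically significant (p = 0.0074), and adjusted R-squared rises to 0.2855. For comparison, we also fit the additive model that keeps them separate: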
# Additive model with only the two bc-prefixed entrepreneurship indices
# (no interaction term).
lm_lf5 <- lm(
  formula = bcFemaleLaborForceParticipationRate ~
    bcWomenEntrepreneurshipIndex + bcEntrepreneurshipIndex,
  data = women_entp
)
summary(lm_lf5)
##
## Call:
## lm(formula = bcFemaleLaborForceParticipationRate ~ bcWomenEntrepreneurshipIndex +
## bcEntrepreneurshipIndex, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13416 -5270 1349 5368 16324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -917.6 11995.1 -0.076 0.9393
## bcWomenEntrepreneurshipIndex 3650.5 1921.8 1.900 0.0636 .
## bcEntrepreneurshipIndex -3164.1 6045.3 -0.523 0.6032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7157 on 47 degrees of freedom
## Multiple R-squared: 0.2148, Adjusted R-squared: 0.1814
## F-statistic: 6.428 on 2 and 47 DF, p-value: 0.003406
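lm_lf5 is our simplest model to predict FemaleLaborForceParticipationRate. However, this model is not very reliable because only 21.48% of the variance in bcFemaleLaborForceParticipationRate is explained by lm_lf5 (adjusted R-squared = 0.1814); lm_lf4, which adds the interaction term, has a higher adjusted R-squared (0.2855).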
The formula to predict FemaleLaborForceParticipationRate can be represented as below:
# y = bcFemaleLaborForceParticipationRate, x1 = bcWomenEntrepreneurshipIndex, x2 = bcEntrepreneurshipIndex
# Model: y = -917.6 + 3650.5*x1 - 3164.1*x2
# Correlation matrix of the four numeric indicators.
# Columns are selected by name rather than by position (6:8, 10) so the result
# does not silently change if columns are added to or reordered in women_entp.
cor_vars <- c(
  "Women Entrepreneurship Index",
  "Entrepreneurship Index",
  "Inflation rate",
  "Female Labor Force Participation Rate"
)
cmatlf <- cor(women_entp[, cor_vars])
corrplot.mixed(cmatlf)
# Print the matrix that was already computed instead of recomputing it.
cmatlf
## Women Entrepreneurship Index
## Women Entrepreneurship Index 1.0000000
## Entrepreneurship Index 0.9225547
## Inflation rate -0.4531406
## Female Labor Force Participation Rate 0.4443458
## Entrepreneurship Index Inflation rate
## Women Entrepreneurship Index 0.9225547 -0.4531406
## Entrepreneurship Index 1.0000000 -0.3876506
## Inflation rate -0.3876506 1.0000000
## Female Labor Force Participation Rate 0.3478316 -0.1434566
## Female Labor Force Participation Rate
## Women Entrepreneurship Index 0.4443458
## Entrepreneurship Index 0.3478316
## Inflation rate -0.1434566
## Female Labor Force Participation Rate 1.0000000
Cohen’s rules of thumb:
|r| around 0.1 is a small (but still noteworthy) correlation, |r| around 0.3 is medium, and |r| >= 0.5 is large and can be noticed by a casual observer.
Correlation between Female Labor Force Participation Rate and Entrepreneurship Index is 0.35 -> medium correlation
Correlation between Female Labor Force Participation Rate and Women Entrepreneurship Index is 0.44 -> medium correlation
Since EI and WEI are very highly correlated (r = 0.92), EI doesn’t bring us much insight when predicting WEI, so we should consider dropping this variable.
# Model for the Women Entrepreneurship Index without the Entrepreneurship
# Index: all remaining predictors, main effects only.
lm8 <- lm(
  formula = `Women Entrepreneurship Index` ~
    `Inflation rate` + `Female Labor Force Participation Rate` +
    `European Union Membership` + `Level of development` + `Inflation Type`,
  data = women_entp
)
summary(lm8)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ `Inflation rate` +
## `Female Labor Force Participation Rate` + `European Union Membership` +
## `Level of development` + `Inflation Type`, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.766 -2.624 0.013 4.106 11.198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.71835 4.90100 9.532 4.59e-12
## `Inflation rate` 0.69155 0.46163 1.498 0.1416
## `Female Labor Force Participation Rate` 0.16740 0.07576 2.209 0.0327
## `European Union Membership`Not Member 0.37962 3.17702 0.119 0.9055
## `Level of development`Developing -22.88740 3.61199 -6.336 1.30e-07
## `Inflation Type`Galloping Inflation -14.05189 8.71410 -1.613 0.1143
## `Inflation Type`Moderate Inflation 4.28424 2.37161 1.806 0.0780
## `Inflation Type`Walking Inflation -0.52205 4.24696 -0.123 0.9028
##
## (Intercept) ***
## `Inflation rate`
## `Female Labor Force Participation Rate` *
## `European Union Membership`Not Member
## `Level of development`Developing ***
## `Inflation Type`Galloping Inflation
## `Inflation Type`Moderate Inflation .
## `Inflation Type`Walking Inflation
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.596 on 42 degrees of freedom
## Multiple R-squared: 0.8199, Adjusted R-squared: 0.7899
## F-statistic: 27.32 on 7 and 42 DF, p-value: 1.071e-13
Only Level of development and Female Labor Force Participation Rate are significant. Adjusted R-squared is 0.7899. Testing for interaction:
# Same predictors as lm8; `^2` adds every two-way interaction between them.
lm9 <- lm(
  formula = `Women Entrepreneurship Index` ~ (
    `Inflation rate` +
      `Female Labor Force Participation Rate` +
      `European Union Membership` +
      `Level of development` +
      `Inflation Type`
  )^2,
  data = women_entp
)
summary(lm9)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ (`Inflation rate` +
## `Female Labor Force Participation Rate` + `European Union Membership` +
## `Level of development` + `Inflation Type`)^2, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0462 -2.6926 -0.1102 3.0397 14.8683
##
## Coefficients: (5 not defined because of singularities)
## Estimate
## (Intercept) 25.06667
## `Inflation rate` 3.89836
## `Female Labor Force Participation Rate` 0.54233
## `European Union Membership`Not Member 4.10015
## `Level of development`Developing -2.87096
## `Inflation Type`Galloping Inflation -22.89498
## `Inflation Type`Moderate Inflation 10.31126
## `Inflation Type`Walking Inflation -8.82570
## `Inflation rate`:`Female Labor Force Participation Rate` -0.02818
## `Inflation rate`:`European Union Membership`Not Member 15.55715
## `Inflation rate`:`Level of development`Developing -15.04811
## `Inflation rate`:`Inflation Type`Galloping Inflation -2.62891
## `Inflation rate`:`Inflation Type`Moderate Inflation -1.13661
## `Inflation rate`:`Inflation Type`Walking Inflation -2.09213
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.15342
## `Female Labor Force Participation Rate`:`Level of development`Developing -0.53165
## `Female Labor Force Participation Rate`:`Inflation Type`Galloping Inflation 0.26192
## `Female Labor Force Participation Rate`:`Inflation Type`Moderate Inflation -0.12481
## `Female Labor Force Participation Rate`:`Inflation Type`Walking Inflation 0.10389
## `European Union Membership`Not Member:`Level of development`Developing NA
## `European Union Membership`Not Member:`Inflation Type`Galloping Inflation NA
## `European Union Membership`Not Member:`Inflation Type`Moderate Inflation -42.33959
## `European Union Membership`Not Member:`Inflation Type`Walking Inflation NA
## `Level of development`Developing:`Inflation Type`Galloping Inflation NA
## `Level of development`Developing:`Inflation Type`Moderate Inflation 39.41665
## `Level of development`Developing:`Inflation Type`Walking Inflation NA
## Std. Error
## (Intercept) 18.13077
## `Inflation rate` 5.55172
## `Female Labor Force Participation Rate` 0.28416
## `European Union Membership`Not Member 29.16855
## `Level of development`Developing 28.87864
## `Inflation Type`Galloping Inflation 77.85363
## `Inflation Type`Moderate Inflation 22.17418
## `Inflation Type`Walking Inflation 30.51670
## `Inflation rate`:`Female Labor Force Participation Rate` 0.05917
## `Inflation rate`:`European Union Membership`Not Member 7.89861
## `Inflation rate`:`Level of development`Developing 6.86306
## `Inflation rate`:`Inflation Type`Galloping Inflation 4.45939
## `Inflation rate`:`Inflation Type`Moderate Inflation 4.82684
## `Inflation rate`:`Inflation Type`Walking Inflation 4.57220
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.43268
## `Female Labor Force Participation Rate`:`Level of development`Developing 0.42616
## `Female Labor Force Participation Rate`:`Inflation Type`Galloping Inflation 1.28867
## `Female Labor Force Participation Rate`:`Inflation Type`Moderate Inflation 0.34448
## `Female Labor Force Participation Rate`:`Inflation Type`Walking Inflation 0.49328
## `European Union Membership`Not Member:`Level of development`Developing NA
## `European Union Membership`Not Member:`Inflation Type`Galloping Inflation NA
## `European Union Membership`Not Member:`Inflation Type`Moderate Inflation 16.20988
## `European Union Membership`Not Member:`Inflation Type`Walking Inflation NA
## `Level of development`Developing:`Inflation Type`Galloping Inflation NA
## `Level of development`Developing:`Inflation Type`Moderate Inflation 17.15781
## `Level of development`Developing:`Inflation Type`Walking Inflation NA
## t value
## (Intercept) 1.383
## `Inflation rate` 0.702
## `Female Labor Force Participation Rate` 1.909
## `European Union Membership`Not Member 0.141
## `Level of development`Developing -0.099
## `Inflation Type`Galloping Inflation -0.294
## `Inflation Type`Moderate Inflation 0.465
## `Inflation Type`Walking Inflation -0.289
## `Inflation rate`:`Female Labor Force Participation Rate` -0.476
## `Inflation rate`:`European Union Membership`Not Member 1.970
## `Inflation rate`:`Level of development`Developing -2.193
## `Inflation rate`:`Inflation Type`Galloping Inflation -0.590
## `Inflation rate`:`Inflation Type`Moderate Inflation -0.235
## `Inflation rate`:`Inflation Type`Walking Inflation -0.458
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.355
## `Female Labor Force Participation Rate`:`Level of development`Developing -1.248
## `Female Labor Force Participation Rate`:`Inflation Type`Galloping Inflation 0.203
## `Female Labor Force Participation Rate`:`Inflation Type`Moderate Inflation -0.362
## `Female Labor Force Participation Rate`:`Inflation Type`Walking Inflation 0.211
## `European Union Membership`Not Member:`Level of development`Developing NA
## `European Union Membership`Not Member:`Inflation Type`Galloping Inflation NA
## `European Union Membership`Not Member:`Inflation Type`Moderate Inflation -2.612
## `European Union Membership`Not Member:`Inflation Type`Walking Inflation NA
## `Level of development`Developing:`Inflation Type`Galloping Inflation NA
## `Level of development`Developing:`Inflation Type`Moderate Inflation 2.297
## `Level of development`Developing:`Inflation Type`Walking Inflation NA
## Pr(>|t|)
## (Intercept) 0.1774
## `Inflation rate` 0.4882
## `Female Labor Force Participation Rate` 0.0663
## `European Union Membership`Not Member 0.8892
## `Level of development`Developing 0.9215
## `Inflation Type`Galloping Inflation 0.7708
## `Inflation Type`Moderate Inflation 0.6454
## `Inflation Type`Walking Inflation 0.7745
## `Inflation rate`:`Female Labor Force Participation Rate` 0.6375
## `Inflation rate`:`European Union Membership`Not Member 0.0585
## `Inflation rate`:`Level of development`Developing 0.0365
## `Inflation rate`:`Inflation Type`Galloping Inflation 0.5601
## `Inflation rate`:`Inflation Type`Moderate Inflation 0.8155
## `Inflation rate`:`Inflation Type`Walking Inflation 0.6507
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member 0.7255
## `Female Labor Force Participation Rate`:`Level of development`Developing 0.2222
## `Female Labor Force Participation Rate`:`Inflation Type`Galloping Inflation 0.8404
## `Female Labor Force Participation Rate`:`Inflation Type`Moderate Inflation 0.7197
## `Female Labor Force Participation Rate`:`Inflation Type`Walking Inflation 0.8347
## `European Union Membership`Not Member:`Level of development`Developing NA
## `European Union Membership`Not Member:`Inflation Type`Galloping Inflation NA
## `European Union Membership`Not Member:`Inflation Type`Moderate Inflation 0.0141
## `European Union Membership`Not Member:`Inflation Type`Walking Inflation NA
## `Level of development`Developing:`Inflation Type`Galloping Inflation NA
## `Level of development`Developing:`Inflation Type`Moderate Inflation 0.0290
## `Level of development`Developing:`Inflation Type`Walking Inflation NA
##
## (Intercept)
## `Inflation rate`
## `Female Labor Force Participation Rate` .
## `European Union Membership`Not Member
## `Level of development`Developing
## `Inflation Type`Galloping Inflation
## `Inflation Type`Moderate Inflation
## `Inflation Type`Walking Inflation
## `Inflation rate`:`Female Labor Force Participation Rate`
## `Inflation rate`:`European Union Membership`Not Member .
## `Inflation rate`:`Level of development`Developing *
## `Inflation rate`:`Inflation Type`Galloping Inflation
## `Inflation rate`:`Inflation Type`Moderate Inflation
## `Inflation rate`:`Inflation Type`Walking Inflation
## `Female Labor Force Participation Rate`:`European Union Membership`Not Member
## `Female Labor Force Participation Rate`:`Level of development`Developing
## `Female Labor Force Participation Rate`:`Inflation Type`Galloping Inflation
## `Female Labor Force Participation Rate`:`Inflation Type`Moderate Inflation
## `Female Labor Force Participation Rate`:`Inflation Type`Walking Inflation
## `European Union Membership`Not Member:`Level of development`Developing
## `European Union Membership`Not Member:`Inflation Type`Galloping Inflation
## `European Union Membership`Not Member:`Inflation Type`Moderate Inflation *
## `European Union Membership`Not Member:`Inflation Type`Walking Inflation
## `Level of development`Developing:`Inflation Type`Galloping Inflation
## `Level of development`Developing:`Inflation Type`Moderate Inflation *
## `Level of development`Developing:`Inflation Type`Walking Inflation
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.449 on 29 degrees of freedom
## Multiple R-squared: 0.8811, Adjusted R-squared: 0.7992
## F-statistic: 10.75 on 20 and 29 DF, p-value: 1.393e-08
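A few interaction terms are nominally significant at the 5% level (Inflation rate:Level of development, p = 0.037; EU Membership:Moderate Inflation, p = 0.014; Level of development:Moderate Inflation, p = 0.029). However, the model is over-parameterised for 50 observations (5 coefficients are not defined because of singularities) and adjusted R-squared barely improves (0.7992 vs 0.7899), so we do not keep any interaction.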
# Reduced model: bc-prefixed female labor force participation rate plus
# level of development.
lm10 <- lm(
  formula = `Women Entrepreneurship Index` ~
    bcFemaleLaborForceParticipationRate + `Level of development`,
  data = women_entp
)
summary(lm10)
##
## Call:
## lm(formula = `Women Entrepreneurship Index` ~ bcFemaleLaborForceParticipationRate +
## `Level of development`, data = women_entp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.7012 -4.2344 0.3651 5.5575 14.4798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.259e+01 2.995e+00 17.561 < 2e-16 ***
## bcFemaleLaborForceParticipationRate 3.460e-04 1.323e-04 2.615 0.0119 *
## `Level of development`Developing -2.279e+01 2.074e+00 -10.989 1.4e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.846 on 47 degrees of freedom
## Multiple R-squared: 0.783, Adjusted R-squared: 0.7737
## F-statistic: 84.77 on 2 and 47 DF, p-value: 2.564e-16
# Draw the diagnostic plots for the fitted lm10 model.
plot(x = lm10)
We can conclude that without EI, lm10 is our preferred model (all of its terms are significant) with adj. R squared = 0.7737. To make this model more reliable, we need more data about other factors such as education rate and sociological factors.
The model can be represented as below:
# y = WEI, x1 = bcFemaleLaborForceParticipationRate, x2 = LVD - Developing
# y = 52.59 + 0.000346*x1 - 22.79*x2