| title: “CIENCIA DE DATOS/ Análisis de Base de datos de Emprendimiento Femenino UE” |
| author: “Dr. Roberto Chang López/ Certificado en Machine Learning por MIT” |
| date: “13/12/2020” |
| output: |
| html_document: default |
Base de Datos KAGGLE
#########################Dr. Roberto Enrique Chang López########################## ###################################rchang@unah.edu.hn############################### ##################################################################################### #####################################################################################
Algunos Dashboards elaborados son: Para Bolsa de Valores https://rchang.shinyapps.io/rchang-stock-exchange/
Para el Estado del Clima https://rchang.shinyapps.io/rchang-app_clima_ho/
Para Machine Learning https://rchang.shinyapps.io/rchang-app/
Para Empresariales e Industriales https://rchang.shinyapps.io/rchang-app_final_emp/
Para Dashboards con log in https://rchang.shinyapps.io/clase_3-shiny-2/_w_ae4e775f/_w_f249a9a1/?page=sign_in
y para Sistemas de Información Geográfica
El Planteamiento del Problema
#De las Variables presentadas en la base de datos de Kaggle sobre el empredimiento #femeninino en Europa, se requiere conocer ¿Cuál es el impacto de la inflación en el #índice de emprendimiento femenino en el caso de que la inflación afecte? Asimismo, #Se requiere realizar un análisis y exploración de datos previos con histogramas, #box plots, diagramas de dispersión, pruebas de normalidad, matriz de correlación, #matriz de correlación con p-values, prueba de hipótesis, pruebas de potencia #y el análisis de regresiones que contesten la pregunta de investigación?
#Lectura del archivo csv.
library(readr)
## Warning: package 'readr' was built under R version 4.0.5
Dataset3 <- read_delim("Dataset3.csv", ";",
escape_double = FALSE, trim_ws = TRUE)
##
## -- Column specification --------------------------------------------------------
## cols(
## No = col_double(),
## Country = col_character(),
## `Level of development` = col_character(),
## `European Union Membership` = col_character(),
## Currency = col_character(),
## `Women Entrepreneurship Index` = col_double(),
## `Entrepreneurship Index` = col_double(),
## `Inflation rate` = col_double(),
## `Female Labor Force Participation Rate` = col_double()
## )
Dataset3
## # A tibble: 51 x 9
## No Country `Level of develo~ `European Union M~ Currency `Women Entrepren~
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 4 Austria Developed Member Euro 54.9
## 2 6 Belgium Developed Member Euro 63.6
## 3 17 Estonia Developed Member Euro 55.4
## 4 18 Finland Developed Member Euro 66.4
## 5 19 France Developed Member Euro 68.8
## 6 20 Germany Developed Member Euro 63.6
## 7 22 Greece Developed Member Euro 43
## 8 28 Ireland Developed Member Euro 64.3
## 9 30 Italy Developed Member Euro 51.4
## 10 34 Latvia Developed Member Euro 56.6
## # ... with 41 more rows, and 3 more variables: Entrepreneurship Index <dbl>,
## # Inflation rate <dbl>, Female Labor Force Participation Rate <dbl>
#Instalación de paquetes
#install.packages(“dplyr”) #install.packages(“ggplot2”) #install.packages(“GGally”) #install.packages(“Hmisc”) #install.packages(“corrplot”) #install.packages(“PerformanceAnalytics”)
#Llamar los paquetes instalados con los que vamos a trabajar
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v dplyr 1.0.7
## v tibble 3.1.2 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v purrr 0.3.4
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.5
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.0.5
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 4.0.3
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(corrplot)
## corrplot 0.90 loaded
library(stats)
EXPLORACIÓN DE DATOS
#emprendimiento femenino, tasa laboral femenina y asiganarlo en el objeto df4
df4<-Dataset3 %>% group_by(Country, `European Union Membership`,`Inflation rate`,`Women Entrepreneurship Index`, `Female Labor Force Participation Rate`)
df4
## # A tibble: 51 x 9
## # Groups: Country, European Union Membership, Inflation rate, Women
## # Entrepreneurship Index, Female Labor Force Participation Rate [51]
## No Country `Level of develo~ `European Union M~ Currency `Women Entrepren~
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 4 Austria Developed Member Euro 54.9
## 2 6 Belgium Developed Member Euro 63.6
## 3 17 Estonia Developed Member Euro 55.4
## 4 18 Finland Developed Member Euro 66.4
## 5 19 France Developed Member Euro 68.8
## 6 20 Germany Developed Member Euro 63.6
## 7 22 Greece Developed Member Euro 43
## 8 28 Ireland Developed Member Euro 64.3
## 9 30 Italy Developed Member Euro 51.4
## 10 34 Latvia Developed Member Euro 56.6
## # ... with 41 more rows, and 3 more variables: Entrepreneurship Index <dbl>,
## # Inflation rate <dbl>, Female Labor Force Participation Rate <dbl>
###ANÁLISIS DE NORMALIDAD DE LAS VARIABLES
shapiro.test(Dataset3$`Women Entrepreneurship Index`)
##
## Shapiro-Wilk normality test
##
## data: Dataset3$`Women Entrepreneurship Index`
## W = 0.94077, p-value = 0.01318
shapiro.test(Dataset3$`Female Labor Force Participation Rate`)
##
## Shapiro-Wilk normality test
##
## data: Dataset3$`Female Labor Force Participation Rate`
## W = 0.85947, p-value = 2.399e-05
shapiro.test(Dataset3$`Inflation rate`)
##
## Shapiro-Wilk normality test
##
## data: Dataset3$`Inflation rate`
## W = 0.68624, p-value = 3.728e-09
shapiro.test(Dataset3$`Entrepreneurship Index`)
##
## Shapiro-Wilk normality test
##
## data: Dataset3$`Entrepreneurship Index`
## W = 0.91004, p-value = 0.0009261
###Las cuatro variables no son normales, por lo que recurrimos a la ###prueba de Spearman en lugar de Pearson.
Inflation rate,Women Entrepreneurship Index, Female Labor Force Participation Ratedf10<-Dataset3[1:51,6:9]
df10
## # A tibble: 51 x 4
## `Women Entrepreneur~ `Entrepreneurshi~ `Inflation rate` `Female Labor Force ~
## <dbl> <dbl> <dbl> <dbl>
## 1 54.9 64.9 0.9 67.1
## 2 63.6 65.5 0.6 58
## 3 55.4 60.2 -0.88 68.5
## 4 66.4 65.7 -0.2 67.7
## 5 68.8 67.3 0 60.6
## 6 63.6 67.4 0.5 69.9
## 7 43 42 -1.7 42.5
## 8 64.3 65.3 -0.3 59.4
## 9 51.4 41.3 0 47.2
## 10 56.6 54.5 0.2 66.4
## # ... with 41 more rows
cor(df10, method = c("spearman"))
## Women Entrepreneurship Index
## Women Entrepreneurship Index 1.0000000
## Entrepreneurship Index 0.9077539
## Inflation rate -0.3946565
## Female Labor Force Participation Rate 0.4346547
## Entrepreneurship Index Inflation rate
## Women Entrepreneurship Index 0.9077539 -0.39465651
## Entrepreneurship Index 1.0000000 -0.37079984
## Inflation rate -0.3707998 1.00000000
## Female Labor Force Participation Rate 0.3710827 -0.00493626
## Female Labor Force Participation Rate
## Women Entrepreneurship Index 0.43465472
## Entrepreneurship Index 0.37108270
## Inflation rate -0.00493626
## Female Labor Force Participation Rate 1.00000000
rcorr(as.matrix(df10,method = c("spearman")))
## Women Entrepreneurship Index
## Women Entrepreneurship Index 1.00
## Entrepreneurship Index 0.91
## Inflation rate -0.46
## Female Labor Force Participation Rate 0.44
## Entrepreneurship Index Inflation rate
## Women Entrepreneurship Index 0.91 -0.46
## Entrepreneurship Index 1.00 -0.40
## Inflation rate -0.40 1.00
## Female Labor Force Participation Rate 0.33 -0.14
## Female Labor Force Participation Rate
## Women Entrepreneurship Index 0.44
## Entrepreneurship Index 0.33
## Inflation rate -0.14
## Female Labor Force Participation Rate 1.00
##
## n= 51
##
##
## P
## Women Entrepreneurship Index
## Women Entrepreneurship Index
## Entrepreneurship Index 0.0000
## Inflation rate 0.0008
## Female Labor Force Participation Rate 0.0012
## Entrepreneurship Index Inflation rate
## Women Entrepreneurship Index 0.0000 0.0008
## Entrepreneurship Index 0.0041
## Inflation rate 0.0041
## Female Labor Force Participation Rate 0.0165 0.3279
## Female Labor Force Participation Rate
## Women Entrepreneurship Index 0.0012
## Entrepreneurship Index 0.0165
## Inflation rate 0.3279
## Female Labor Force Participation Rate
####Elabore los gráficos de frecuencia de cada una de las variables
hist(Dataset3$`Women Entrepreneurship Index`)
hist(Dataset3$`Female Labor Force Participation Rate`)
hist(Dataset3$`Inflation rate`)
hist(Dataset3$`Entrepreneurship Index`)
####Elabore el gráfico de frecuencia de cada una de las variables ggplot2 #### con la variable Dataset3\(`Women Entrepreneurship Index` y rellene con la ####variable Dataset3\)“European Union Membership”
library(ggplot2)
ggplot(data = Dataset3,
mapping = aes(x = Dataset3$`Women Entrepreneurship Index`,
fill = Dataset3$"European Union Membership")) +
geom_histogram(bins = 9,
position = 'identity',
alpha = 0.8) +
labs(title = 'Indice de Emprendimiento de las Mujeres',
fill = 'Estados Miembros y No Miembros',
x = "Indice de Emprendimiento",
y = 'Frecuencia de Casos',
subtitle = 'Histograma de Frecuencia',
caption = 'BAse de datos de R "Dataset3 de Kaggle"')
####Elabore el gráfico de frecuencia de cada una de las variables ggplot2 #### con la variable Dataset3\("Entrepreneurship Index" y rellene con la ####variable Dataset3\)“European Union Membership”
library(ggplot2)
ggplot(data = Dataset3,
mapping = aes(x = Dataset3$"Entrepreneurship Index",
fill = Dataset3$"European Union Membership")) +
geom_histogram(bins = 9,
position = 'identity',
alpha = 0.8) +
labs(title = 'Indice de Emprendimiento',
fill = 'Estados Miembros y No Miembros',
x = "Indice de Emprendimiento",
y = 'Frecuencia de Casos',
subtitle = 'Histograma de Frecuencia',
caption = 'BAse de datos de R "Dataset3 de Kaggle"')
####Elabore el gráfico de frecuencia de cada una de las variables ggplot2 #### con la variable Dataset3\("Female Labor Force Participation Rate" y rellene con la ####variable Dataset3\)“European Union Membership”
library(ggplot2)
ggplot(data = Dataset3,
mapping = aes(x = Dataset3$"Female Labor Force Participation Rate",
fill = Dataset3$"European Union Membership")) +
geom_histogram(bins = 9,
position = 'identity',
alpha = 0.8) +
labs(title = 'Tasa de Participación Laboral de la Mujer',
fill = 'Estados Miembros y No Miembros',
x = "Tasa Labora Femenina",
y = 'Frecuencia de Casos',
subtitle = 'Histograma de Frecuencia',
caption = 'BAse de datos de R "Dataset3 de Kaggle"')
####Elabore el gráfico de frecuencia de cada una de las variables ggplot2 #### con la variableDataset3\(`Inflation rate` y rellene con la ####variable Dataset3\)“European Union Membership”
library(ggplot2)
ggplot(data = Dataset3,
mapping = aes(x = Dataset3$`Inflation rate`,
fill = Dataset3$"European Union Membership")) +
geom_histogram(bins = 9,
position = 'identity',
alpha = 0.8) +
labs(title = 'Tasa de de inflación',
fill = 'Estados Miembros y No Miembros',
x = "Inflación %",
y = 'Frecuencia de Casos',
subtitle = 'Histograma de Frecuencia',
caption = 'BAse de datos de R "Dataset3 de Kaggle"')
####Elabore el gráfico de Boxplot del índice de emprendimiento femenino y #### por países miembros y no miembros de la UE
boxplot(Dataset3$`Women Entrepreneurship Index`~Dataset3$`European Union Membership`)
####Elabore el gráfico de dispersión x = Dataset3\(`Entrepreneurship Index`, ####y = Dataset3\)Women Entrepreneurship Indexen ggplot2 #### con la variable Dataset3\(`Inflation rate` y utilice ####facet_grid(Dataset3\)Level of development ~ Dataset3$European Union Membership) ####Tambien utilice la suavización de la línea con intervalos de confianza al 95%
ggplot(Dataset3, aes(x = Dataset3$`Entrepreneurship Index`, y = Dataset3$`Women Entrepreneurship Index`)) +
geom_point() + geom_smooth() + facet_grid(Dataset3$`Level of development` ~ Dataset3$`European Union Membership`)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#####################Pruebas de Hipótesis t.test Medias############################# #################################################################################### ################################################################################### Dataset3
ICM<-log(Dataset3$`Women Entrepreneurship Index`)
Dataset3<-cbind(Dataset3, ICM)
IC<-log(Dataset3$`Entrepreneurship Index`)
Dataset3<-cbind(Dataset3, ICM)
##Normalidad Shapiro de las variables normalizadas
shapiro.test(Dataset3$ICM)
##
## Shapiro-Wilk normality test
##
## data: Dataset3$ICM
## W = 0.94269, p-value = 0.01577
hist(Dataset3$ICM)
############Recodificación de la variable índice de emprendimiento de la mujer## #####Esta recodificación es necesaria para llevar la prueba de hipótesis de la media
ICMM<-ifelse(Dataset3$`Women Entrepreneurship Index`>50.0, 1,0)
Dataset3<-cbind(Dataset3, ICMM)
Dataset3
## No Country Level of development European Union Membership
## 1 4 Austria Developed Member
## 2 6 Belgium Developed Member
## 3 17 Estonia Developed Member
## 4 18 Finland Developed Member
## 5 19 France Developed Member
## 6 20 Germany Developed Member
## 7 22 Greece Developed Member
## 8 28 Ireland Developed Member
## 9 30 Italy Developed Member
## 10 34 Latvia Developed Member
## 11 35 Lithuania Developed Member
## 12 39 Netherlands Developed Member
## 13 50 Slovakia Developed Member
## 14 51 Slovenia Developed Member
## 15 53 Spain Developed Member
## 16 12 Croatia Developed Member
## 17 13 Denmark Developed Member
## 18 24 Hungary Developed Member
## 19 44 Poland Developed Member
## 20 54 Sweden Developed Member
## 21 3 Australia Developed Not Member
## 22 25 Iceland Developed Not Member
## 23 32 Japan Developed Not Member
## 24 40 Norway Developed Not Member
## 25 49 Singapore Developed Not Member
## 26 55 Switzerland Developed Not Member
## 27 56 Taiwan Developed Not Member
## 28 1 Algeria Developing Not Member
## 29 2 Argentina Developing Not Member
## 30 7 Bolivia Developing Not Member
## 31 8 Bosnia and Herzegovina Developing Not Member
## 32 9 Brazil Developing Not Member
## 33 10 China Developing Not Member
## 34 11 Costa Rica Developing Not Member
## 35 14 Ecuador Developing Not Member
## 36 15 Egypt Developing Not Member
## 37 16 El Salvador Developing Not Member
## 38 21 Ghana Developing Not Member
## 39 26 India Developing Not Member
## 40 31 Jamaica Developing Not Member
## 41 36 Macedonia Developing Not Member
## 42 37 Malaysia Developing Not Member
## 43 38 Mexico Developing Not Member
## 44 42 Panama Developing Not Member
## 45 43 Peru Developing Not Member
## 46 47 Russia Developing Not Member
## 47 48 Saudi Arabia Developing Not Member
## 48 57 Thailand Developing Not Member
## 49 58 Tunisia Developing Not Member
## 50 59 Turkey Developing Not Member
## 51 60 Uruguay Developing Not Member
## Currency Women Entrepreneurship Index Entrepreneurship Index
## 1 Euro 54.9 64.9
## 2 Euro 63.6 65.5
## 3 Euro 55.4 60.2
## 4 Euro 66.4 65.7
## 5 Euro 68.8 67.3
## 6 Euro 63.6 67.4
## 7 Euro 43.0 42.0
## 8 Euro 64.3 65.3
## 9 Euro 51.4 41.3
## 10 Euro 56.6 54.5
## 11 Euro 58.5 54.6
## 12 Euro 69.3 66.5
## 13 Euro 54.8 45.4
## 14 Euro 55.9 53.1
## 15 Euro 52.5 49.6
## 16 National Currency 49.9 40.6
## 17 National Currency 69.7 71.4
## 18 National Currency 53.7 42.7
## 19 National Currency 57.7 47.4
## 20 National Currency 66.7 71.8
## 21 National Currency 74.8 77.6
## 22 National Currency 68.0 70.4
## 23 National Currency 40.0 49.5
## 24 National Currency 66.3 65.6
## 25 National Currency 59.8 68.1
## 26 National Currency 63.7 68.6
## 27 National Currency 53.4 69.1
## 28 National Currency 27.4 30.2
## 29 National Currency 35.7 37.2
## 30 National Currency 29.7 28.0
## 31 National Currency 31.6 28.9
## 32 National Currency 31.1 25.8
## 33 National Currency 38.3 36.4
## 34 National Currency 36.1 37.7
## 35 National Currency 32.3 28.2
## 36 National Currency 27.7 28.1
## 37 National Currency 29.9 29.6
## 38 National Currency 25.8 24.8
## 39 National Currency 25.3 25.3
## 40 National Currency 38.6 27.2
## 41 National Currency 41.2 37.1
## 42 National Currency 39.2 40.0
## 43 National Currency 42.8 30.7
## 44 National Currency 36.9 32.2
## 45 National Currency 43.6 30.9
## 46 National Currency 35.6 31.7
## 47 National Currency 37.0 49.6
## 48 National Currency 36.6 32.1
## 49 National Currency 30.7 35.5
## 50 National Currency 39.3 54.6
## 51 National Currency 44.5 41.4
## Inflation rate Female Labor Force Participation Rate ICM ICM ICMM
## 1 0.90 67.10 4.005513 4.005513 1
## 2 0.60 58.00 4.152613 4.152613 1
## 3 -0.88 68.50 4.014580 4.014580 1
## 4 -0.20 67.70 4.195697 4.195697 1
## 5 0.00 60.60 4.231204 4.231204 1
## 6 0.50 69.90 4.152613 4.152613 1
## 7 -1.70 42.50 3.761200 3.761200 0
## 8 -0.30 59.40 4.163560 4.163560 1
## 9 0.00 47.20 3.939638 3.939638 1
## 10 0.20 66.40 4.036009 4.036009 1
## 11 -0.90 66.50 4.069027 4.069027 1
## 12 0.60 69.20 4.238445 4.238445 1
## 13 -0.30 55.90 4.003690 4.003690 1
## 14 -0.50 61.00 4.023564 4.023564 1
## 15 -0.50 52.70 3.960813 3.960813 1
## 16 -0.50 60.40 3.910021 3.910021 0
## 17 0.50 70.30 4.244200 4.244200 1
## 18 -0.10 57.80 3.983413 3.983413 1
## 19 -0.90 56.60 4.055257 4.055257 1
## 20 0.00 74.00 4.200205 4.200205 1
## 21 1.50 66.80 4.314818 4.314818 1
## 22 1.60 82.30 4.219508 4.219508 1
## 23 0.80 64.70 3.688879 3.688879 0
## 24 2.17 69.20 4.194190 4.194190 1
## 25 -0.50 59.18 4.091006 4.091006 1
## 26 -1.10 74.70 4.154185 4.154185 1
## 27 -0.61 55.00 3.977811 3.977811 1
## 28 4.80 18.00 3.310543 3.310543 0
## 29 26.50 47.30 3.575151 3.575151 0
## 30 4.10 69.40 3.391147 3.391147 0
## 31 -1.00 51.90 3.453157 3.453157 0
## 32 10.67 55.90 3.437208 3.437208 0
## 33 1.40 62.40 3.645450 3.645450 0
## 34 0.80 59.40 3.586293 3.586293 0
## 35 -0.50 63.50 3.475067 3.475067 0
## 36 11.00 64.60 3.321432 3.321432 0
## 37 -2.25 55.70 3.397858 3.397858 0
## 38 17.20 60.80 3.250374 3.250374 0
## 39 5.90 61.10 3.230804 3.230804 0
## 40 3.70 37.70 3.653252 3.653252 0
## 41 3.70 73.00 3.718438 3.718438 0
## 42 2.30 58.50 3.668677 3.668677 0
## 43 2.70 44.70 3.756538 3.756538 0
## 44 0.10 67.90 3.608212 3.608212 0
## 45 3.50 63.40 3.775057 3.775057 0
## 46 15.50 65.20 3.572346 3.572346 0
## 47 1.20 13.00 3.610918 3.610918 0
## 48 -0.90 62.00 3.600048 3.600048 0
## 49 4.80 25.19 3.424263 3.424263 0
## 50 7.70 30.40 3.671225 3.671225 0
## 51 8.67 68.00 3.795489 3.795489 0
Pruebas de las medias entre el índice normalizado ICM y índice alto y bajo de
###competitividad de las mujeres
tt1<-t.test( ICM ~ ICMM ,data=Dataset3)
tt1
##
## Welch Two Sample t-test
##
## data: ICM by ICMM
## t = -13.339, df = 43.573, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6250306 -0.4609106
## sample estimates:
## mean in group 0 mean in group 1
## 3.566261 4.109232
#Prueba de potencia # Realice la prueba de potencia para el error tipo Beta al 95% con 51 observaciones. #Delta es la diferencia de las medias 3.566261 - 4.109232 (Resultados de la tabla anterior)
sdif_1<- tapply(Dataset3$ICM,Dataset3$ICMM, sd)
sdif_1
## 0 1
## 0.1778147 0.1079920
power.t.test(n=315, delta= -0.542979, sd= 0.00698227, sig.level=0.05,power=NULL, alternative= c("two.sided"))
##
## Two-sample t test power calculation
##
## n = 315
## delta = 0.542979
## sd = 0.00698227
## sig.level = 0.05
## power = 1
## alternative = two.sided
##
## NOTE: n is number in *each* group
############################REGRESIÓN LINEAL##################### #Aun y cuando vimos que no existe normalidad en las variables de estudio realice un #análisis de regresión lineal.
x<-Dataset3$`Inflation rate`
y<-Dataset3$`Women Entrepreneurship Index`
modelo<-lm(y ~ x)
summary(modelo)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.7791 -10.0047 0.9349 9.8225 25.6508
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.9611 1.9974 25.513 < 2e-16 ***
## x -1.2080 0.3372 -3.582 0.000782 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.83 on 49 degrees of freedom
## Multiple R-squared: 0.2075, Adjusted R-squared: 0.1913
## F-statistic: 12.83 on 1 and 49 DF, p-value: 0.0007823
#Elabore el gráfico de la regresión
plot(x,y)
abline(a=modelo$coefficients[1],b=modelo$coefficients[2])
##Calcule el intervalo de confianza y evalue el modelo
confint(modelo)
## 2.5 % 97.5 %
## (Intercept) 46.947176 54.9751120
## x -1.885704 -0.5302744
res <- residuals( modelo ) # residuos
pre <- predict(modelo) #predicciones
###Modelo BIC y Modelo AIC
BIC(modelo)
## [1] 414.7773
AIC(modelo)
## [1] 408.9819
#diagnostico, que se realiza a traves de analisis de residuos
#Elabore un segundo modelo de regersión lineal sin el intercepto para evaluar #R-Squared o el R cuadrado
modelo2<-lm(y ~ 0 + x)
summary(modelo2)
##
## Call:
## lm(formula = y ~ 0 + x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.91 32.55 47.34 60.78 70.97
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 2.551 1.135 2.248 0.029 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.01 on 50 degrees of freedom
## Multiple R-squared: 0.09177, Adjusted R-squared: 0.07361
## F-statistic: 5.052 on 1 and 50 DF, p-value: 0.02904
plot(x,y)
abline(a=modelo2$coefficients[1],b=modelo$coefficients[2])
############################REGRESIÓN LOGÍSTICA#####################
##Ajuste de un modelo logístico simple con una variable independiente
##f4$Women Entrepreneurship Index, la cual arriba de la media se considera un ##Indice alto de emprendimiento con codificación 1, mientras abajo de ese umbral ## se considera un indice bajo con 0.
summary(Dataset3$`Women Entrepreneurship Index`)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.30 36.35 44.50 47.84 59.15 74.80
Dataset3$`Women Entrepreneurship Index`<-ifelse(Dataset3$`Women Entrepreneurship Index` >44.5,1,0)
Dataset3
## No Country Level of development European Union Membership
## 1 4 Austria Developed Member
## 2 6 Belgium Developed Member
## 3 17 Estonia Developed Member
## 4 18 Finland Developed Member
## 5 19 France Developed Member
## 6 20 Germany Developed Member
## 7 22 Greece Developed Member
## 8 28 Ireland Developed Member
## 9 30 Italy Developed Member
## 10 34 Latvia Developed Member
## 11 35 Lithuania Developed Member
## 12 39 Netherlands Developed Member
## 13 50 Slovakia Developed Member
## 14 51 Slovenia Developed Member
## 15 53 Spain Developed Member
## 16 12 Croatia Developed Member
## 17 13 Denmark Developed Member
## 18 24 Hungary Developed Member
## 19 44 Poland Developed Member
## 20 54 Sweden Developed Member
## 21 3 Australia Developed Not Member
## 22 25 Iceland Developed Not Member
## 23 32 Japan Developed Not Member
## 24 40 Norway Developed Not Member
## 25 49 Singapore Developed Not Member
## 26 55 Switzerland Developed Not Member
## 27 56 Taiwan Developed Not Member
## 28 1 Algeria Developing Not Member
## 29 2 Argentina Developing Not Member
## 30 7 Bolivia Developing Not Member
## 31 8 Bosnia and Herzegovina Developing Not Member
## 32 9 Brazil Developing Not Member
## 33 10 China Developing Not Member
## 34 11 Costa Rica Developing Not Member
## 35 14 Ecuador Developing Not Member
## 36 15 Egypt Developing Not Member
## 37 16 El Salvador Developing Not Member
## 38 21 Ghana Developing Not Member
## 39 26 India Developing Not Member
## 40 31 Jamaica Developing Not Member
## 41 36 Macedonia Developing Not Member
## 42 37 Malaysia Developing Not Member
## 43 38 Mexico Developing Not Member
## 44 42 Panama Developing Not Member
## 45 43 Peru Developing Not Member
## 46 47 Russia Developing Not Member
## 47 48 Saudi Arabia Developing Not Member
## 48 57 Thailand Developing Not Member
## 49 58 Tunisia Developing Not Member
## 50 59 Turkey Developing Not Member
## 51 60 Uruguay Developing Not Member
## Currency Women Entrepreneurship Index Entrepreneurship Index
## 1 Euro 1 64.9
## 2 Euro 1 65.5
## 3 Euro 1 60.2
## 4 Euro 1 65.7
## 5 Euro 1 67.3
## 6 Euro 1 67.4
## 7 Euro 0 42.0
## 8 Euro 1 65.3
## 9 Euro 1 41.3
## 10 Euro 1 54.5
## 11 Euro 1 54.6
## 12 Euro 1 66.5
## 13 Euro 1 45.4
## 14 Euro 1 53.1
## 15 Euro 1 49.6
## 16 National Currency 1 40.6
## 17 National Currency 1 71.4
## 18 National Currency 1 42.7
## 19 National Currency 1 47.4
## 20 National Currency 1 71.8
## 21 National Currency 1 77.6
## 22 National Currency 1 70.4
## 23 National Currency 0 49.5
## 24 National Currency 1 65.6
## 25 National Currency 1 68.1
## 26 National Currency 1 68.6
## 27 National Currency 1 69.1
## 28 National Currency 0 30.2
## 29 National Currency 0 37.2
## 30 National Currency 0 28.0
## 31 National Currency 0 28.9
## 32 National Currency 0 25.8
## 33 National Currency 0 36.4
## 34 National Currency 0 37.7
## 35 National Currency 0 28.2
## 36 National Currency 0 28.1
## 37 National Currency 0 29.6
## 38 National Currency 0 24.8
## 39 National Currency 0 25.3
## 40 National Currency 0 27.2
## 41 National Currency 0 37.1
## 42 National Currency 0 40.0
## 43 National Currency 0 30.7
## 44 National Currency 0 32.2
## 45 National Currency 0 30.9
## 46 National Currency 0 31.7
## 47 National Currency 0 49.6
## 48 National Currency 0 32.1
## 49 National Currency 0 35.5
## 50 National Currency 0 54.6
## 51 National Currency 0 41.4
## Inflation rate Female Labor Force Participation Rate ICM ICM ICMM
## 1 0.90 67.10 4.005513 4.005513 1
## 2 0.60 58.00 4.152613 4.152613 1
## 3 -0.88 68.50 4.014580 4.014580 1
## 4 -0.20 67.70 4.195697 4.195697 1
## 5 0.00 60.60 4.231204 4.231204 1
## 6 0.50 69.90 4.152613 4.152613 1
## 7 -1.70 42.50 3.761200 3.761200 0
## 8 -0.30 59.40 4.163560 4.163560 1
## 9 0.00 47.20 3.939638 3.939638 1
## 10 0.20 66.40 4.036009 4.036009 1
## 11 -0.90 66.50 4.069027 4.069027 1
## 12 0.60 69.20 4.238445 4.238445 1
## 13 -0.30 55.90 4.003690 4.003690 1
## 14 -0.50 61.00 4.023564 4.023564 1
## 15 -0.50 52.70 3.960813 3.960813 1
## 16 -0.50 60.40 3.910021 3.910021 0
## 17 0.50 70.30 4.244200 4.244200 1
## 18 -0.10 57.80 3.983413 3.983413 1
## 19 -0.90 56.60 4.055257 4.055257 1
## 20 0.00 74.00 4.200205 4.200205 1
## 21 1.50 66.80 4.314818 4.314818 1
## 22 1.60 82.30 4.219508 4.219508 1
## 23 0.80 64.70 3.688879 3.688879 0
## 24 2.17 69.20 4.194190 4.194190 1
## 25 -0.50 59.18 4.091006 4.091006 1
## 26 -1.10 74.70 4.154185 4.154185 1
## 27 -0.61 55.00 3.977811 3.977811 1
## 28 4.80 18.00 3.310543 3.310543 0
## 29 26.50 47.30 3.575151 3.575151 0
## 30 4.10 69.40 3.391147 3.391147 0
## 31 -1.00 51.90 3.453157 3.453157 0
## 32 10.67 55.90 3.437208 3.437208 0
## 33 1.40 62.40 3.645450 3.645450 0
## 34 0.80 59.40 3.586293 3.586293 0
## 35 -0.50 63.50 3.475067 3.475067 0
## 36 11.00 64.60 3.321432 3.321432 0
## 37 -2.25 55.70 3.397858 3.397858 0
## 38 17.20 60.80 3.250374 3.250374 0
## 39 5.90 61.10 3.230804 3.230804 0
## 40 3.70 37.70 3.653252 3.653252 0
## 41 3.70 73.00 3.718438 3.718438 0
## 42 2.30 58.50 3.668677 3.668677 0
## 43 2.70 44.70 3.756538 3.756538 0
## 44 0.10 67.90 3.608212 3.608212 0
## 45 3.50 63.40 3.775057 3.775057 0
## 46 15.50 65.20 3.572346 3.572346 0
## 47 1.20 13.00 3.610918 3.610918 0
## 48 -0.90 62.00 3.600048 3.600048 0
## 49 4.80 25.19 3.424263 3.424263 0
## 50 7.70 30.40 3.671225 3.671225 0
## 51 8.67 68.00 3.795489 3.795489 0
table(Dataset3$`Women Entrepreneurship Index`)
##
## 0 1
## 26 25
boxplot(Dataset3$`Inflation rate`~Dataset3$`Women Entrepreneurship Index`, Dataset3)
modelo_logit1 <- glm(Dataset3$`Women Entrepreneurship Index` ~ Dataset3$`Inflation rate`,
data = Dataset3, family = "binomial")
summary(modelo_logit1)
##
## Call:
## glm(formula = Dataset3$`Women Entrepreneurship Index` ~ Dataset3$`Inflation rate`,
## family = "binomial", data = Dataset3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.10469 -0.64982 -0.00066 0.85234 1.43209
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7347 0.3712 1.979 0.04779 *
## Dataset3$`Inflation rate` -0.6065 0.2144 -2.828 0.00468 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70.681 on 50 degrees of freedom
## Residual deviance: 50.488 on 49 degrees of freedom
## AIC: 54.488
##
## Number of Fisher Scoring iterations: 6
###Evaluación del modelo Logit simple #1
#dif_residuos1 <- modelo_logit1\(null.deviance - modelo_logit1\)deviance
#df1 <- modelo_logit1\(df.null - modelo_logit1\)df.residual
#p_value1 <- pchisq(q = dif_residuos1,df = df1, lower.tail = FALSE)
#p_value1
dif_residuos1 <- modelo_logit1$null.deviance - modelo_logit1$deviance
# Grados libertad
df1 <- modelo_logit1$df.null - modelo_logit1$df.residual
# p-value
p_value1 <- pchisq(q = dif_residuos1,df = df1, lower.tail = FALSE)
p_value1
## [1] 6.998e-06
#Calcule el intervalo de confianza del modelo
confint(object = modelo_logit1, level = 0.95)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 2.5 % 97.5 %
## (Intercept) 0.0376576 1.5106602
## Dataset3$`Inflation rate` -1.1056479 -0.2579834
#si alguien tiene una inflación de 3.0, cual es la #probabilidad de tener un indice alto
inflación<-3.0
##La probabilidad es:
exp(0.7347-0.6065*(inflación))/(1+exp(0.7347-0.6065*(inflación)))*100
## [1] 25.25987
#######################Modelo Logit Múltiple No.2 ################################
modelo_logit <- glm(`Women Entrepreneurship Index` ~ `Inflation rate` +
`Female Labor Force Participation Rate`,
data = Dataset3, family = "binomial")
summary(modelo_logit)
##
## Call:
## glm(formula = `Women Entrepreneurship Index` ~ `Inflation rate` +
## `Female Labor Force Participation Rate`, family = "binomial",
## data = Dataset3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.97313 -0.46280 -0.00006 0.79328 1.55737
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.44365 3.37142 -1.911 0.0560 .
## `Inflation rate` -0.73190 0.29208 -2.506 0.0122 *
## `Female Labor Force Participation Rate` 0.11830 0.05557 2.129 0.0333 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70.681 on 50 degrees of freedom
## Residual deviance: 42.644 on 48 degrees of freedom
## AIC: 48.644
##
## Number of Fisher Scoring iterations: 7
#Una forma para mejorar la interpretación de los exponentes es elevarlos exp
exp(coefficients(modelo_logit))
## (Intercept) `Inflation rate`
## 0.001590594 0.480995725
## `Female Labor Force Participation Rate`
## 1.125585100
##El comando confint muestra el intervalo de confianza del modelo Logit
confint(modelo_logit)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 2.5 % 97.5 %
## (Intercept) -14.11299093 -0.9113272
## `Inflation rate` -1.42420583 -0.2788619
## `Female Labor Force Participation Rate` 0.02808125 0.2457519
###Evaluación del modelo Logit multiple
dif_residuos <- modelo_logit\(null.deviance - modelo_logit\)deviance
df <- modelo_logit\(df.null - modelo_logit\)df.residual
p_value <- pchisq(q = dif_residuos,df = df, lower.tail = FALSE)
p_value
dif_residuos <- modelo_logit$null.deviance - modelo_logit$deviance
# Grados libertad
df <- modelo_logit$df.null - modelo_logit$df.residual
# p-value
p_value <- pchisq(q = dif_residuos,df = df, lower.tail = FALSE)
p_value
## [1] 8.163183e-07