library(ggplot2)
library(msm)
library(sandwich)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(MASS)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(FactoMineR)
library(ggplot2)
library(nFactors)
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:msm':
##
## cav
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
##
## melanoma
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
library(readr)
sbie <- read_csv("~/Dr Nyagumbo/Sbie/sbie.csv")
## Parsed with column specification:
## cols(
## .default = col_character(),
## relative_yield = col_double(),
## age = col_double(),
## years_in_simlesa = col_double(),
## no_hh_members = col_double(),
## hh_help = col_double(),
## contribution_of_labor = col_double()
## )
## See spec(...) for full column specifications.
View(sbie)
attach(sbie)
names(sbie)
## [1] "site" "season"
## [3] "farmer_name" "treatment"
## [5] "relative_yield" "sex"
## [7] "marital_status" "age"
## [9] "years_in_simlesa" "ca_before_simlesa"
## [11] "othe_research_trails" "reason_for_selection"
## [13] "soil_type" "no_hh_members"
## [15] "hh_help" "contribution_of_labor"
## [17] "education_level" "hle"
## [19] "paid_work" "own_work"
## [21] "pry_occupation" "sec_occupation"
## [23] "asset_ownership" "buildings"
## [25] "farm_tools" "cattle_ownership"
## [27] "other_livestock" "performance_simlesa"
## [29] "evaluation" "access_to_information"
## [31] "access_to_credit" "loan_purpose"
## [33] "living_standard"
#names(sbie)
site=as.factor(site)
season=as.factor(season)
treatment=as.factor(treatment)
sex=as.factor(sex)
marital_status=as.factor(marital_status)
#age continues variable
#years in simlesa
ca_before_simlesa=as.factor(ca_before_simlesa)
othe_research_trails=as.factor(othe_research_trails)
reason_for_selection=as.factor(reason_for_selection)
soil_type=as.factor(soil_type)
#no_hh_members continues
#hh_help number of hse hold that help in the field
#contribution_of_labour
education_level=as.factor(education_level)
hle=as.factor(hle)
paid_work=as.factor(paid_work)
own_work=as.factor(own_work)
pry_occupation=as.factor(pry_occupation)
sec_occupation=as.factor(sec_occupation)
asset_ownership=as.factor(asset_ownership)
buildings=as.factor(buildings)
farm_tools=as.factor(farm_tools)
cattle_ownership=as.factor(cattle_ownership)
other_livestock=as.factor(other_livestock)
performance_simlesa=as.factor(performance_simlesa)
evaluation=as.factor(evaluation)
access_to_information=as.factor(access_to_information)
access_to_credit=as.factor(access_to_credit)
loan_purpose=as.factor(loan_purpose)
living_standard=as.factor(living_standard)
#summary(sbie)
library(readr)
sbie_explanatory <- read_csv("~/Dr Nyagumbo/Sbie/sbie_explanatory.csv")
## Parsed with column specification:
## cols(
## .default = col_double()
## )
## See spec(...) for full column specifications.
View(sbie_explanatory)
attach(sbie_explanatory)
## The following objects are masked _by_ .GlobalEnv:
##
## access_to_credit, access_to_information, asset_ownership,
## buildings, ca_before_simlesa, cattle_ownership,
## education_level, evaluation, farm_tools, hle, living_standard,
## loan_purpose, marital_status, othe_research_trails,
## other_livestock, own_work, paid_work, performance_simlesa,
## pry_occupation, reason_for_selection, sec_occupation, sex,
## soil_type
## The following objects are masked from sbie:
##
## access_to_credit, access_to_information, age, asset_ownership,
## buildings, ca_before_simlesa, cattle_ownership,
## contribution_of_labor, education_level, evaluation,
## farm_tools, hh_help, hle, living_standard, loan_purpose,
## marital_status, no_hh_members, othe_research_trails,
## other_livestock, own_work, paid_work, performance_simlesa,
## pry_occupation, reason_for_selection, relative_yield,
## sec_occupation, sex, soil_type, years_in_simlesa
res.pca <- prcomp(sbie_explanatory, scale = TRUE)
fviz_eig(res.pca)
fviz_pca_var(res.pca,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
fviz_pca_ind(res.pca,
col.ind = "cos2", # Color by the quality of representation
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
ev<-eigen(cor(sbie_explanatory))
ap<-parallel(subject=nrow(sbie_explanatory),var=ncol(sbie_explanatory),rep=100,cent=.05)
nS<-nScree(x=ev$values,parallel=ap$eigen$qevpea)
plotnScree(nS)
fac<-factanal(sbie_explanatory,5,rotation="varimax")
print(fac,digits = 2,sort=TRUE)
##
## Call:
## factanal(x = sbie_explanatory, factors = 5, rotation = "varimax")
##
## Uniquenesses:
## relative_yield sex marital_status
## 0.95 0.74 0.73
## age years_in_simlesa ca_before_simlesa
## 0.78 0.90 0.43
## othe_research_trails reason_for_selection soil_type
## 0.59 0.37 0.64
## no_hh_members hh_help contribution_of_labor
## 0.13 0.02 0.81
## education_level hle paid_work
## 0.54 0.63 0.66
## own_work pry_occupation sec_occupation
## 0.54 0.49 0.00
## asset_ownership buildings farm_tools
## 0.73 0.70 0.57
## cattle_ownership other_livestock performance_simlesa
## 0.63 0.45 0.32
## evaluation access_to_information access_to_credit
## 0.00 0.53 0.07
## loan_purpose living_standard
## 0.02 0.95
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5
## othe_research_trails -0.55 -0.14 -0.27
## reason_for_selection 0.58 -0.29 0.44 0.13
## own_work 0.52 -0.18 -0.13 0.37
## performance_simlesa 0.59 -0.13 0.36 0.42
## access_to_information 0.63 0.15 -0.21
## access_to_credit 0.81 0.26 0.14 -0.42
## loan_purpose -0.76 -0.33 -0.12 0.53
## ca_before_simlesa -0.30 -0.52 -0.44 0.13
## soil_type 0.28 0.51 0.12
## education_level 0.15 0.55 0.28 0.22
## sec_occupation 0.37 -0.73 -0.12 0.56
## cattle_ownership 0.58 -0.16
## no_hh_members -0.45 -0.21 0.77 0.14
## hh_help 0.11 0.86 0.33 -0.34
## paid_work 0.53 0.21
## hle 0.15 0.59
## other_livestock 0.70 -0.22
## pry_occupation 0.14 0.15 0.42 0.55
## evaluation 0.15 0.49 0.19 0.12 0.82
## relative_yield -0.20
## sex -0.46 0.11 0.18
## marital_status -0.15 -0.18 -0.46
## age -0.21 0.12 -0.23 0.32
## years_in_simlesa 0.26 0.16
## contribution_of_labor -0.23 0.33 0.13
## asset_ownership -0.13 0.50
## buildings 0.16 -0.35 0.38 0.11
## farm_tools -0.44 -0.35 0.30 -0.18
## living_standard 0.19 -0.10
##
## Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings 3.95 2.97 2.58 2.36 2.20
## Proportion Var 0.14 0.10 0.09 0.08 0.08
## Cumulative Var 0.14 0.24 0.33 0.41 0.49
##
## Test of the hypothesis that 5 factors are sufficient.
## The chi square statistic is 16890.25 on 271 degrees of freedom.
## The p-value is 0
cutoff=.4,
eig.val <- get_eigenvalue(res.pca)
eig.val
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 4.9575116015 1.709487e+01 17.09487
## Dim.2 3.7528907914 1.294100e+01 30.03587
## Dim.3 3.4234641955 1.180505e+01 41.84092
## Dim.4 2.4369513464 8.403281e+00 50.24420
## Dim.5 2.1281275681 7.338371e+00 57.58257
## Dim.6 1.8677276394 6.440440e+00 64.02301
## Dim.7 1.6586307011 5.719416e+00 69.74243
## Dim.8 1.2856193211 4.433170e+00 74.17560
## Dim.9 1.2188170247 4.202817e+00 78.37841
## Dim.10 1.0516392521 3.626342e+00 82.00476
## Dim.11 0.8502617737 2.931937e+00 84.93669
## Dim.12 0.7700621125 2.655387e+00 87.59208
## Dim.13 0.6831232880 2.355598e+00 89.94768
## Dim.14 0.6616682150 2.281615e+00 92.22929
## Dim.15 0.4114402672 1.418760e+00 93.64805
## Dim.16 0.3991688407 1.376444e+00 95.02450
## Dim.17 0.3569257140 1.230778e+00 96.25527
## Dim.18 0.2724150828 9.393624e-01 97.19464
## Dim.19 0.2203903566 7.599667e-01 97.95460
## Dim.20 0.1677214419 5.783498e-01 98.53295
## Dim.21 0.1392943769 4.803254e-01 99.01328
## Dim.22 0.0821378871 2.832341e-01 99.29651
## Dim.23 0.0752868576 2.596099e-01 99.55612
## Dim.24 0.0503594344 1.736532e-01 99.72978
## Dim.25 0.0438519707 1.512137e-01 99.88099
## Dim.26 0.0197040930 6.794515e-02 99.94894
## Dim.27 0.0104784228 3.613249e-02 99.98507
## Dim.28 0.0041517546 1.431640e-02 99.99938
## Dim.29 0.0001786691 6.161004e-04 100.00000
Confirmatory factor analysis
library(sem)
library(lavaan)
## This is lavaan 0.6-3
## lavaan is BETA software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following objects are masked from 'package:sem':
##
## cfa, sem
## The following object is masked from 'package:psych':
##
## cor2cov
library(semPlot)
library(foreign)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
HS.model <- 'relative_yield=~ sex+marital_status+age+years_in_simlesa+ca_before_simlesa+othe_research_trails+other_livestock+reason_for_selection+soil_type+no_hh_members+hh_help+contribution_of_labor+education_level+hle+paid_work+own_work+pry_occupation+sec_occupation+asset_ownership+buildings+farm_tools+cattle_ownership+performance_simlesa+evaluation+access_to_information+access_to_credit+loan_purpose+living_standard'
fit <- cfa(HS.model, data=sbie_explanatory)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
summary(fit, fit.measures=TRUE)
## lavaan 0.6-3 ended normally after 187 iterations
##
## Optimization method NLMINB
## Number of free parameters 56
##
## Number of observations 739
##
## Estimator ML
## Model Fit Test Statistic 24061.595
## Degrees of freedom 350
## P-value (Chi-square) 0.000
##
## Model test baseline model:
##
## Minimum Function Test Statistic 27120.185
## Degrees of freedom 378
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 0.113
## Tucker-Lewis Index (TLI) 0.042
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -29512.035
## Loglikelihood unrestricted model (H1) -17481.238
##
## Number of free parameters 56
## Akaike (AIC) 59136.071
## Bayesian (BIC) 59393.968
## Sample-size adjusted Bayesian (BIC) 59216.148
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.303
## 90 Percent Confidence Interval 0.300 0.306
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.191
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## relative_yield =~
## sex 1.000
## marital_status -0.833 0.646 -1.289 0.197
## age -12.019 3.647 -3.295 0.001
## years_in_simls -0.197 0.338 -0.583 0.560
## ca_before_smls 0.642 0.145 4.442 0.000
## oth_rsrch_trls 1.537 0.194 7.915 0.000
## other_livestck 1.601 0.229 6.983 0.000
## reasn_fr_slctn -2.447 0.336 -7.277 0.000
## soil_type -2.327 0.330 -7.059 0.000
## no_hh_members 10.651 1.437 7.411 0.000
## hh_help 0.728 0.920 0.791 0.429
## contrbtn_f_lbr 3.157 0.622 5.077 0.000
## education_levl -1.108 0.206 -5.384 0.000
## hle 0.683 0.123 5.539 0.000
## paid_work -0.239 0.120 -1.995 0.046
## own_work -14.285 1.771 -8.067 0.000
## pry_occupation -0.984 0.185 -5.318 0.000
## sec_occupation -3.893 0.922 -4.224 0.000
## asset_ownershp -0.493 0.263 -1.874 0.061
## buildings -0.073 0.224 -0.328 0.743
## farm_tools 0.969 0.142 6.833 0.000
## cattle_ownrshp -0.485 0.149 -3.256 0.001
## performnc_smls -1.091 0.170 -6.424 0.000
## evaluation -3.860 0.629 -6.135 0.000
## accss_t_nfrmtn -2.760 0.348 -7.933 0.000
## access_to_crdt -4.050 0.417 -9.721 0.000
## loan_purpose 30.558 3.147 9.710 0.000
## living_standrd -0.871 0.234 -3.718 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .sex 0.143 0.007 19.148 0.000
## .marital_status 5.594 0.291 19.221 0.000
## .age 160.615 8.360 19.213 0.000
## .years_in_simls 1.553 0.081 19.222 0.000
## .ca_before_smls 0.226 0.012 19.203 0.000
## .oth_rsrch_trls 0.176 0.009 19.079 0.000
## .other_livestck 0.350 0.018 19.144 0.000
## .reasn_fr_slctn 0.685 0.036 19.129 0.000
## .soil_type 0.707 0.037 19.141 0.000
## .no_hh_members 11.941 0.625 19.121 0.000
## .hh_help 11.468 0.597 19.222 0.000
## .contrbtn_f_lbr 3.846 0.200 19.195 0.000
## .education_levl 0.401 0.021 19.190 0.000
## .hle 0.140 0.007 19.187 0.000
## .paid_work 0.187 0.010 19.219 0.000
## .own_work 13.571 0.712 19.061 0.000
## .pry_occupation 0.328 0.017 19.191 0.000
## .sec_occupation 9.412 0.490 19.205 0.000
## .asset_ownershp 0.910 0.047 19.220 0.000
## .buildings 0.681 0.035 19.222 0.000
## .farm_tools 0.140 0.007 19.151 0.000
## .cattle_ownrshp 0.269 0.014 19.213 0.000
## .performnc_smls 0.223 0.012 19.165 0.000
## .evaluation 3.267 0.170 19.174 0.000
## .accss_t_nfrmtn 0.561 0.029 19.077 0.000
## .access_to_crdt 0.016 0.003 5.424 0.000
## .loan_purpose 1.392 0.178 7.825 0.000
## .living_standrd 0.640 0.033 19.210 0.000
## relative_yield 0.019 0.004 4.733 0.000
m1 <- lm(relative_yield ~farm_tools +othe_research_trails+soil_type+own_work+sec_occupation+access_to_credit+loan_purpose, data=sbie)
summary(m1) # printing out the results of the model
##
## Call:
## lm(formula = relative_yield ~ farm_tools + othe_research_trails +
## soil_type + own_work + sec_occupation + access_to_credit +
## loan_purpose, data = sbie)
##
## Residuals:
## Min 1Q Median 3Q Max
## -145.831 -31.412 -5.769 24.713 243.317
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 93.1654 17.4716 5.332
## farm_toolsrich -12.9113 5.9418 -2.173
## othe_research_trailsyes 6.0508 6.6881 0.905
## soil_typesandy -7.3260 6.9188 -1.059
## soil_typesandy clay 0.8456 6.5935 0.128
## own_workyes -13.6880 5.8738 -2.330
## sec_occupationnot agriculture related 54.9209 13.5045 4.067
## sec_occupationoff-farm self-employment 46.9567 15.0267 3.125
## sec_occupationown farming 47.3177 14.2966 3.310
## access_to_creditno -31.4186 12.5281 -2.508
## access_to_credityes -20.3618 11.5335 -1.765
## loan_purposenon agricultural 2.9935 6.8093 0.440
## Pr(>|t|)
## (Intercept) 1.38e-07 ***
## farm_toolsrich 0.03017 *
## othe_research_trailsyes 0.36598
## soil_typesandy 0.29009
## soil_typesandy clay 0.89800
## own_workyes 0.02012 *
## sec_occupationnot agriculture related 5.40e-05 ***
## sec_occupationoff-farm self-employment 0.00186 **
## sec_occupationown farming 0.00099 ***
## access_to_creditno 0.01241 *
## access_to_credityes 0.07800 .
## loan_purposenon agricultural 0.66037
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.39 on 599 degrees of freedom
## (129 observations deleted due to missingness)
## Multiple R-squared: 0.09467, Adjusted R-squared: 0.07805
## F-statistic: 5.694 on 11 and 599 DF, p-value: 9.201e-09
#anova(m1)
m1 <- lm(relative_yield ~farm_tools +othe_research_trails+soil_type+own_work+sec_occupation+access_to_credit+loan_purpose, family=“poisson”, data=sbie) #summary(m1) # printing out the results of the model anova(m1)
m2<-glm(relative_yield ~cattle_ownership+education_level+evaluation+reason_for_selection+buildings+ca_before_simlesa+no_hh_members+sex+asset_ownership, data=sbie)
anova(m2)
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: relative_yield
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 723 1773018
## cattle_ownership 2 6560 721 1766458
## education_level 2 25558 719 1740900
## evaluation 1 38068 718 1702832
## reason_for_selection 2 91968 716 1610864
## buildings 2 49567 714 1561297
## ca_before_simlesa 1 1837 713 1559459
## no_hh_members 1 29746 712 1529713
## sex 1 2789 711 1526925
## asset_ownership 2 54708 709 1472217
summary(m2)
##
## Call:
## glm(formula = relative_yield ~ cattle_ownership + education_level +
## evaluation + reason_for_selection + buildings + ca_before_simlesa +
## no_hh_members + sex + asset_ownership, data = sbie)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -122.909 -29.460 -2.867 23.935 229.798
##
## Coefficients:
## Estimate
## (Intercept) 40.7165
## cattle_ownershipbetween 1 and 9 16.0404
## cattle_ownershipzero 32.4499
## education_levellevel 7 to 12 13.6543
## education_levelnever been to school can<U+0092>t read / write 16.1746
## evaluationvery good 21.5578
## reason_for_selectionany other reason -19.2196
## reason_for_selectiongood soils plusability to run trails/hardworking 23.6870
## buildingspoor -21.4485
## buildingsrich -2.3207
## ca_before_simlesayes -0.2845
## no_hh_members 1.8202
## sexMale 11.6707
## asset_ownershippoor -47.7258
## asset_ownershiprich 14.7204
## Std. Error
## (Intercept) 22.3721
## cattle_ownershipbetween 1 and 9 17.7127
## cattle_ownershipzero 17.9333
## education_levellevel 7 to 12 4.8224
## education_levelnever been to school can<U+0092>t read / write 8.8325
## evaluationvery good 5.1618
## reason_for_selectionany other reason 7.3145
## reason_for_selectiongood soils plusability to run trails/hardworking 6.1897
## buildingspoor 5.6881
## buildingsrich 6.2718
## ca_before_simlesayes 4.7570
## no_hh_members 0.6043
## sexMale 6.8100
## asset_ownershippoor 13.3463
## asset_ownershiprich 5.3336
## t value
## (Intercept) 1.820
## cattle_ownershipbetween 1 and 9 0.906
## cattle_ownershipzero 1.809
## education_levellevel 7 to 12 2.831
## education_levelnever been to school can<U+0092>t read / write 1.831
## evaluationvery good 4.176
## reason_for_selectionany other reason -2.628
## reason_for_selectiongood soils plusability to run trails/hardworking 3.827
## buildingspoor -3.771
## buildingsrich -0.370
## ca_before_simlesayes -0.060
## no_hh_members 3.012
## sexMale 1.714
## asset_ownershippoor -3.576
## asset_ownershiprich 2.760
## Pr(>|t|)
## (Intercept) 0.069185
## cattle_ownershipbetween 1 and 9 0.365462
## cattle_ownershipzero 0.070800
## education_levellevel 7 to 12 0.004766
## education_levelnever been to school can<U+0092>t read / write 0.067480
## evaluationvery good 3.33e-05
## reason_for_selectionany other reason 0.008785
## reason_for_selectiongood soils plusability to run trails/hardworking 0.000141
## buildingspoor 0.000176
## buildingsrich 0.711473
## ca_before_simlesayes 0.952334
## no_hh_members 0.002685
## sexMale 0.087012
## asset_ownershippoor 0.000373
## asset_ownershiprich 0.005930
##
## (Intercept) .
## cattle_ownershipbetween 1 and 9
## cattle_ownershipzero .
## education_levellevel 7 to 12 **
## education_levelnever been to school can<U+0092>t read / write .
## evaluationvery good ***
## reason_for_selectionany other reason **
## reason_for_selectiongood soils plusability to run trails/hardworking ***
## buildingspoor ***
## buildingsrich
## ca_before_simlesayes
## no_hh_members **
## sexMale .
## asset_ownershippoor ***
## asset_ownershiprich **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 2076.47)
##
## Null deviance: 1773018 on 723 degrees of freedom
## Residual deviance: 1472217 on 709 degrees of freedom
## (16 observations deleted due to missingness)
## AIC: 7601.7
##
## Number of Fisher Scoring iterations: 2
Graphical presentation
stat_box_data <- function(relative_yield, upper_limit = max(relative_yield)*1.5) {
return(
data.frame(
y = 400,
label = paste('N =', length(relative_yield), '\n',
'Mean =', round(mean(relative_yield), 0), '\n')
)
)
}
#####################
m<-ggplot(sbie, aes(x = sex, y = relative_yield))+geom_violin()+ geom_boxplot(size=0.6,width = 0.2,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
######################################################
m<-ggplot(sbie, aes(x = sex, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+facet_wrap(.~education_level)
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
#################################################
m<-ggplot(sbie, aes(x = sex, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+facet_wrap(.~evaluation)
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
###################################################################
m<-ggplot(sbie, aes(x = sex, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+facet_wrap(.~living_standard)
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
############################################
m<-ggplot(sbie, aes(x =education_level, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Educational Level")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
##############################################################
m<-ggplot(sbie, aes(x =loan_purpose, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Loan Purpose")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
###############################################################
m<-ggplot(sbie, aes(x =soil_type, y = relative_yield))+geom_violin()+ geom_boxplot(size=0.6,width = 0.3,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Soil Type")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
#####################################################
m<-ggplot(sbie, aes(x =living_standard, y = relative_yield))+geom_violin()+ geom_boxplot(size=0.6,width =0.3,outlier.colour = "red",outlier.shape = 1, shape=6,notch = TRUE) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Standard of Living")+theme_classic()
m+
stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2.2
)+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
## Warning: Removed 16 rows containing non-finite values (stat_summary).
Uing the models which accommodate binary variables
library(readr)
sbie_logit <- read_csv("~/Dr Nyagumbo/Sbie/sbie_logit.csv")
## Parsed with column specification:
## cols(
## .default = col_double()
## )
## See spec(...) for full column specifications.
View(sbie_logit)
attach(sbie_logit)
## The following objects are masked _by_ .GlobalEnv:
##
## access_to_credit, access_to_information, asset_ownership,
## buildings, ca_before_simlesa, cattle_ownership,
## education_level, evaluation, farm_tools, hle, living_standard,
## loan_purpose, marital_status, othe_research_trails,
## other_livestock, own_work, paid_work, performance_simlesa,
## pry_occupation, reason_for_selection, sec_occupation, sex,
## soil_type
## The following objects are masked from sbie_explanatory:
##
## access_to_credit, access_to_information, age, asset_ownership,
## buildings, ca_before_simlesa, cattle_ownership,
## contribution_of_labor, education_level, evaluation,
## farm_tools, hh_help, hle, living_standard, loan_purpose,
## marital_status, no_hh_members, othe_research_trails,
## other_livestock, own_work, paid_work, performance_simlesa,
## pry_occupation, reason_for_selection, relative_yield,
## sec_occupation, sex, soil_type, years_in_simlesa
## The following objects are masked from sbie:
##
## access_to_credit, access_to_information, age, asset_ownership,
## buildings, ca_before_simlesa, cattle_ownership,
## contribution_of_labor, education_level, evaluation,
## farm_tools, hh_help, hle, living_standard, loan_purpose,
## marital_status, no_hh_members, othe_research_trails,
## other_livestock, own_work, paid_work, performance_simlesa,
## pry_occupation, reason_for_selection, relative_yield,
## sec_occupation, sex, soil_type, years_in_simlesa
season=as.factor(season)
treatment=as.factor(treatment)
sex=as.factor(sex)
marital_status=as.factor(marital_status)
#age continues variable
#years in simlesa
ca_before_simlesa=as.factor(ca_before_simlesa)
othe_research_trails=as.factor(othe_research_trails)
reason_for_selection=as.factor(reason_for_selection)
soil_type=as.factor(soil_type)
#no_hh_members continues
#hh_help number of hse hold that help in the field
#contribution_of_labour
education_level=as.factor(education_level)
hle=as.factor(hle)
paid_work=as.factor(paid_work)
own_work=as.factor(own_work)
pry_occupation=as.factor(pry_occupation)
sec_occupation=as.factor(sec_occupation)
asset_ownership=as.factor(asset_ownership)
buildings=as.factor(buildings)
farm_tools=as.factor(farm_tools)
cattle_ownership=as.factor(cattle_ownership)
other_livestock=as.factor(other_livestock)
performance_simlesa=as.factor(performance_simlesa)
evaluation=as.factor(evaluation)
access_to_information=as.factor(access_to_information)
access_to_credit=as.factor(access_to_credit)
loan_purpose=as.factor(loan_purpose)
living_standard=as.factor(living_standard)
m_logit<- glm(yld ~farm_tools +othe_research_trails+own_work+sec_occupation+access_to_credit+loan_purpose,family=binomial(link='logit'),data=sbie_logit)
summary(m_logit)
##
## Call:
## glm(formula = yld ~ farm_tools + othe_research_trails + own_work +
## sec_occupation + access_to_credit + loan_purpose, family = binomial(link = "logit"),
## data = sbie_logit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1600 -1.1865 0.4518 1.0816 1.5342
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.11024 0.74637 -4.167 3.08e-05 ***
## farm_tools -0.80745 0.23349 -3.458 0.000544 ***
## othe_research_trails 0.66880 0.20104 3.327 0.000879 ***
## own_work -0.11904 0.02608 -4.564 5.02e-06 ***
## sec_occupation 0.07288 0.03136 2.324 0.020130 *
## access_to_credit 2.87817 0.63569 4.528 5.97e-06 ***
## loan_purpose 0.30191 0.07687 3.927 8.59e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1020.37 on 738 degrees of freedom
## Residual deviance: 956.27 on 732 degrees of freedom
## AIC: 970.27
##
## Number of Fisher Scoring iterations: 4
##################################
#Now we can run the anova() function on the model to analyze the table of deviance
anova(m_logit, test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: yld
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 738 1020.37
## farm_tools 1 0.8256 737 1019.55 0.3635435
## othe_research_trails 1 6.5490 736 1013.00 0.0104944 *
## own_work 1 13.3401 735 999.66 0.0002598 ***
## sec_occupation 1 5.5292 734 994.13 0.0187013 *
## access_to_credit 1 15.2786 733 978.85 9.276e-05 ***
## loan_purpose 1 22.5784 732 956.27 2.017e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###
##While no exact equivalent to the R2 of linear regression exists, the McFadden R2 index can be used to assess the model fit.
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(m_logit)
## llh llhNull G2 McFadden r2ML
## -478.13669618 -510.18718807 64.10098379 0.06282104 0.08308469
## r2CU
## 0.11098527
##################### odds ratio
library(aod)
wald.test(b = coef(m_logit), Sigma = vcov(m_logit), Terms = 4:6)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 44.1, df = 3, P(> X2) = 1.4e-09
exp(cbind(OR = coef(m_logit), confint(m_logit)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.04459014 0.008684922 0.1727574
## farm_tools 0.44599283 0.280849791 0.7024191
## othe_research_trails 1.95189518 1.318189746 2.9010758
## own_work 0.88777308 0.843033360 0.9339093
## sec_occupation 1.07560326 1.011877710 1.1444455
## access_to_credit 17.78164351 5.863929626 75.9910882
## loan_purpose 1.35244419 1.179854383 1.6072672
n_logit<- glm(yld ~cattle_ownership+education_level+evaluation+reason_for_selection+buildings+ca_before_simlesa+no_hh_members+sex+asset_ownership,family=binomial(link='logit'),data=sbie_logit)
summary(n_logit)
##
## Call:
## glm(formula = yld ~ cattle_ownership + education_level + evaluation +
## reason_for_selection + buildings + ca_before_simlesa + no_hh_members +
## sex + asset_ownership, family = binomial(link = "logit"),
## data = sbie_logit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7857 -1.1307 0.6861 1.0533 1.7469
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.80608 0.53553 -1.505 0.13227
## cattle_ownership 0.27347 0.20209 1.353 0.17600
## education_level 0.45975 0.15160 3.033 0.00242 **
## evaluation -0.29744 0.05678 -5.238 1.62e-07 ***
## reason_for_selection -0.07908 0.11444 -0.691 0.48955
## buildings -0.04081 0.13472 -0.303 0.76194
## ca_before_simlesa 0.02667 0.20070 0.133 0.89429
## no_hh_members 0.10362 0.02447 4.235 2.28e-05 ***
## sex -0.61128 0.21718 -2.815 0.00488 **
## asset_ownership 0.21545 0.10561 2.040 0.04135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1020.37 on 738 degrees of freedom
## Residual deviance: 965.19 on 729 degrees of freedom
## AIC: 985.19
##
## Number of Fisher Scoring iterations: 4
#Now we can run the anova() function on the model to analyze the table of deviance
anova(n_logit, test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: yld
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 738 1020.37
## cattle_ownership 1 0.0177 737 1020.36 0.894101
## education_level 1 0.1165 736 1020.24 0.732850
## evaluation 1 26.2200 735 994.02 3.047e-07 ***
## reason_for_selection 1 2.2983 734 991.72 0.129518
## buildings 1 0.0315 733 991.69 0.859117
## ca_before_simlesa 1 0.0807 732 991.61 0.776366
## no_hh_members 1 15.3550 731 976.25 8.908e-05 ***
## sex 1 6.8844 730 969.37 0.008695 **
## asset_ownership 1 4.1839 729 965.19 0.040810 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
f_logit<- glm(yld ~sex+marital_status+age+years_in_simlesa+ca_before_simlesa+othe_research_trails+other_livestock+reason_for_selection+soil_type+no_hh_members+hh_help+contribution_of_labor+education_level+hle+paid_work+own_work+pry_occupation+sec_occupation+asset_ownership+buildings+farm_tools+cattle_ownership+performance_simlesa+evaluation+access_to_information+access_to_credit+loan_purpose+living_standard,family=binomial(link='logit'),data=sbie_logit)
summary(f_logit)
##
## Call:
## glm(formula = yld ~ sex + marital_status + age + years_in_simlesa +
## ca_before_simlesa + othe_research_trails + other_livestock +
## reason_for_selection + soil_type + no_hh_members + hh_help +
## contribution_of_labor + education_level + hle + paid_work +
## own_work + pry_occupation + sec_occupation + asset_ownership +
## buildings + farm_tools + cattle_ownership + performance_simlesa +
## evaluation + access_to_information + access_to_credit + loan_purpose +
## living_standard, family = binomial(link = "logit"), data = sbie_logit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3996 -0.9233 0.3401 0.8920 2.0299
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -16.641854 9.714864 -1.713 0.086707 .
## sex 8.127608 5.455513 1.490 0.136278
## marital_status 0.074832 0.222940 0.336 0.737127
## age -0.073684 0.023032 -3.199 0.001378 **
## years_in_simlesa 0.336490 0.678915 0.496 0.620156
## ca_before_simlesa -8.895319 2.439632 -3.646 0.000266 ***
## othe_research_trails 3.925710 2.799568 1.402 0.160839
## other_livestock 0.253609 0.634035 0.400 0.689163
## reason_for_selection -4.299973 1.969799 -2.183 0.029039 *
## soil_type 1.106331 1.137001 0.973 0.330541
## no_hh_members 0.879580 0.384575 2.287 0.022187 *
## hh_help -0.689783 0.288896 -2.388 0.016956 *
## contribution_of_labor -1.566537 0.609590 -2.570 0.010175 *
## education_level -2.219180 1.978190 -1.122 0.261938
## hle -3.697038 2.306423 -1.603 0.108950
## paid_work -4.072843 2.045678 -1.991 0.046486 *
## own_work -0.461448 0.164020 -2.813 0.004903 **
## pry_occupation 5.535901 1.912107 2.895 0.003789 **
## sec_occupation 0.527845 0.209950 2.514 0.011932 *
## asset_ownership -3.122559 2.007690 -1.555 0.119875
## buildings -0.007627 0.335526 -0.023 0.981865
## farm_tools 0.630685 3.290980 0.192 0.848024
## cattle_ownership -0.941065 3.166804 -0.297 0.766340
## performance_simlesa 5.774518 1.419364 4.068 4.73e-05 ***
## evaluation -0.822330 0.527231 -1.560 0.118828
## access_to_information 1.849765 1.448968 1.277 0.201740
## access_to_credit 14.679650 8.391815 1.749 0.080242 .
## loan_purpose 1.402258 1.112229 1.261 0.207394
## living_standard 0.695517 0.711549 0.977 0.328337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1020.37 on 738 degrees of freedom
## Residual deviance: 808.72 on 710 degrees of freedom
## AIC: 866.72
##
## Number of Fisher Scoring iterations: 7
age+ ca_before_simlesa+ reason_for_selection + no_hh_members+ hh_help + contribution_of_labor+ paid_work+ own_work+ pry_occupation+ sec_occupation + performance_simlesa+ access_to_credit +