library(ggplot2)
library(msm)
library(sandwich)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(MASS)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(FactoMineR)
library(ggplot2)
library(nFactors)
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:msm':
## 
##     cav
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
## 
##     parallel
library(readr)
sbie <- read_csv("~/Dr Nyagumbo/Sbie/sbie.csv")
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   relative_yield = col_double(),
##   age = col_double(),
##   years_in_simlesa = col_double(),
##   no_hh_members = col_double(),
##   hh_help = col_double(),
##   contribution_of_labor = col_double()
## )
## See spec(...) for full column specifications.
View(sbie)
attach(sbie)
names(sbie)
##  [1] "site"                  "season"               
##  [3] "farmer_name"           "treatment"            
##  [5] "relative_yield"        "sex"                  
##  [7] "marital_status"        "age"                  
##  [9] "years_in_simlesa"      "ca_before_simlesa"    
## [11] "othe_research_trails"  "reason_for_selection" 
## [13] "soil_type"             "no_hh_members"        
## [15] "hh_help"               "contribution_of_labor"
## [17] "education_level"       "hle"                  
## [19] "paid_work"             "own_work"             
## [21] "pry_occupation"        "sec_occupation"       
## [23] "asset_ownership"       "buildings"            
## [25] "farm_tools"            "cattle_ownership"     
## [27] "other_livestock"       "performance_simlesa"  
## [29] "evaluation"            "access_to_information"
## [31] "access_to_credit"      "loan_purpose"         
## [33] "living_standard"
#names(sbie)
site=as.factor(site)
season=as.factor(season)
treatment=as.factor(treatment)
sex=as.factor(sex)
marital_status=as.factor(marital_status)
#age continues variable
#years in simlesa
ca_before_simlesa=as.factor(ca_before_simlesa)
othe_research_trails=as.factor(othe_research_trails)
reason_for_selection=as.factor(reason_for_selection)
soil_type=as.factor(soil_type)
#no_hh_members continues
#hh_help number of hse hold that help in the field 
#contribution_of_labour
education_level=as.factor(education_level)
hle=as.factor(hle)
paid_work=as.factor(paid_work)
own_work=as.factor(own_work)
pry_occupation=as.factor(pry_occupation)
sec_occupation=as.factor(sec_occupation)
asset_ownership=as.factor(asset_ownership)
buildings=as.factor(buildings)
farm_tools=as.factor(farm_tools)
cattle_ownership=as.factor(cattle_ownership)
other_livestock=as.factor(other_livestock)
performance_simlesa=as.factor(performance_simlesa)
evaluation=as.factor(evaluation)
access_to_information=as.factor(access_to_information)
access_to_credit=as.factor(access_to_credit)
loan_purpose=as.factor(loan_purpose)
living_standard=as.factor(living_standard)
#summary(sbie)
library(readr)
sbie_explanatory <- read_csv("~/Dr Nyagumbo/Sbie/sbie_explanatory.csv")
## Parsed with column specification:
## cols(
##   .default = col_double()
## )
## See spec(...) for full column specifications.
View(sbie_explanatory)
attach(sbie_explanatory)
## The following objects are masked _by_ .GlobalEnv:
## 
##     access_to_credit, access_to_information, asset_ownership,
##     buildings, ca_before_simlesa, cattle_ownership,
##     education_level, evaluation, farm_tools, hle, living_standard,
##     loan_purpose, marital_status, othe_research_trails,
##     other_livestock, own_work, paid_work, performance_simlesa,
##     pry_occupation, reason_for_selection, sec_occupation, sex,
##     soil_type
## The following objects are masked from sbie:
## 
##     access_to_credit, access_to_information, age, asset_ownership,
##     buildings, ca_before_simlesa, cattle_ownership,
##     contribution_of_labor, education_level, evaluation,
##     farm_tools, hh_help, hle, living_standard, loan_purpose,
##     marital_status, no_hh_members, othe_research_trails,
##     other_livestock, own_work, paid_work, performance_simlesa,
##     pry_occupation, reason_for_selection, relative_yield,
##     sec_occupation, sex, soil_type, years_in_simlesa
res.pca <- prcomp(sbie_explanatory, scale = TRUE)
fviz_eig(res.pca)

fviz_pca_var(res.pca,
             col.var = "contrib", # Color by contributions to the PC
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
             )

fviz_pca_ind(res.pca,
             col.ind = "cos2", # Color by the quality of representation
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
             )

ev<-eigen(cor(sbie_explanatory))
ap<-parallel(subject=nrow(sbie_explanatory),var=ncol(sbie_explanatory),rep=100,cent=.05)
nS<-nScree(x=ev$values,parallel=ap$eigen$qevpea)
plotnScree(nS)

fac<-factanal(sbie_explanatory,5,rotation="varimax")
print(fac,digits = 2,sort=TRUE)
## 
## Call:
## factanal(x = sbie_explanatory, factors = 5, rotation = "varimax")
## 
## Uniquenesses:
##        relative_yield                   sex        marital_status 
##                  0.95                  0.74                  0.73 
##                   age      years_in_simlesa     ca_before_simlesa 
##                  0.78                  0.90                  0.43 
##  othe_research_trails  reason_for_selection             soil_type 
##                  0.59                  0.37                  0.64 
##         no_hh_members               hh_help contribution_of_labor 
##                  0.13                  0.02                  0.81 
##       education_level                   hle             paid_work 
##                  0.54                  0.63                  0.66 
##              own_work        pry_occupation        sec_occupation 
##                  0.54                  0.49                  0.00 
##       asset_ownership             buildings            farm_tools 
##                  0.73                  0.70                  0.57 
##      cattle_ownership       other_livestock   performance_simlesa 
##                  0.63                  0.45                  0.32 
##            evaluation access_to_information      access_to_credit 
##                  0.00                  0.53                  0.07 
##          loan_purpose       living_standard 
##                  0.02                  0.95 
## 
## Loadings:
##                       Factor1 Factor2 Factor3 Factor4 Factor5
## othe_research_trails  -0.55           -0.14           -0.27  
## reason_for_selection   0.58   -0.29    0.44    0.13          
## own_work               0.52           -0.18   -0.13    0.37  
## performance_simlesa    0.59           -0.13    0.36    0.42  
## access_to_information  0.63                    0.15   -0.21  
## access_to_credit       0.81    0.26    0.14   -0.42          
## loan_purpose          -0.76   -0.33   -0.12    0.53          
## ca_before_simlesa     -0.30   -0.52           -0.44    0.13  
## soil_type              0.28    0.51    0.12                  
## education_level        0.15    0.55            0.28    0.22  
## sec_occupation         0.37   -0.73           -0.12    0.56  
## cattle_ownership               0.58   -0.16                  
## no_hh_members         -0.45   -0.21    0.77    0.14          
## hh_help                        0.11    0.86    0.33   -0.34  
## paid_work                              0.53            0.21  
## hle                                    0.15    0.59          
## other_livestock                                0.70   -0.22  
## pry_occupation         0.14    0.15    0.42            0.55  
## evaluation             0.15    0.49    0.19    0.12    0.82  
## relative_yield                                        -0.20  
## sex                   -0.46    0.11    0.18                  
## marital_status        -0.15           -0.18   -0.46          
## age                           -0.21    0.12   -0.23    0.32  
## years_in_simlesa               0.26                    0.16  
## contribution_of_labor                 -0.23    0.33    0.13  
## asset_ownership       -0.13    0.50                          
## buildings              0.16   -0.35    0.38    0.11          
## farm_tools            -0.44   -0.35    0.30   -0.18          
## living_standard                0.19           -0.10          
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings       3.95    2.97    2.58    2.36    2.20
## Proportion Var    0.14    0.10    0.09    0.08    0.08
## Cumulative Var    0.14    0.24    0.33    0.41    0.49
## 
## Test of the hypothesis that 5 factors are sufficient.
## The chi square statistic is 16890.25 on 271 degrees of freedom.
## The p-value is 0

cutoff=.4,

eig.val <- get_eigenvalue(res.pca)
eig.val
##          eigenvalue variance.percent cumulative.variance.percent
## Dim.1  4.9575116015     1.709487e+01                    17.09487
## Dim.2  3.7528907914     1.294100e+01                    30.03587
## Dim.3  3.4234641955     1.180505e+01                    41.84092
## Dim.4  2.4369513464     8.403281e+00                    50.24420
## Dim.5  2.1281275681     7.338371e+00                    57.58257
## Dim.6  1.8677276394     6.440440e+00                    64.02301
## Dim.7  1.6586307011     5.719416e+00                    69.74243
## Dim.8  1.2856193211     4.433170e+00                    74.17560
## Dim.9  1.2188170247     4.202817e+00                    78.37841
## Dim.10 1.0516392521     3.626342e+00                    82.00476
## Dim.11 0.8502617737     2.931937e+00                    84.93669
## Dim.12 0.7700621125     2.655387e+00                    87.59208
## Dim.13 0.6831232880     2.355598e+00                    89.94768
## Dim.14 0.6616682150     2.281615e+00                    92.22929
## Dim.15 0.4114402672     1.418760e+00                    93.64805
## Dim.16 0.3991688407     1.376444e+00                    95.02450
## Dim.17 0.3569257140     1.230778e+00                    96.25527
## Dim.18 0.2724150828     9.393624e-01                    97.19464
## Dim.19 0.2203903566     7.599667e-01                    97.95460
## Dim.20 0.1677214419     5.783498e-01                    98.53295
## Dim.21 0.1392943769     4.803254e-01                    99.01328
## Dim.22 0.0821378871     2.832341e-01                    99.29651
## Dim.23 0.0752868576     2.596099e-01                    99.55612
## Dim.24 0.0503594344     1.736532e-01                    99.72978
## Dim.25 0.0438519707     1.512137e-01                    99.88099
## Dim.26 0.0197040930     6.794515e-02                    99.94894
## Dim.27 0.0104784228     3.613249e-02                    99.98507
## Dim.28 0.0041517546     1.431640e-02                    99.99938
## Dim.29 0.0001786691     6.161004e-04                   100.00000

Confirmatory factor analysis

library(sem)
library(lavaan)
## This is lavaan 0.6-3
## lavaan is BETA software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following objects are masked from 'package:sem':
## 
##     cfa, sem
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(semPlot)
library(foreign)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
HS.model <- 'relative_yield=~ sex+marital_status+age+years_in_simlesa+ca_before_simlesa+othe_research_trails+other_livestock+reason_for_selection+soil_type+no_hh_members+hh_help+contribution_of_labor+education_level+hle+paid_work+own_work+pry_occupation+sec_occupation+asset_ownership+buildings+farm_tools+cattle_ownership+performance_simlesa+evaluation+access_to_information+access_to_credit+loan_purpose+living_standard'
fit <- cfa(HS.model, data=sbie_explanatory)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
summary(fit, fit.measures=TRUE)
## lavaan 0.6-3 ended normally after 187 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         56
## 
##   Number of observations                           739
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                   24061.595
##   Degrees of freedom                               350
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic            27120.185
##   Degrees of freedom                               378
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.113
##   Tucker-Lewis Index (TLI)                       0.042
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -29512.035
##   Loglikelihood unrestricted model (H1)     -17481.238
## 
##   Number of free parameters                         56
##   Akaike (AIC)                               59136.071
##   Bayesian (BIC)                             59393.968
##   Sample-size adjusted Bayesian (BIC)        59216.148
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.303
##   90 Percent Confidence Interval          0.300  0.306
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.191
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                     Estimate  Std.Err  z-value  P(>|z|)
##   relative_yield =~                                    
##     sex                1.000                           
##     marital_status    -0.833    0.646   -1.289    0.197
##     age              -12.019    3.647   -3.295    0.001
##     years_in_simls    -0.197    0.338   -0.583    0.560
##     ca_before_smls     0.642    0.145    4.442    0.000
##     oth_rsrch_trls     1.537    0.194    7.915    0.000
##     other_livestck     1.601    0.229    6.983    0.000
##     reasn_fr_slctn    -2.447    0.336   -7.277    0.000
##     soil_type         -2.327    0.330   -7.059    0.000
##     no_hh_members     10.651    1.437    7.411    0.000
##     hh_help            0.728    0.920    0.791    0.429
##     contrbtn_f_lbr     3.157    0.622    5.077    0.000
##     education_levl    -1.108    0.206   -5.384    0.000
##     hle                0.683    0.123    5.539    0.000
##     paid_work         -0.239    0.120   -1.995    0.046
##     own_work         -14.285    1.771   -8.067    0.000
##     pry_occupation    -0.984    0.185   -5.318    0.000
##     sec_occupation    -3.893    0.922   -4.224    0.000
##     asset_ownershp    -0.493    0.263   -1.874    0.061
##     buildings         -0.073    0.224   -0.328    0.743
##     farm_tools         0.969    0.142    6.833    0.000
##     cattle_ownrshp    -0.485    0.149   -3.256    0.001
##     performnc_smls    -1.091    0.170   -6.424    0.000
##     evaluation        -3.860    0.629   -6.135    0.000
##     accss_t_nfrmtn    -2.760    0.348   -7.933    0.000
##     access_to_crdt    -4.050    0.417   -9.721    0.000
##     loan_purpose      30.558    3.147    9.710    0.000
##     living_standrd    -0.871    0.234   -3.718    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .sex               0.143    0.007   19.148    0.000
##    .marital_status    5.594    0.291   19.221    0.000
##    .age             160.615    8.360   19.213    0.000
##    .years_in_simls    1.553    0.081   19.222    0.000
##    .ca_before_smls    0.226    0.012   19.203    0.000
##    .oth_rsrch_trls    0.176    0.009   19.079    0.000
##    .other_livestck    0.350    0.018   19.144    0.000
##    .reasn_fr_slctn    0.685    0.036   19.129    0.000
##    .soil_type         0.707    0.037   19.141    0.000
##    .no_hh_members    11.941    0.625   19.121    0.000
##    .hh_help          11.468    0.597   19.222    0.000
##    .contrbtn_f_lbr    3.846    0.200   19.195    0.000
##    .education_levl    0.401    0.021   19.190    0.000
##    .hle               0.140    0.007   19.187    0.000
##    .paid_work         0.187    0.010   19.219    0.000
##    .own_work         13.571    0.712   19.061    0.000
##    .pry_occupation    0.328    0.017   19.191    0.000
##    .sec_occupation    9.412    0.490   19.205    0.000
##    .asset_ownershp    0.910    0.047   19.220    0.000
##    .buildings         0.681    0.035   19.222    0.000
##    .farm_tools        0.140    0.007   19.151    0.000
##    .cattle_ownrshp    0.269    0.014   19.213    0.000
##    .performnc_smls    0.223    0.012   19.165    0.000
##    .evaluation        3.267    0.170   19.174    0.000
##    .accss_t_nfrmtn    0.561    0.029   19.077    0.000
##    .access_to_crdt    0.016    0.003    5.424    0.000
##    .loan_purpose      1.392    0.178    7.825    0.000
##    .living_standrd    0.640    0.033   19.210    0.000
##     relative_yield    0.019    0.004    4.733    0.000
m1 <- lm(relative_yield ~farm_tools +othe_research_trails+soil_type+own_work+sec_occupation+access_to_credit+loan_purpose, data=sbie)
summary(m1)  # printing out the results of the model
## 
## Call:
## lm(formula = relative_yield ~ farm_tools + othe_research_trails + 
##     soil_type + own_work + sec_occupation + access_to_credit + 
##     loan_purpose, data = sbie)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -145.831  -31.412   -5.769   24.713  243.317 
## 
## Coefficients:
##                                        Estimate Std. Error t value
## (Intercept)                             93.1654    17.4716   5.332
## farm_toolsrich                         -12.9113     5.9418  -2.173
## othe_research_trailsyes                  6.0508     6.6881   0.905
## soil_typesandy                          -7.3260     6.9188  -1.059
## soil_typesandy clay                      0.8456     6.5935   0.128
## own_workyes                            -13.6880     5.8738  -2.330
## sec_occupationnot agriculture related   54.9209    13.5045   4.067
## sec_occupationoff-farm self-employment  46.9567    15.0267   3.125
## sec_occupationown farming               47.3177    14.2966   3.310
## access_to_creditno                     -31.4186    12.5281  -2.508
## access_to_credityes                    -20.3618    11.5335  -1.765
## loan_purposenon agricultural             2.9935     6.8093   0.440
##                                        Pr(>|t|)    
## (Intercept)                            1.38e-07 ***
## farm_toolsrich                          0.03017 *  
## othe_research_trailsyes                 0.36598    
## soil_typesandy                          0.29009    
## soil_typesandy clay                     0.89800    
## own_workyes                             0.02012 *  
## sec_occupationnot agriculture related  5.40e-05 ***
## sec_occupationoff-farm self-employment  0.00186 ** 
## sec_occupationown farming               0.00099 ***
## access_to_creditno                      0.01241 *  
## access_to_credityes                     0.07800 .  
## loan_purposenon agricultural            0.66037    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.39 on 599 degrees of freedom
##   (129 observations deleted due to missingness)
## Multiple R-squared:  0.09467,    Adjusted R-squared:  0.07805 
## F-statistic: 5.694 on 11 and 599 DF,  p-value: 9.201e-09
#anova(m1)

m1 <- lm(relative_yield ~farm_tools +othe_research_trails+soil_type+own_work+sec_occupation+access_to_credit+loan_purpose, family=“poisson”, data=sbie) #summary(m1) # printing out the results of the model anova(m1)

m2<-glm(relative_yield ~cattle_ownership+education_level+evaluation+reason_for_selection+buildings+ca_before_simlesa+no_hh_members+sex+asset_ownership, data=sbie)
anova(m2)
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: relative_yield
## 
## Terms added sequentially (first to last)
## 
## 
##                      Df Deviance Resid. Df Resid. Dev
## NULL                                   723    1773018
## cattle_ownership      2     6560       721    1766458
## education_level       2    25558       719    1740900
## evaluation            1    38068       718    1702832
## reason_for_selection  2    91968       716    1610864
## buildings             2    49567       714    1561297
## ca_before_simlesa     1     1837       713    1559459
## no_hh_members         1    29746       712    1529713
## sex                   1     2789       711    1526925
## asset_ownership       2    54708       709    1472217
summary(m2)
## 
## Call:
## glm(formula = relative_yield ~ cattle_ownership + education_level + 
##     evaluation + reason_for_selection + buildings + ca_before_simlesa + 
##     no_hh_members + sex + asset_ownership, data = sbie)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -122.909   -29.460    -2.867    23.935   229.798  
## 
## Coefficients:
##                                                                      Estimate
## (Intercept)                                                           40.7165
## cattle_ownershipbetween 1 and 9                                       16.0404
## cattle_ownershipzero                                                  32.4499
## education_levellevel 7 to 12                                          13.6543
## education_levelnever been to school can<U+0092>t read / write         16.1746
## evaluationvery good                                                   21.5578
## reason_for_selectionany other reason                                 -19.2196
## reason_for_selectiongood soils plusability to run trails/hardworking  23.6870
## buildingspoor                                                        -21.4485
## buildingsrich                                                         -2.3207
## ca_before_simlesayes                                                  -0.2845
## no_hh_members                                                          1.8202
## sexMale                                                               11.6707
## asset_ownershippoor                                                  -47.7258
## asset_ownershiprich                                                   14.7204
##                                                                      Std. Error
## (Intercept)                                                             22.3721
## cattle_ownershipbetween 1 and 9                                         17.7127
## cattle_ownershipzero                                                    17.9333
## education_levellevel 7 to 12                                             4.8224
## education_levelnever been to school can<U+0092>t read / write            8.8325
## evaluationvery good                                                      5.1618
## reason_for_selectionany other reason                                     7.3145
## reason_for_selectiongood soils plusability to run trails/hardworking     6.1897
## buildingspoor                                                            5.6881
## buildingsrich                                                            6.2718
## ca_before_simlesayes                                                     4.7570
## no_hh_members                                                            0.6043
## sexMale                                                                  6.8100
## asset_ownershippoor                                                     13.3463
## asset_ownershiprich                                                      5.3336
##                                                                      t value
## (Intercept)                                                            1.820
## cattle_ownershipbetween 1 and 9                                        0.906
## cattle_ownershipzero                                                   1.809
## education_levellevel 7 to 12                                           2.831
## education_levelnever been to school can<U+0092>t read / write          1.831
## evaluationvery good                                                    4.176
## reason_for_selectionany other reason                                  -2.628
## reason_for_selectiongood soils plusability to run trails/hardworking   3.827
## buildingspoor                                                         -3.771
## buildingsrich                                                         -0.370
## ca_before_simlesayes                                                  -0.060
## no_hh_members                                                          3.012
## sexMale                                                                1.714
## asset_ownershippoor                                                   -3.576
## asset_ownershiprich                                                    2.760
##                                                                      Pr(>|t|)
## (Intercept)                                                          0.069185
## cattle_ownershipbetween 1 and 9                                      0.365462
## cattle_ownershipzero                                                 0.070800
## education_levellevel 7 to 12                                         0.004766
## education_levelnever been to school can<U+0092>t read / write        0.067480
## evaluationvery good                                                  3.33e-05
## reason_for_selectionany other reason                                 0.008785
## reason_for_selectiongood soils plusability to run trails/hardworking 0.000141
## buildingspoor                                                        0.000176
## buildingsrich                                                        0.711473
## ca_before_simlesayes                                                 0.952334
## no_hh_members                                                        0.002685
## sexMale                                                              0.087012
## asset_ownershippoor                                                  0.000373
## asset_ownershiprich                                                  0.005930
##                                                                         
## (Intercept)                                                          .  
## cattle_ownershipbetween 1 and 9                                         
## cattle_ownershipzero                                                 .  
## education_levellevel 7 to 12                                         ** 
## education_levelnever been to school can<U+0092>t read / write        .  
## evaluationvery good                                                  ***
## reason_for_selectionany other reason                                 ** 
## reason_for_selectiongood soils plusability to run trails/hardworking ***
## buildingspoor                                                        ***
## buildingsrich                                                           
## ca_before_simlesayes                                                    
## no_hh_members                                                        ** 
## sexMale                                                              .  
## asset_ownershippoor                                                  ***
## asset_ownershiprich                                                  ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 2076.47)
## 
##     Null deviance: 1773018  on 723  degrees of freedom
## Residual deviance: 1472217  on 709  degrees of freedom
##   (16 observations deleted due to missingness)
## AIC: 7601.7
## 
## Number of Fisher Scoring iterations: 2

Graphical presentation

stat_box_data <- function(relative_yield, upper_limit = max(relative_yield)*1.5) {
  return( 
    data.frame(
      y = 400,
      label = paste('N =', length(relative_yield), '\n',
                    'Mean =', round(mean(relative_yield), 0), '\n')
    )
  )
}

#####################


m<-ggplot(sbie, aes(x = sex, y = relative_yield))+geom_violin()+ geom_boxplot(size=0.6,width = 0.2,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

## Warning: Removed 16 rows containing non-finite values (stat_summary).

######################################################
m<-ggplot(sbie, aes(x = sex, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+facet_wrap(.~education_level)
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

#################################################
m<-ggplot(sbie, aes(x = sex, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+facet_wrap(.~evaluation)
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

###################################################################

m<-ggplot(sbie, aes(x = sex, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Gender")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+facet_wrap(.~living_standard)
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

############################################

m<-ggplot(sbie, aes(x =education_level, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Educational Level")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

## Warning: Removed 16 rows containing non-finite values (stat_summary).

##############################################################


m<-ggplot(sbie, aes(x =loan_purpose, y = relative_yield))+ geom_boxplot(size=0.6,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Loan Purpose")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

## Warning: Removed 16 rows containing non-finite values (stat_summary).

###############################################################
m<-ggplot(sbie, aes(x =soil_type, y = relative_yield))+geom_violin()+ geom_boxplot(size=0.6,width = 0.3,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Soil Type")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

## Warning: Removed 16 rows containing non-finite values (stat_summary).

#####################################################

m<-ggplot(sbie, aes(x =living_standard, y = relative_yield))+geom_violin()+ geom_boxplot(size=0.6,width =0.3,outlier.colour = "red",outlier.shape = 1, shape=6,notch = TRUE) + geom_smooth(method=lm)+ ylab("Relative CA Maize Grain Yield [kg/ha]")+ xlab("Standard of Living")+theme_classic()
m+
  stat_summary(
    fun.data = stat_box_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.2
  )+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")
## Warning: Removed 16 rows containing non-finite values (stat_ydensity).
## Warning: Removed 16 rows containing non-finite values (stat_boxplot).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_summary).

## Warning: Removed 16 rows containing non-finite values (stat_summary).

Uing the models which accommodate binary variables

library(readr)
sbie_logit <- read_csv("~/Dr Nyagumbo/Sbie/sbie_logit.csv")
## Parsed with column specification:
## cols(
##   .default = col_double()
## )
## See spec(...) for full column specifications.
View(sbie_logit)
attach(sbie_logit)
## The following objects are masked _by_ .GlobalEnv:
## 
##     access_to_credit, access_to_information, asset_ownership,
##     buildings, ca_before_simlesa, cattle_ownership,
##     education_level, evaluation, farm_tools, hle, living_standard,
##     loan_purpose, marital_status, othe_research_trails,
##     other_livestock, own_work, paid_work, performance_simlesa,
##     pry_occupation, reason_for_selection, sec_occupation, sex,
##     soil_type
## The following objects are masked from sbie_explanatory:
## 
##     access_to_credit, access_to_information, age, asset_ownership,
##     buildings, ca_before_simlesa, cattle_ownership,
##     contribution_of_labor, education_level, evaluation,
##     farm_tools, hh_help, hle, living_standard, loan_purpose,
##     marital_status, no_hh_members, othe_research_trails,
##     other_livestock, own_work, paid_work, performance_simlesa,
##     pry_occupation, reason_for_selection, relative_yield,
##     sec_occupation, sex, soil_type, years_in_simlesa
## The following objects are masked from sbie:
## 
##     access_to_credit, access_to_information, age, asset_ownership,
##     buildings, ca_before_simlesa, cattle_ownership,
##     contribution_of_labor, education_level, evaluation,
##     farm_tools, hh_help, hle, living_standard, loan_purpose,
##     marital_status, no_hh_members, othe_research_trails,
##     other_livestock, own_work, paid_work, performance_simlesa,
##     pry_occupation, reason_for_selection, relative_yield,
##     sec_occupation, sex, soil_type, years_in_simlesa
season=as.factor(season)
treatment=as.factor(treatment)
sex=as.factor(sex)
marital_status=as.factor(marital_status)
#age continues variable
#years in simlesa
ca_before_simlesa=as.factor(ca_before_simlesa)
othe_research_trails=as.factor(othe_research_trails)
reason_for_selection=as.factor(reason_for_selection)
soil_type=as.factor(soil_type)
#no_hh_members continues
#hh_help number of hse hold that help in the field 
#contribution_of_labour
education_level=as.factor(education_level)
hle=as.factor(hle)
paid_work=as.factor(paid_work)
own_work=as.factor(own_work)
pry_occupation=as.factor(pry_occupation)
sec_occupation=as.factor(sec_occupation)
asset_ownership=as.factor(asset_ownership)
buildings=as.factor(buildings)
farm_tools=as.factor(farm_tools)
cattle_ownership=as.factor(cattle_ownership)
other_livestock=as.factor(other_livestock)
performance_simlesa=as.factor(performance_simlesa)
evaluation=as.factor(evaluation)
access_to_information=as.factor(access_to_information)
access_to_credit=as.factor(access_to_credit)
loan_purpose=as.factor(loan_purpose)
living_standard=as.factor(living_standard)
m_logit<- glm(yld ~farm_tools +othe_research_trails+own_work+sec_occupation+access_to_credit+loan_purpose,family=binomial(link='logit'),data=sbie_logit)
summary(m_logit)
## 
## Call:
## glm(formula = yld ~ farm_tools + othe_research_trails + own_work + 
##     sec_occupation + access_to_credit + loan_purpose, family = binomial(link = "logit"), 
##     data = sbie_logit)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1600  -1.1865   0.4518   1.0816   1.5342  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -3.11024    0.74637  -4.167 3.08e-05 ***
## farm_tools           -0.80745    0.23349  -3.458 0.000544 ***
## othe_research_trails  0.66880    0.20104   3.327 0.000879 ***
## own_work             -0.11904    0.02608  -4.564 5.02e-06 ***
## sec_occupation        0.07288    0.03136   2.324 0.020130 *  
## access_to_credit      2.87817    0.63569   4.528 5.97e-06 ***
## loan_purpose          0.30191    0.07687   3.927 8.59e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1020.37  on 738  degrees of freedom
## Residual deviance:  956.27  on 732  degrees of freedom
## AIC: 970.27
## 
## Number of Fisher Scoring iterations: 4
##################################

#Now we can run the anova() function on the model to analyze the table of deviance

anova(m_logit, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: yld
## 
## Terms added sequentially (first to last)
## 
## 
##                      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                   738    1020.37              
## farm_tools            1   0.8256       737    1019.55 0.3635435    
## othe_research_trails  1   6.5490       736    1013.00 0.0104944 *  
## own_work              1  13.3401       735     999.66 0.0002598 ***
## sec_occupation        1   5.5292       734     994.13 0.0187013 *  
## access_to_credit      1  15.2786       733     978.85 9.276e-05 ***
## loan_purpose          1  22.5784       732     956.27 2.017e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###
##While no exact equivalent to the R2 of linear regression exists, the McFadden R2 index can be used to assess the model fit.

library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(m_logit)
##           llh       llhNull            G2      McFadden          r2ML 
## -478.13669618 -510.18718807   64.10098379    0.06282104    0.08308469 
##          r2CU 
##    0.11098527
##################### odds ratio

library(aod)

wald.test(b = coef(m_logit), Sigma = vcov(m_logit), Terms = 4:6)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 44.1, df = 3, P(> X2) = 1.4e-09
exp(cbind(OR = coef(m_logit), confint(m_logit)))
## Waiting for profiling to be done...
##                               OR       2.5 %     97.5 %
## (Intercept)           0.04459014 0.008684922  0.1727574
## farm_tools            0.44599283 0.280849791  0.7024191
## othe_research_trails  1.95189518 1.318189746  2.9010758
## own_work              0.88777308 0.843033360  0.9339093
## sec_occupation        1.07560326 1.011877710  1.1444455
## access_to_credit     17.78164351 5.863929626 75.9910882
## loan_purpose          1.35244419 1.179854383  1.6072672
n_logit<- glm(yld ~cattle_ownership+education_level+evaluation+reason_for_selection+buildings+ca_before_simlesa+no_hh_members+sex+asset_ownership,family=binomial(link='logit'),data=sbie_logit)
summary(n_logit)
## 
## Call:
## glm(formula = yld ~ cattle_ownership + education_level + evaluation + 
##     reason_for_selection + buildings + ca_before_simlesa + no_hh_members + 
##     sex + asset_ownership, family = binomial(link = "logit"), 
##     data = sbie_logit)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7857  -1.1307   0.6861   1.0533   1.7469  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.80608    0.53553  -1.505  0.13227    
## cattle_ownership      0.27347    0.20209   1.353  0.17600    
## education_level       0.45975    0.15160   3.033  0.00242 ** 
## evaluation           -0.29744    0.05678  -5.238 1.62e-07 ***
## reason_for_selection -0.07908    0.11444  -0.691  0.48955    
## buildings            -0.04081    0.13472  -0.303  0.76194    
## ca_before_simlesa     0.02667    0.20070   0.133  0.89429    
## no_hh_members         0.10362    0.02447   4.235 2.28e-05 ***
## sex                  -0.61128    0.21718  -2.815  0.00488 ** 
## asset_ownership       0.21545    0.10561   2.040  0.04135 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1020.37  on 738  degrees of freedom
## Residual deviance:  965.19  on 729  degrees of freedom
## AIC: 985.19
## 
## Number of Fisher Scoring iterations: 4
#Now we can run the anova() function on the model to analyze the table of deviance

anova(n_logit, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: yld
## 
## Terms added sequentially (first to last)
## 
## 
##                      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                   738    1020.37              
## cattle_ownership      1   0.0177       737    1020.36  0.894101    
## education_level       1   0.1165       736    1020.24  0.732850    
## evaluation            1  26.2200       735     994.02 3.047e-07 ***
## reason_for_selection  1   2.2983       734     991.72  0.129518    
## buildings             1   0.0315       733     991.69  0.859117    
## ca_before_simlesa     1   0.0807       732     991.61  0.776366    
## no_hh_members         1  15.3550       731     976.25 8.908e-05 ***
## sex                   1   6.8844       730     969.37  0.008695 ** 
## asset_ownership       1   4.1839       729     965.19  0.040810 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
f_logit<- glm(yld ~sex+marital_status+age+years_in_simlesa+ca_before_simlesa+othe_research_trails+other_livestock+reason_for_selection+soil_type+no_hh_members+hh_help+contribution_of_labor+education_level+hle+paid_work+own_work+pry_occupation+sec_occupation+asset_ownership+buildings+farm_tools+cattle_ownership+performance_simlesa+evaluation+access_to_information+access_to_credit+loan_purpose+living_standard,family=binomial(link='logit'),data=sbie_logit)

summary(f_logit)
## 
## Call:
## glm(formula = yld ~ sex + marital_status + age + years_in_simlesa + 
##     ca_before_simlesa + othe_research_trails + other_livestock + 
##     reason_for_selection + soil_type + no_hh_members + hh_help + 
##     contribution_of_labor + education_level + hle + paid_work + 
##     own_work + pry_occupation + sec_occupation + asset_ownership + 
##     buildings + farm_tools + cattle_ownership + performance_simlesa + 
##     evaluation + access_to_information + access_to_credit + loan_purpose + 
##     living_standard, family = binomial(link = "logit"), data = sbie_logit)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3996  -0.9233   0.3401   0.8920   2.0299  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -16.641854   9.714864  -1.713 0.086707 .  
## sex                     8.127608   5.455513   1.490 0.136278    
## marital_status          0.074832   0.222940   0.336 0.737127    
## age                    -0.073684   0.023032  -3.199 0.001378 ** 
## years_in_simlesa        0.336490   0.678915   0.496 0.620156    
## ca_before_simlesa      -8.895319   2.439632  -3.646 0.000266 ***
## othe_research_trails    3.925710   2.799568   1.402 0.160839    
## other_livestock         0.253609   0.634035   0.400 0.689163    
## reason_for_selection   -4.299973   1.969799  -2.183 0.029039 *  
## soil_type               1.106331   1.137001   0.973 0.330541    
## no_hh_members           0.879580   0.384575   2.287 0.022187 *  
## hh_help                -0.689783   0.288896  -2.388 0.016956 *  
## contribution_of_labor  -1.566537   0.609590  -2.570 0.010175 *  
## education_level        -2.219180   1.978190  -1.122 0.261938    
## hle                    -3.697038   2.306423  -1.603 0.108950    
## paid_work              -4.072843   2.045678  -1.991 0.046486 *  
## own_work               -0.461448   0.164020  -2.813 0.004903 ** 
## pry_occupation          5.535901   1.912107   2.895 0.003789 ** 
## sec_occupation          0.527845   0.209950   2.514 0.011932 *  
## asset_ownership        -3.122559   2.007690  -1.555 0.119875    
## buildings              -0.007627   0.335526  -0.023 0.981865    
## farm_tools              0.630685   3.290980   0.192 0.848024    
## cattle_ownership       -0.941065   3.166804  -0.297 0.766340    
## performance_simlesa     5.774518   1.419364   4.068 4.73e-05 ***
## evaluation             -0.822330   0.527231  -1.560 0.118828    
## access_to_information   1.849765   1.448968   1.277 0.201740    
## access_to_credit       14.679650   8.391815   1.749 0.080242 .  
## loan_purpose            1.402258   1.112229   1.261 0.207394    
## living_standard         0.695517   0.711549   0.977 0.328337    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1020.37  on 738  degrees of freedom
## Residual deviance:  808.72  on 710  degrees of freedom
## AIC: 866.72
## 
## Number of Fisher Scoring iterations: 7

age+ ca_before_simlesa+ reason_for_selection + no_hh_members+ hh_help + contribution_of_labor+ paid_work+ own_work+ pry_occupation+ sec_occupation + performance_simlesa+ access_to_credit +