library(readr)
kappa <- read_csv("~/Proposed papers/kappa.csv")
## Parsed with column specification:
## cols(
## seed = col_double(),
## algorithm = col_character(),
## statistic = col_character(),
## value = col_double()
## )
View(kappa)
attach(kappa)
library(ggplot2)
library(extrafontdb)
library(MASS)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(gridExtra)
library(repr) ### adjusting the length and width of your plot
library(beanplot)
library("devtools")
## Warning: package 'devtools' was built under R version 3.6.1
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 3.6.1
library("yarrr")
## Loading required package: jpeg
## Loading required package: BayesFactor
## Loading required package: coda
## Warning: package 'coda' was built under R version 3.6.1
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## ========================================
## circlize version 0.4.6
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
##
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
##
## diamonds
library(agricolae)
library(easynls)
library(MVN)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## sROC 0.1-2 loaded
library(lme4)
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
library(ggsignif)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.1
## Loading required package: magrittr
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
library(multcompView)
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 3.6.1
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
##
## Attaching package: 'PerformanceAnalytics'
## The following objects are masked from 'package:agricolae':
##
## kurtosis, skewness
## The following object is masked from 'package:graphics':
##
## legend
algorithm=as.factor(algorithm)
statistic=as.factor(statistic)
md<-aov(value~algorithm+statistic,data=kappa)
summary(md)
## Df Sum Sq Mean Sq F value Pr(>F)
## algorithm 5 0.132 0.026 34.43 <2e-16 ***
## statistic 1 12.205 12.205 15950.67 <2e-16 ***
## Residuals 197 0.151 0.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###################################################
x<- LSD.test(md,"algorithm",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Accuracy",ylab="Accuracy]")
### HSD grouping of means
h<-HSD.test(md,"algorithm",group =TRUE)
h
## $statistics
## MSerror Df Mean CV MSD
## 0.0007651991 197 0.3119951 8.866239 0.01930859
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey algorithm 6 4.070079 0.05
##
## $means
## value std r Min Max Q25 Q50 Q75
## CART 0.3058529 0.2435119 34 0.044 0.596 0.0660 0.3000 0.53950
## KNN 0.3255588 0.2553190 34 0.040 0.604 0.0730 0.3220 0.57725
## LDA 0.3275294 0.2634035 34 0.038 0.611 0.0660 0.3380 0.58325
## LR 0.3266176 0.2594426 34 0.035 0.598 0.0675 0.3320 0.58200
## NB 0.3285000 0.2663417 34 0.046 0.613 0.0630 0.3355 0.58825
## SVM 0.2579118 0.2057043 34 0.034 0.511 0.0560 0.2500 0.46000
##
## $comparison
## NULL
##
## $groups
## value groups
## NB 0.3285000 a
## LDA 0.3275294 a
## LR 0.3266176 a
## KNN 0.3255588 a
## CART 0.3058529 b
## SVM 0.2579118 c
##
## attr(,"class")
## [1] "group"
acc<-subset(kappa,statistic=='accuracy')
md<-aov(value~algorithm,data=acc)
summary(md)
## Df Sum Sq Mean Sq F value Pr(>F)
## algorithm 5 0.21396 0.04279 115.4 <2e-16 ***
## Residuals 96 0.03559 0.00037
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###################################################
x<- LSD.test(md,"algorithm",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Accuracy",ylab="Accuracy]")
### HSD grouping of means
h<-HSD.test(md,"algorithm",group =TRUE)
h
## $statistics
## MSerror Df Mean CV MSD
## 0.0003707243 96 0.556598 3.459265 0.01920542
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey algorithm 6 4.112659 0.05
##
## $means
## value std r Min Max Q25 Q50 Q75
## CART 0.5450588 0.02268664 17 0.504 0.596 0.533 0.540 0.557
## KNN 0.5762941 0.02302923 17 0.538 0.604 0.562 0.579 0.596
## LDA 0.5864706 0.01398713 17 0.567 0.611 0.574 0.584 0.599
## LR 0.5817647 0.01001580 17 0.555 0.598 0.579 0.582 0.586
## NB 0.5903529 0.01625554 17 0.560 0.613 0.579 0.589 0.606
## SVM 0.4596471 0.02488207 17 0.418 0.511 0.448 0.460 0.477
##
## $comparison
## NULL
##
## $groups
## value groups
## NB 0.5903529 a
## LDA 0.5864706 a
## LR 0.5817647 a
## KNN 0.5762941 a
## CART 0.5450588 b
## SVM 0.4596471 c
##
## attr(,"class")
## [1] "group"
####
stat_box_data <- function(value, upper_limit = max(value)*1.5) {
return(
data.frame(
y =0.65,
label = paste('N =', length(value), '\n',
'Mean =', round(mean(value), 3), '\n')
)
)
}
theme_set(theme_gray(base_size = 10))
ggplot(acc, aes(x =algorithm, y = value))+ geom_boxplot(size=0.7,varwidth = FALSE,outlier.colour = "red",outlier.shape = 2, shape=2,fill=c("grey","grey","grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Accuracy") + xlab("Algorithm")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
fun.data = stat_box_data,
geom = "text",
hjust = 0.5,
vjust = 0.9,
size=2,
color="blue"
)+ theme_classic(base_size = 14)+ theme(axis.text.x = element_text(face="bold", color="black",
size=10),
axis.text.y = element_text(face="bold", color="black",
size=10))
ggplot(acc, aes(x =seed , y = value,fill=algorithm)) +
geom_point(size=3,aes(shape=algorithm)) +
geom_smooth(method=lm, position = "jitter", level = 0.95,aes(color=algorithm), formula = y ~ x + I(x^2)+I(x^3)+I(x^4))+
ylim(0.4,0.65) + xlim(0,20)+ylab("Accuracy") +
xlab("seed")+ theme_classic(base_size = 14)+ theme(axis.text.x = element_text(face="bold", color="black",
size=10),
axis.text.y = element_text(face="bold", color="black",
size=10))
## Warning: Removed 5 rows containing missing values (geom_smooth).
p <- ggplot(data = kappa,
aes(x=seed, y=value, color=algorithm,shape=statistic))+
geom_point(size=3)+ylab("Accuracy") + geom_line(shape=algorithm)+
xlab("Pseudorandom number (seed)")+ theme_classic(base_size = 14)+ theme(axis.text.x = element_text(face="bold", color="black",
size=10),
axis.text.y = element_text(face="bold", color="black",
size=10))
## Warning: Ignoring unknown parameters: shape
p
qplot(data = kappa, x =seed, y = value, colour = statistic) +
stat_ellipse(geom = "polygon", alpha = 1/2, aes(fill = algorithm))
ggplot(kappa, aes(seed, value, color = statistic > 3)) +
geom_point() +
stat_ellipse(type = "norm", linetype = 2) +
stat_ellipse(type = "t")
ggplot(kappa, aes(seed, value, color = algorithm )) +
geom_point(size=3,shape=algorithm) +
stat_ellipse(type = "norm", linetype = 2) +
stat_ellipse(type = "t")+
xlab("Pseudorandom number (seed)")+ylab("Accuracy")+ theme_classic(base_size = 14)+ theme(axis.text.x = element_text(face="bold", color="black",
size=10),
axis.text.y = element_text(face="bold", color="black",
size=10))
evaluation based on cross validation
library(readr)
vali <- read_csv("~/Proposed papers/vali.csv")
## Parsed with column specification:
## cols(
## validation = col_double(),
## algorithm = col_character(),
## accuracy = col_double(),
## kappa = col_double()
## )
View(vali)
attach(vali)
## The following objects are masked _by_ .GlobalEnv:
##
## algorithm, kappa
## The following object is masked from kappa:
##
## algorithm
algorithm=as.factor(algorithm)
p <- ggplot(data = vali,
aes(x=validation, y=accuracy, color=algorithm,shape=algorithm))+
geom_point(size=3)+ylab("Accuracy") + geom_line(shape=algorithm)+
xlab("Cross Validation")+ theme_classic(base_size = 14)+ theme(axis.text.x = element_text(face="bold", color="black",
size=10),
axis.text.y = element_text(face="bold", color="black",
size=10))
## Warning: Ignoring unknown parameters: shape
p
################################ kappa
p <- ggplot(data = vali,
aes(x=validation, y=kappa, color=algorithm,shape=algorithm))+
geom_point(size=3)+ylab("Kappa") + geom_line(shape=algorithm)+
xlab("Cross Validation")+ theme_classic(base_size = 14)+ theme(axis.text.x = element_text(face="bold", color="black",
size=10),
axis.text.y = element_text(face="bold", color="black",
size=10))
## Warning: Ignoring unknown parameters: shape
p
library(readr)
machine_py <- read_csv("~/Proposed papers/machine_py.csv")
## Parsed with column specification:
## cols(
## Conv = col_double(),
## CA_rotation = col_double(),
## CA_sole = col_double(),
## CA_intercrop = col_double(),
## agroecoregion = col_character()
## )
#View(machine_py)
attach(machine_py)
agroecoregion=as.factor(agroecoregion)
co<-data.frame(Conv,CA_rotation,CA_sole,CA_intercrop)
chart.Correlation(co)
describeBy(co,agroecoregion)
##
## Descriptive statistics by group
## group: Highlands
## vars n mean sd median trimmed mad min
## Conv 1 257 2508.78 1819.49 2220.00 2332.06 1749.47 106.67
## CA_rotation 2 257 3545.08 1975.66 3315.00 3452.58 2289.91 199.00
## CA_sole 3 257 3654.53 1847.33 3603.00 3568.23 2077.12 148.00
## CA_intercrop 4 257 2754.88 2091.53 2133.32 2484.24 1976.79 110.00
## max range skew kurtosis se
## Conv 9000.00 8893.33 0.90 0.67 113.50
## CA_rotation 8773.59 8574.59 0.38 -0.61 123.24
## CA_sole 8931.00 8783.00 0.41 -0.26 115.23
## CA_intercrop 8900.00 8790.00 1.04 0.48 130.47
## --------------------------------------------------------
## group: Lowlands
## vars n mean sd median trimmed mad min
## Conv 1 257 2421.01 1649.87 2166.92 2270.86 1730.07 110.00
## CA_rotation 2 257 3209.30 1826.53 2895.76 3065.44 1790.78 280.00
## CA_sole 3 257 3072.35 1694.56 2735.80 2917.76 1484.11 231.82
## CA_intercrop 4 257 3061.76 1668.72 2866.99 2943.00 1728.50 120.00
## max range skew kurtosis se
## Conv 8814.60 8704.60 0.88 0.70 102.92
## CA_rotation 8585.56 8305.56 0.67 0.04 113.94
## CA_sole 8819.20 8587.38 0.88 0.45 105.70
## CA_intercrop 7910.00 7790.00 0.62 -0.13 104.09
t.test(Conv~agroecoregion,data = machine_py)
##
## Welch Two Sample t-test
##
## data: Conv by agroecoregion
## t = 0.57285, df = 507.17, p-value = 0.567
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -213.2375 388.7699
## sample estimates:
## mean in group Highlands mean in group Lowlands
## 2508.776 2421.010
t.test(CA_rotation~agroecoregion,data = machine_py)
##
## Welch Two Sample t-test
##
## data: CA_rotation by agroecoregion
## t = 2.0006, df = 508.88, p-value = 0.04596
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.040142 665.514744
## sample estimates:
## mean in group Highlands mean in group Lowlands
## 3545.080 3209.302
t.test(CA_sole~agroecoregion,data = machine_py)
##
## Welch Two Sample t-test
##
## data: CA_sole by agroecoregion
## t = 3.723, df = 508.23, p-value = 0.0002189
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 274.9612 889.3895
## sample estimates:
## mean in group Highlands mean in group Lowlands
## 3654.530 3072.355
t.test(CA_intercrop~agroecoregion,data = machine_py)
##
## Welch Two Sample t-test
##
## data: CA_intercrop by agroecoregion
## t = -1.8386, df = 487.94, p-value = 0.06658
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -634.81104 21.06287
## sample estimates:
## mean in group Highlands mean in group Lowlands
## 2754.882 3061.756
library(rpart)
library(party)
## Warning: package 'party' was built under R version 3.6.1
## Loading required package: grid
## Loading required package: modeltools
## Loading required package: stats4
##
## Attaching package: 'modeltools'
## The following object is masked from 'package:lme4':
##
## refit
## The following object is masked from 'package:BayesFactor':
##
## posterior
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.6.1
## Loading required package: sandwich
library(tree)
## Warning: package 'tree' was built under R version 3.6.1
## Registered S3 method overwritten by 'tree':
## method from
## print.tree cli
library(partykit)
## Warning: package 'partykit' was built under R version 3.6.1
## Loading required package: libcoin
## Warning: package 'libcoin' was built under R version 3.6.1
##
## Attaching package: 'partykit'
## The following objects are masked from 'package:party':
##
## cforest, ctree, ctree_control, edge_simple, mob, mob_control,
## node_barplot, node_bivplot, node_boxplot, node_inner,
## node_surv, node_terminal, varimp
### grow a tree
fit <- rpart(agroecoregion~Conv+CA_rotation+CA_sole+CA_intercrop,
method="class", data=machine_py)
printcp(fit)
##
## Classification tree:
## rpart(formula = agroecoregion ~ Conv + CA_rotation + CA_sole +
## CA_intercrop, data = machine_py, method = "class")
##
## Variables actually used in tree construction:
## [1] CA_intercrop CA_rotation CA_sole Conv
##
## Root node error: 257/514 = 0.5
##
## n= 514
##
## CP nsplit rel error xerror xstd
## 1 0.182879 0 1.00000 1.12451 0.043765
## 2 0.101167 1 0.81712 0.85992 0.043673
## 3 0.027237 2 0.71595 0.74708 0.042674
## 4 0.013619 5 0.63424 0.72763 0.042440
## 5 0.011673 7 0.60700 0.76265 0.042848
## 6 0.010000 8 0.59533 0.73541 0.042536
plotcp(fit)
# visualize cross-validation results
summary(fit) # detailed summary of splits
## Call:
## rpart(formula = agroecoregion ~ Conv + CA_rotation + CA_sole +
## CA_intercrop, data = machine_py, method = "class")
## n= 514
##
## CP nsplit rel error xerror xstd
## 1 0.18287938 0 1.0000000 1.1245136 0.04376486
## 2 0.10116732 1 0.8171206 0.8599222 0.04367323
## 3 0.02723735 2 0.7159533 0.7470817 0.04267405
## 4 0.01361868 5 0.6342412 0.7276265 0.04244045
## 5 0.01167315 7 0.6070039 0.7626459 0.04284764
## 6 0.01000000 8 0.5953307 0.7354086 0.04253612
##
## Variable importance
## CA_intercrop CA_sole Conv CA_rotation
## 45 29 14 12
##
## Node number 1: 514 observations, complexity param=0.1828794
## predicted class=Highlands expected loss=0.5 P(node) =1
## class counts: 257 257
## probabilities: 0.500 0.500
## left son=2 (117 obs) right son=3 (397 obs)
## Primary splits:
## CA_intercrop < 1261.024 to the left, improve=12.222290, (0 missing)
## CA_sole < 3498.759 to the right, improve=11.933030, (0 missing)
## CA_rotation < 5022.483 to the right, improve= 4.425696, (0 missing)
## Conv < 309.1429 to the left, improve= 3.470042, (0 missing)
## Surrogate splits:
## CA_rotation < 276.5 to the left, agree=0.776, adj=0.017, (0 split)
## CA_sole < 8872.1 to the right, agree=0.776, adj=0.017, (0 split)
##
## Node number 2: 117 observations
## predicted class=Highlands expected loss=0.2991453 P(node) =0.2276265
## class counts: 82 35
## probabilities: 0.701 0.299
##
## Node number 3: 397 observations, complexity param=0.1011673
## predicted class=Lowlands expected loss=0.440806 P(node) =0.7723735
## class counts: 175 222
## probabilities: 0.441 0.559
## left son=6 (162 obs) right son=7 (235 obs)
## Primary splits:
## CA_sole < 3498.759 to the right, improve=10.642600, (0 missing)
## CA_intercrop < 7955 to the right, improve= 5.759121, (0 missing)
## CA_rotation < 4912.688 to the right, improve= 3.957297, (0 missing)
## Conv < 4410.106 to the right, improve= 3.625561, (0 missing)
## Surrogate splits:
## CA_rotation < 4912.688 to the right, agree=0.650, adj=0.142, (0 split)
## Conv < 2770.3 to the right, agree=0.610, adj=0.043, (0 split)
## CA_intercrop < 6317.155 to the right, agree=0.602, adj=0.025, (0 split)
##
## Node number 6: 162 observations, complexity param=0.02723735
## predicted class=Highlands expected loss=0.4197531 P(node) =0.3151751
## class counts: 94 68
## probabilities: 0.580 0.420
## left son=12 (62 obs) right son=13 (100 obs)
## Primary splits:
## CA_intercrop < 3751.362 to the right, improve=2.578742, (0 missing)
## CA_rotation < 797.0455 to the right, improve=1.674107, (0 missing)
## CA_sole < 4553.112 to the left, improve=1.629697, (0 missing)
## Conv < 4384.598 to the right, improve=1.623725, (0 missing)
## Surrogate splits:
## Conv < 4531.065 to the right, agree=0.667, adj=0.129, (0 split)
## CA_sole < 3545.556 to the left, agree=0.630, adj=0.032, (0 split)
## CA_rotation < 6675.363 to the right, agree=0.623, adj=0.016, (0 split)
##
## Node number 7: 235 observations, complexity param=0.02723735
## predicted class=Lowlands expected loss=0.3446809 P(node) =0.4571984
## class counts: 81 154
## probabilities: 0.345 0.655
## left son=14 (7 obs) right son=15 (228 obs)
## Primary splits:
## CA_intercrop < 7260.057 to the right, improve=6.196790, (0 missing)
## CA_rotation < 4526.093 to the right, improve=2.910365, (0 missing)
## CA_sole < 3331.514 to the left, improve=2.742059, (0 missing)
## Conv < 4269.529 to the right, improve=2.503498, (0 missing)
## Surrogate splits:
## Conv < 8130.172 to the right, agree=0.983, adj=0.429, (0 split)
##
## Node number 12: 62 observations
## predicted class=Highlands expected loss=0.3064516 P(node) =0.1206226
## class counts: 43 19
## probabilities: 0.694 0.306
##
## Node number 13: 100 observations, complexity param=0.02723735
## predicted class=Highlands expected loss=0.49 P(node) =0.1945525
## class counts: 51 49
## probabilities: 0.510 0.490
## left son=26 (44 obs) right son=27 (56 obs)
## Primary splits:
## CA_sole < 4532.199 to the left, improve=4.639091, (0 missing)
## CA_intercrop < 1940 to the left, improve=3.245873, (0 missing)
## Conv < 804.4232 to the left, improve=2.475017, (0 missing)
## CA_rotation < 6347.129 to the left, improve=1.638120, (0 missing)
## Surrogate splits:
## CA_intercrop < 1989.548 to the left, agree=0.64, adj=0.182, (0 split)
## Conv < 1026.515 to the left, agree=0.63, adj=0.159, (0 split)
## CA_rotation < 2099.317 to the left, agree=0.59, adj=0.068, (0 split)
##
## Node number 14: 7 observations
## predicted class=Highlands expected loss=0 P(node) =0.01361868
## class counts: 7 0
## probabilities: 1.000 0.000
##
## Node number 15: 228 observations, complexity param=0.01361868
## predicted class=Lowlands expected loss=0.3245614 P(node) =0.4435798
## class counts: 74 154
## probabilities: 0.325 0.675
## left son=30 (47 obs) right son=31 (181 obs)
## Primary splits:
## CA_rotation < 4526.093 to the right, improve=3.215882, (0 missing)
## CA_sole < 3331.514 to the left, improve=2.434958, (0 missing)
## CA_intercrop < 4016.424 to the left, improve=2.247850, (0 missing)
## Conv < 4058.735 to the right, improve=1.375439, (0 missing)
## Surrogate splits:
## CA_sole < 3489.13 to the right, agree=0.803, adj=0.043, (0 split)
## CA_intercrop < 6116.648 to the right, agree=0.798, adj=0.021, (0 split)
##
## Node number 26: 44 observations
## predicted class=Highlands expected loss=0.3181818 P(node) =0.08560311
## class counts: 30 14
## probabilities: 0.682 0.318
##
## Node number 27: 56 observations
## predicted class=Lowlands expected loss=0.375 P(node) =0.1089494
## class counts: 21 35
## probabilities: 0.375 0.625
##
## Node number 30: 47 observations, complexity param=0.01361868
## predicted class=Lowlands expected loss=0.4893617 P(node) =0.09143969
## class counts: 23 24
## probabilities: 0.489 0.511
## left son=60 (33 obs) right son=61 (14 obs)
## Primary splits:
## Conv < 3065 to the left, improve=3.0175000, (0 missing)
## CA_intercrop < 5065.28 to the left, improve=2.7166340, (0 missing)
## CA_rotation < 6995.561 to the left, improve=1.9750760, (0 missing)
## CA_sole < 1817.72 to the left, improve=0.9395782, (0 missing)
## Surrogate splits:
## CA_rotation < 7854.061 to the left, agree=0.766, adj=0.214, (0 split)
## CA_intercrop < 6182.448 to the left, agree=0.723, adj=0.071, (0 split)
##
## Node number 31: 181 observations
## predicted class=Lowlands expected loss=0.281768 P(node) =0.3521401
## class counts: 51 130
## probabilities: 0.282 0.718
##
## Node number 60: 33 observations, complexity param=0.01167315
## predicted class=Highlands expected loss=0.3939394 P(node) =0.06420233
## class counts: 20 13
## probabilities: 0.606 0.394
## left son=120 (24 obs) right son=121 (9 obs)
## Primary splits:
## CA_intercrop < 4212.765 to the left, improve=1.8409090, (0 missing)
## CA_rotation < 5422.03 to the right, improve=0.6987522, (0 missing)
## CA_sole < 1817.72 to the left, improve=0.4848485, (0 missing)
## Conv < 605.9165 to the left, improve=0.4375758, (0 missing)
## Surrogate splits:
## CA_rotation < 4640.931 to the right, agree=0.788, adj=0.222, (0 split)
## Conv < 717.6149 to the right, agree=0.758, adj=0.111, (0 split)
##
## Node number 61: 14 observations
## predicted class=Lowlands expected loss=0.2142857 P(node) =0.02723735
## class counts: 3 11
## probabilities: 0.214 0.786
##
## Node number 120: 24 observations
## predicted class=Highlands expected loss=0.2916667 P(node) =0.04669261
## class counts: 17 7
## probabilities: 0.708 0.292
##
## Node number 121: 9 observations
## predicted class=Lowlands expected loss=0.3333333 P(node) =0.01750973
## class counts: 3 6
## probabilities: 0.333 0.667
plot(fit)
# plot tree
plot(fit, uniform=TRUE,
main="Regression Tree for yield based on Agro-ecology ")
text(fit, use.n=TRUE, all=TRUE, cex=.6)
###################################################################
rpart.tree <- rpart(agroecoregion~Conv+CA_rotation+CA_sole+CA_intercrop, data=machine_py)
rparty.tree <- as.party(rpart.tree)
rparty.tree
##
## Model formula:
## agroecoregion ~ Conv + CA_rotation + CA_sole + CA_intercrop
##
## Fitted party:
## [1] root
## | [2] CA_intercrop < 1261.02413: Highlands (n = 117, err = 29.9%)
## | [3] CA_intercrop >= 1261.02413
## | | [4] CA_sole >= 3498.75942
## | | | [5] CA_intercrop >= 3751.36174: Highlands (n = 62, err = 30.6%)
## | | | [6] CA_intercrop < 3751.36174
## | | | | [7] CA_sole < 4532.19876: Highlands (n = 44, err = 31.8%)
## | | | | [8] CA_sole >= 4532.19876: Lowlands (n = 56, err = 37.5%)
## | | [9] CA_sole < 3498.75942
## | | | [10] CA_intercrop >= 7260.05747: Highlands (n = 7, err = 0.0%)
## | | | [11] CA_intercrop < 7260.05747
## | | | | [12] CA_rotation >= 4526.09261
## | | | | | [13] Conv < 3065
## | | | | | | [14] CA_intercrop < 4212.76525: Highlands (n = 24, err = 29.2%)
## | | | | | | [15] CA_intercrop >= 4212.76525: Lowlands (n = 9, err = 33.3%)
## | | | | | [16] Conv >= 3065: Lowlands (n = 14, err = 21.4%)
## | | | | [17] CA_rotation < 4526.09261: Lowlands (n = 181, err = 28.2%)
##
## Number of inner nodes: 8
## Number of terminal nodes: 9
plot(rparty.tree,cex=.01,width=16, height=11, pointsize=5)
# create additional plots
par(mfrow=c(1,2)) # two plots on one page
rsq.rpart(rpart.tree) # visualize cross-validation results
##
## Classification tree:
## rpart(formula = agroecoregion ~ Conv + CA_rotation + CA_sole +
## CA_intercrop, data = machine_py)
##
## Variables actually used in tree construction:
## [1] CA_intercrop CA_rotation CA_sole Conv
##
## Root node error: 257/514 = 0.5
##
## n= 514
##
## CP nsplit rel error xerror xstd
## 1 0.182879 0 1.00000 1.04280 0.044068
## 2 0.101167 1 0.81712 0.91829 0.043961
## 3 0.027237 2 0.71595 0.79377 0.043160
## 4 0.013619 5 0.63424 0.80934 0.043299
## 5 0.011673 7 0.60700 0.81712 0.043364
## 6 0.010000 8 0.59533 0.80934 0.043299
## Warning in rsq.rpart(rpart.tree): may not be applicable for this method
rpart.tree <- rpart(Conv~agroecoregion, data=machine_py)
rparty.tree <- as.party(rpart.tree)
rparty.tree
##
## Model formula:
## Conv ~ agroecoregion
##
## Fitted party:
## [1] root: 2464.893 (n = 514, err = 1545338258.1)
##
## Number of inner nodes: 0
## Number of terminal nodes: 1
plot(rparty.tree,cex=.01,width=16, height=11, pointsize=5)
# create additional plots
par(mfrow=c(1,2)) # two plots on one page
rsq.rpart(rpart.tree) #
##
## Regression tree:
## rpart(formula = Conv ~ agroecoregion, data = machine_py)
##
## Variables actually used in tree construction:
## character(0)
##
## Root node error: 1545338258/514 = 3006495
##
## n= 514
##
## CP nsplit rel error xerror xstd
## 1 0.00064052 0 1 0 0