The above figure from Bures and Zedek (2014) shows the negative relationship between nuclear DNA content and chromosome number in holokinetics based on Carex data. This relationship has been shown to be significant in the Cyperaceae by Lipnerova et al. 2013.
This figure shows the relationships between genome size and chromosome number in the genus Carex for all species studied by Lipnerova et al. 2013. The grey arrows show the putative effects of particular mechanisms responsible for the evolution of genome size and chromosome number. Closely related species are represented by the same symbol. The three main grades (ploidy) levels are separated with dash lines.
This histogram shows the standard deviation and standard error of the mean for chromosome counts. I used this information to decide to eliminate samples with SD values greater than four and SEM values greater than 1.
(diff.gs.chromo.SEM.SD<-setdiff(gs.chromo.high.SEM$Collection_number, gs.chromo.high.SD$Collection_number ))
## character(0)
(diff.gs.chromo.SD.SEM<-setdiff(gs.chromo.high.SD$Collection_number ,gs.chromo.high.SEM$Collection_number))
## character(0)
## Count_mean Genome_size
## Count_mean 1.00 0.53
## Genome_size 0.53 1.00
(shapiro.test(gs.chromo.high.SEM$Genome_size))
##
## Shapiro-Wilk normality test
##
## data: gs.chromo.high.SEM$Genome_size
## W = 0.89993, p-value = 0.0004183
(shapiro.test(sqrt(gs.chromo.high.SEM$Genome_size)))
##
## Shapiro-Wilk normality test
##
## data: sqrt(gs.chromo.high.SEM$Genome_size)
## W = 0.92336, p-value = 0.002797
(shapiro.test((gs.chromo.high.SEM$Genome_size)^(1/3)))
##
## Shapiro-Wilk normality test
##
## data: (gs.chromo.high.SEM$Genome_size)^(1/3)
## W = 0.92822, p-value = 0.004255
(shapiro.test(log(gs.chromo.high.SEM$Genome_size)))
##
## Shapiro-Wilk normality test
##
## data: log(gs.chromo.high.SEM$Genome_size)
## W = 0.93221, p-value = 0.006058
(shapiro.test(gs.chromo.high.SEM$Count_mean))
##
## Shapiro-Wilk normality test
##
## data: gs.chromo.high.SEM$Count_mean
## W = 0.85583, p-value = 1.895e-05
(shapiro.test(log(gs.chromo.high.SEM$Count_mean)))
##
## Shapiro-Wilk normality test
##
## data: log(gs.chromo.high.SEM$Count_mean)
## W = 0.90542, p-value = 0.0006411
gs.chromo.lm<-lm(gs.chromo.high.SEM$Genome_size~gs.chromo.high.SEM$Count_mean)
sqrt.chromo.lm<-lm(sqrt(gs.chromo.high.SEM$Genome_size)~gs.chromo.high.SEM$Count_mean)
cbrt.chromo.lm<-lm(((gs.chromo.high.SEM$Genome_size)^(1/3))~gs.chromo.high.SEM$Count_mean)
log.gs.chromo.lm<-lm(log(gs.chromo.high.SEM$Genome_size)~gs.chromo.high.SEM$Count_mean)
log.log.gs.chromo.lm<-lm(log(gs.chromo.high.SEM$Genome_size)~log(gs.chromo.high.SEM$Count_mean))
##
## Call:
## lm(formula = gs.chromo.high.SEM$Genome_size ~ gs.chromo.high.SEM$Count_mean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3890.5 -737.3 -326.2 1215.4 4544.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2311.25 600.86 3.847 0.000346 ***
## gs.chromo.high.SEM$Count_mean 83.41 19.10 4.367 6.51e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1566 on 49 degrees of freedom
## Multiple R-squared: 0.2802, Adjusted R-squared: 0.2655
## F-statistic: 19.07 on 1 and 49 DF, p-value: 6.507e-05
##
## Call:
## lm(formula = log(gs.chromo.high.SEM$Genome_size) ~ gs.chromo.high.SEM$Count_mean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.89042 -0.15071 -0.00145 0.25325 0.65286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.906064 0.125336 63.079 < 2e-16 ***
## gs.chromo.high.SEM$Count_mean 0.016761 0.003984 4.207 0.00011 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3266 on 49 degrees of freedom
## Multiple R-squared: 0.2653, Adjusted R-squared: 0.2503
## F-statistic: 17.7 on 1 and 49 DF, p-value: 0.0001099
##
## Call:
## lm(formula = log(gs.chromo.high.SEM$Genome_size) ~ log(gs.chromo.high.SEM$Count_mean))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.91619 -0.12172 -0.01116 0.20373 0.65100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.3619 0.3948 16.116 < 2e-16
## log(gs.chromo.high.SEM$Count_mean) 0.6151 0.1186 5.186 4.08e-06
##
## (Intercept) ***
## log(gs.chromo.high.SEM$Count_mean) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3062 on 49 degrees of freedom
## Multiple R-squared: 0.3544, Adjusted R-squared: 0.3412
## F-statistic: 26.89 on 1 and 49 DF, p-value: 4.075e-06
## Collection_number Genome_size Count_mean
## 27 MM_7591 3719.7 18.9
## 36 TE2016_192 1630.0 18.0
## 37 TE2016_193 3297.0 20.5
## 45 TE2016_202 3373.5 19.9
## 46 TE2016_203 3319.5 19.0
## 47 TE2016_204 3443.0 21.6
## 53 TE2016_213 3335.0 21.4
## 56 TE2016_226 3010.0 18.5
## 59 TE2016_229 3279.5 18.9
## 69 TE2016_241 3230.2 19.2
## 72 TE2016_244 3412.3 19.8
## 73 TE2016_245 3431.9 20.8
## 74 TE2016_247 3207.7 19.8
## 80 TE2016_253 3229.2 20.0
## 87 TE2016_262 3327.8 18.8
## 93 TE2016_269 3850.6 20.0
## 103 TE2016_282 3580.9 22.0
## 104 TE2016_283 3564.9 21.3
## 122 TE2016_302 3541.5 19.0
## 166 TE2016_347 3235.0 24.5
## 176 TE2016_357 3204.0 26.7
## 181 TE2016_362 2770.0 17.0
## 189 TE2016_383 3348.2 23.5
## 196 TE2016_390 3182.8 21.4
## Collection_number Genome_size Count_mean
## 28 MM_7626 4846.8 24.0
## 39 TE2016_196 5156.5 35.6
## 50 TE2016_207 5807.5 24.5
## 58 TE2016_228 4892.0 29.0
## 161 TE2016_341 5703.0 20.8
## 175 TE2016_356 4891.0 24.5
## 178 TE2016_359 5304.0 31.7
## 179 TE2016_360 4925.0 30.0
## 198 TE2016_392 6113.5 16.0
## Collection_number Genome_size Count_mean
## 33 TE2016_182 8396.5 52.0
## 35 TE2016_191 6814.0 40.1
## 40 TE2016_197 7544.0 39.8
## 41 TE2016_198 6760.5 38.0
## 49 TE2016_206 5380.0 40.7
## 61 TE2016_231 6607.5 38.4
## 82 TE2016_255 5905.1 39.0
## 90 TE2016_265 6888.9 38.8
## 100 TE2016_278 7211.4 34.0
## 120 TE2016_300 7052.9 40.0
## 162 TE2016_342 10192.0 40.0
## 177 TE2016_358 6879.0 39.5
## 180 TE2016_361 7152.0 40.0
## 185 TE2016_371 7366.5 38.4
## 188 TE2016_382 6136.9 34.7
## Collection_number Genome_size Count_mean
## 190 TE2016_384 2558.0 49.6
## 206 TE2016_400 3709.4 60.5
## 210 TE2016_404 3749.7 63.6
##
## Bartlett test of homogeneity of variances
##
## data: gs.chromo.clust.df$Genome_size by gs.chromo.clust.df[, 6]
## Bartlett's K-squared = 18.16, df = 3, p-value = 0.0004077
##
## Bartlett test of homogeneity of variances
##
## data: gs.chromo.clust.df$Count_mean by gs.chromo.clust.df[, 6]
## Bartlett's K-squared = 16.365, df = 3, p-value = 0.0009546
##
## Bartlett test of homogeneity of variances
##
## data: log(gs.chromo.clust.df$Genome_size) by gs.chromo.clust.df[, 6]
## Bartlett's K-squared = 4.0109, df = 3, p-value = 0.2603
##
## Bartlett test of homogeneity of variances
##
## data: log(gs.chromo.clust.df$Count_mean) by gs.chromo.clust.df[, 6]
## Bartlett's K-squared = 13.574, df = 3, p-value = 0.003547
(gs.chromo.lda <- lda(gs.chromo.clust.df[,6] ~ log(gs.chromo.clust.df$Genome_size) + gs.chromo.clust.df$Count_mean))
## Call:
## lda(gs.chromo.clust.df[, 6] ~ log(gs.chromo.clust.df$Genome_size) +
## gs.chromo.clust.df$Count_mean)
##
## Prior probabilities of groups:
## 1 2 3 4
## 0.47058824 0.17647059 0.29411765 0.05882353
##
## Group means:
## log(gs.chromo.clust.df$Genome_size) gs.chromo.clust.df$Count_mean
## 1 8.082655 20.43750
## 2 8.570756 26.23333
## 3 8.855295 39.56000
## 4 8.098346 57.90000
##
## Coefficients of linear discriminants:
## LD1 LD2
## log(gs.chromo.clust.df$Genome_size) 2.9060881 -6.1733577
## gs.chromo.clust.df$Count_mean 0.2085208 0.1577215
##
## Proportion of trace:
## LD1 LD2
## 0.7198 0.2802
gs.chromo.lda.pred<- predict(gs.chromo.lda)
# redo with jacknifing
gs.chromo.lda.CV<- lda(gs.chromo.clust.df[,6] ~ log(gs.chromo.clust.df$Genome_size) + gs.chromo.clust.df$Count_mean, CV=TRUE)
##
## 1 2 3 4
## 1 24 0 0 0
## 2 0 8 0 0
## 3 0 0 16 0
## 4 0 0 0 3
## [1] 1
## Precision Recall F1
## 1 1 1 1
## 2 1 1 1
## 3 1 1 1
## 4 1 1 1