library(readxl)
databan <- read_excel('Clase 11.05.2023/Data_ModeloAF_2022.xlsx',
sheet = "alo2")
head(databan)
## # A tibble: 6 × 13
## A10 A20 A30 A40 A50 A60 A70 A_1_3LCL A1_2LCL ALCL Kumar B
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 207. 140. 103. 84.6 65.5 51.3 44.3 65.5 44.3 24.4 0.0747 0.167
## 2 250. 191. 154. 127. 109. 92.0 76.5 92.0 62.4 55.9 0.118 0.362
## 3 286. 207. 168. 134. 112. 92.0 81.5 112. 64.6 58.0 0.274 0.522
## 4 191. 151. 134. 103. 86.7 71.6 62.4 86.7 62.4 62.4 0.235 0.530
## 5 277. 188. 154. 131. 103. 81.5 68.3 103. 60.2 42.1 0.426 0.670
## 6 413. 219. 176. 151. 112. 97.5 79.0 112. 73.5 71.6 1.26 1.54
## # ℹ 1 more variable: DM <dbl>
require(stats)
pc <- prcomp(x = databan,
center = TRUE,
scale. = TRUE)
print(pc)
## Standard deviations (1, .., p=13):
## [1] 3.53709429 0.51086070 0.31610830 0.18581100 0.17465942 0.13425889
## [7] 0.11619086 0.10458127 0.08757657 0.07332399 0.06654957 0.04286862
## [13] 0.03540655
##
## Rotation (n x k) = (13 x 13):
## PC1 PC2 PC3 PC4 PC5 PC6
## A10 0.2642267 -0.63375048 -0.34530470 -0.32850723 -0.28944164 0.42516216
## A20 0.2755683 -0.39130416 -0.03333290 0.06618973 0.29850562 -0.40776312
## A30 0.2785656 -0.25022949 -0.03246763 0.39230612 0.29518435 -0.11223955
## A40 0.2813391 -0.11934981 0.01169124 0.22276525 0.01029357 -0.10900723
## A50 0.2818247 -0.04763707 0.05287199 0.25755903 -0.03086914 -0.08786534
## A60 0.2806135 0.07949373 0.22522288 0.18019393 -0.30409794 0.08249630
## A70 0.2805758 0.08004135 0.23943366 0.16431181 -0.28891962 0.12337283
## A_1_3LCL 0.2782607 0.04619014 0.38439008 -0.31067432 -0.46632161 -0.28154887
## A1_2LCL 0.2783192 0.12981410 0.41537424 -0.13453452 0.34261475 0.17817216
## ALCL 0.2792227 0.09548626 0.14840484 -0.52443595 0.47241764 0.21563166
## Kumar 0.2753299 0.29762498 -0.42986651 -0.28072798 -0.06441458 -0.47424279
## B 0.2751223 0.34607099 -0.42376437 -0.02720779 -0.01044230 -0.00127710
## DM 0.2761327 0.34196263 -0.24685327 0.29861178 0.02641507 0.46510140
## PC7 PC8 PC9 PC10 PC11
## A10 -0.0467366878 0.05913596 -0.13435564 0.08646792 -0.033963004
## A20 -0.0003311553 0.46599391 0.43729545 0.01648874 -0.005556239
## A30 0.2016813121 -0.51337148 0.02569906 0.35111015 -0.216357039
## A40 0.1564969506 -0.18671744 -0.45849705 -0.41228775 0.317399879
## A50 -0.2041917820 0.15253134 -0.22703071 -0.44719248 0.030500303
## A60 -0.3486823010 -0.24319674 0.05311498 0.42665266 0.195566684
## A70 -0.3498031950 0.07177027 0.41748762 -0.16552372 0.072632077
## A_1_3LCL 0.5653251441 -0.02956493 0.04995293 -0.03075034 -0.215353444
## A1_2LCL -0.0515932370 0.43115589 -0.46021797 0.35879137 -0.047405108
## ALCL -0.0745307063 -0.39538334 0.26131897 -0.29286223 0.101200927
## Kumar -0.3996001034 -0.09002008 -0.20111863 0.05881064 -0.333558921
## B 0.3238117183 0.14175943 0.11461831 0.21779247 0.623948860
## DM 0.2338151849 0.14859567 0.12252210 -0.15245594 -0.497363915
## PC12 PC13
## A10 0.05178697 -0.04252076
## A20 -0.22537450 0.22494550
## A30 0.27357459 -0.23511452
## A40 0.05946857 0.54865262
## A50 -0.22961414 -0.68583025
## A60 -0.55432032 0.14616907
## A70 0.62642134 0.07044133
## A_1_3LCL -0.04338115 -0.08527045
## A1_2LCL 0.18543802 0.01405947
## ALCL -0.13639832 -0.03741312
## Kumar 0.10505481 0.09473437
## B 0.09315148 -0.20150773
## DM -0.20287954 0.19014216
summary(pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.5371 0.51086 0.31611 0.18581 0.17466 0.13426 0.11619
## Proportion of Variance 0.9624 0.02008 0.00769 0.00266 0.00235 0.00139 0.00104
## Cumulative Proportion 0.9624 0.98246 0.99015 0.99280 0.99515 0.99654 0.99758
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.10458 0.08758 0.07332 0.06655 0.04287 0.03541
## Proportion of Variance 0.00084 0.00059 0.00041 0.00034 0.00014 0.00010
## Cumulative Proportion 0.99842 0.99901 0.99942 0.99976 0.99990 1.00000
plot(pc)
screeplot(pc, type = "line", main = "Scree plot")
library(devtools)
## Warning: package 'devtools' was built under R version 4.2.3
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.2.3
install_github("vqv/ggbiplot")
## WARNING: Rtools is required to build R packages, but is not currently installed.
##
## Please download and install Rtools 4.2 from https://cran.r-project.org/bin/windows/Rtools/ or https://www.r-project.org/nosvn/winutf8/ucrt3/.
## Skipping install of 'ggbiplot' from a github remote, the SHA1 (7325e880) has not changed since last install.
## Use `force = TRUE` to force installation
library(ggbiplot)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid
ggbiplot(pc)
library(ggbiplot)
data(databan)
## Warning in data(databan): data set 'databan' not found
ban.pca <- prcomp(databan, scale. = TRUE)
ggbiplot(ban.pca, obs.scale = 1, var.scale = 1,
ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')
biplot2 = ggbiplot(pcobj = ban.pca,alpha = 0.2,varname.adjust = T,
choices = c(1,2),
obs.scale = 1, var.scale = 1, # Scaling of axis
#labels = row.names(databan), # Add labels as rownames
labels.size = 3,
varname.size = 2,
varname.abbrev = TRUE, # Abbreviate variable names (TRUE)
var.axes = T, # Remove variable vectors (TRUE)
circle = T, # Add unit variance circle (TRUE)
ellipse = TRUE)+ # Adding ellipses
scale_color_discrete(name = '')+
theme(legend.direction = 'horizontal', legend.position = 'top')
print(biplot2)
biplot3 = biplot2 + theme(legend.position = "bottom")
print(biplot3)
theme1 = biplot3 + theme_bw()
theme2 = biplot3 + theme_classic()
theme3 = biplot3 + theme_dark()
theme4 = biplot3 + theme_gray()
theme5 = biplot3 + theme_minimal()
theme6 = biplot3 + theme_void()
Leaf area prediction models from growth measurements in Andean Blueberry in the nursery. (Agraz) Artículo de banano
library(readxl)
databan <- read_excel('Clase 11.05.2023/Data_ModeloAF_2022.xlsx',
sheet = "alo2")
head(databan)
## # A tibble: 6 × 13
## A10 A20 A30 A40 A50 A60 A70 A_1_3LCL A1_2LCL ALCL Kumar B
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 207. 140. 103. 84.6 65.5 51.3 44.3 65.5 44.3 24.4 0.0747 0.167
## 2 250. 191. 154. 127. 109. 92.0 76.5 92.0 62.4 55.9 0.118 0.362
## 3 286. 207. 168. 134. 112. 92.0 81.5 112. 64.6 58.0 0.274 0.522
## 4 191. 151. 134. 103. 86.7 71.6 62.4 86.7 62.4 62.4 0.235 0.530
## 5 277. 188. 154. 131. 103. 81.5 68.3 103. 60.2 42.1 0.426 0.670
## 6 413. 219. 176. 151. 112. 97.5 79.0 112. 73.5 71.6 1.26 1.54
## # ℹ 1 more variable: DM <dbl>
dim(databan)
## [1] 34 13
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.2.3
R=cor(databan)
ggcorrplot(R,lab=T)
require(stats)
pc <- prcomp(x = databan,
center = TRUE,
scale. = TRUE)
print(pc)
## Standard deviations (1, .., p=13):
## [1] 3.53709429 0.51086070 0.31610830 0.18581100 0.17465942 0.13425889
## [7] 0.11619086 0.10458127 0.08757657 0.07332399 0.06654957 0.04286862
## [13] 0.03540655
##
## Rotation (n x k) = (13 x 13):
## PC1 PC2 PC3 PC4 PC5 PC6
## A10 0.2642267 -0.63375048 -0.34530470 -0.32850723 -0.28944164 0.42516216
## A20 0.2755683 -0.39130416 -0.03333290 0.06618973 0.29850562 -0.40776312
## A30 0.2785656 -0.25022949 -0.03246763 0.39230612 0.29518435 -0.11223955
## A40 0.2813391 -0.11934981 0.01169124 0.22276525 0.01029357 -0.10900723
## A50 0.2818247 -0.04763707 0.05287199 0.25755903 -0.03086914 -0.08786534
## A60 0.2806135 0.07949373 0.22522288 0.18019393 -0.30409794 0.08249630
## A70 0.2805758 0.08004135 0.23943366 0.16431181 -0.28891962 0.12337283
## A_1_3LCL 0.2782607 0.04619014 0.38439008 -0.31067432 -0.46632161 -0.28154887
## A1_2LCL 0.2783192 0.12981410 0.41537424 -0.13453452 0.34261475 0.17817216
## ALCL 0.2792227 0.09548626 0.14840484 -0.52443595 0.47241764 0.21563166
## Kumar 0.2753299 0.29762498 -0.42986651 -0.28072798 -0.06441458 -0.47424279
## B 0.2751223 0.34607099 -0.42376437 -0.02720779 -0.01044230 -0.00127710
## DM 0.2761327 0.34196263 -0.24685327 0.29861178 0.02641507 0.46510140
## PC7 PC8 PC9 PC10 PC11
## A10 -0.0467366878 0.05913596 -0.13435564 0.08646792 -0.033963004
## A20 -0.0003311553 0.46599391 0.43729545 0.01648874 -0.005556239
## A30 0.2016813121 -0.51337148 0.02569906 0.35111015 -0.216357039
## A40 0.1564969506 -0.18671744 -0.45849705 -0.41228775 0.317399879
## A50 -0.2041917820 0.15253134 -0.22703071 -0.44719248 0.030500303
## A60 -0.3486823010 -0.24319674 0.05311498 0.42665266 0.195566684
## A70 -0.3498031950 0.07177027 0.41748762 -0.16552372 0.072632077
## A_1_3LCL 0.5653251441 -0.02956493 0.04995293 -0.03075034 -0.215353444
## A1_2LCL -0.0515932370 0.43115589 -0.46021797 0.35879137 -0.047405108
## ALCL -0.0745307063 -0.39538334 0.26131897 -0.29286223 0.101200927
## Kumar -0.3996001034 -0.09002008 -0.20111863 0.05881064 -0.333558921
## B 0.3238117183 0.14175943 0.11461831 0.21779247 0.623948860
## DM 0.2338151849 0.14859567 0.12252210 -0.15245594 -0.497363915
## PC12 PC13
## A10 0.05178697 -0.04252076
## A20 -0.22537450 0.22494550
## A30 0.27357459 -0.23511452
## A40 0.05946857 0.54865262
## A50 -0.22961414 -0.68583025
## A60 -0.55432032 0.14616907
## A70 0.62642134 0.07044133
## A_1_3LCL -0.04338115 -0.08527045
## A1_2LCL 0.18543802 0.01405947
## ALCL -0.13639832 -0.03741312
## Kumar 0.10505481 0.09473437
## B 0.09315148 -0.20150773
## DM -0.20287954 0.19014216
summary(pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.5371 0.51086 0.31611 0.18581 0.17466 0.13426 0.11619
## Proportion of Variance 0.9624 0.02008 0.00769 0.00266 0.00235 0.00139 0.00104
## Cumulative Proportion 0.9624 0.98246 0.99015 0.99280 0.99515 0.99654 0.99758
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.10458 0.08758 0.07332 0.06655 0.04287 0.03541
## Proportion of Variance 0.00084 0.00059 0.00041 0.00034 0.00014 0.00010
## Cumulative Proportion 0.99842 0.99901 0.99942 0.99976 0.99990 1.00000
pc$x[,1]
## [1] -5.5335811 -4.5879442 -4.3188115 -4.9399491 -4.5738055 -3.7614023
## [7] -4.5324365 -3.0585592 -3.2970446 -3.1130556 -2.2289360 -2.2744136
## [13] -1.4186807 -1.8546635 -1.3327440 -1.1640201 0.1885050 0.3028724
## [19] 1.3339061 2.2483950 1.2297556 -0.3844374 1.2144815 2.6099049
## [25] 3.5319686 3.2292649 3.7573781 3.9880269 3.3171467 3.3843121
## [31] 5.7154023 5.1747054 5.6415551 5.5069043
plot(pc$x[,1:2],pch=16)
cor(pc$x[,1:2])
## PC1 PC2
## PC1 1.000000e+00 1.100695e-14
## PC2 1.100695e-14 1.000000e+00
plot(pc)
screeplot(pc, type = "line", main = "Scree plot")
library(devtools)
install_github("vqv/ggbiplot")
## WARNING: Rtools is required to build R packages, but is not currently installed.
##
## Please download and install Rtools 4.2 from https://cran.r-project.org/bin/windows/Rtools/ or https://www.r-project.org/nosvn/winutf8/ucrt3/.
## Skipping install of 'ggbiplot' from a github remote, the SHA1 (7325e880) has not changed since last install.
## Use `force = TRUE` to force installation
library(ggbiplot)
ggbiplot(pc)
ggbiplot(pc,varname.size = 2)
#cor_pc1=pc$rotation[,1]
#cor_pc2=pc$rotation[,2]
#plot(0,0,xlim=c(-1.1),ylim=(-0.7,4),asa)
#plot(databan$A50,databan$DM,pch=16)
#ang=atan(cor_pc2/cor_pc1)*180/pi
#sort(abs(ang))
cor_pc1 = pc$rotation[,1]
cor_pc2 = pc$rotation[,2]
plot(0,0, xlim=c(-1,1), ylim=c(-0.7,0.4), asp=1)
arrows(rep(0, 13), rep(0, 13),
cor_pc1, cor_pc2)
text(cor_pc1+runif(13, 0, 0.1), cor_pc2, names(cor_pc1))
ang = atan(cor_pc2/cor_pc1)*180/pi
sort(cor_pc1)
## A10 B Kumar A20 DM A_1_3LCL A1_2LCL A30
## 0.2642267 0.2751223 0.2753299 0.2755683 0.2761327 0.2782607 0.2783192 0.2785656
## ALCL A70 A60 A40 A50
## 0.2792227 0.2805758 0.2806135 0.2813391 0.2818247
text(cor_pc1+runif(13, 0.2, 0.4), cor_pc2,
paste0(round(ang, 2), '°'))
sort(abs(ang))
## A_1_3LCL A50 A60 A70 ALCL A40 A1_2LCL A30
## 9.424925 9.594067 15.816658 15.922140 18.879238 22.987648 25.005367 41.932675
## Kumar DM B A20 A10
## 47.228391 51.079343 51.515723 54.845629 67.367501
plot(databan$A50, databan$DM, pch=16)
library(ggbiplot)
data(databan)
## Warning in data(databan): data set 'databan' not found
ban.pca <- prcomp(databan, scale. = TRUE)
ggbiplot(ban.pca, obs.scale = 1, var.scale = 1,
ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')
biplot2 = ggbiplot(pcobj = ban.pca,alpha = 0.2,varname.adjust = T,
choices = c(1,2),
obs.scale = 1, var.scale = 1, # Scaling of axis
#labels = row.names(databan), # Add labels as rownames
labels.size = 3,
varname.size = 2,
varname.abbrev = TRUE, # Abbreviate variable names (TRUE)
var.axes = T, # Remove variable vectors (TRUE)
circle = T, # Add unit variance circle (TRUE)
ellipse = TRUE)+ # Adding ellipses
scale_color_discrete(name = '')+
theme(legend.direction = 'horizontal', legend.position = 'top')
print(biplot2)
biplot3 = biplot2 + theme(legend.position = "bottom")
print(biplot3)
theme1 = biplot3 + theme_bw()
theme2 = biplot3 + theme_classic()
theme3 = biplot3 + theme_dark()
theme4 = biplot3 + theme_gray()
theme5 = biplot3 + theme_minimal()
theme6 = biplot3 + theme_void()
2 dimensiones muestran el 98% de los datos, (Cumulative proportion)
Encontrar el ángulo permite detectar la mejor variable represnetada en el PC1 Support Vector Machine LV, determina cual es la variable que se asocia más a la mortalidad de Bogotá
#Ahora sacamos el nuevo modelo usando solo materia seca y A50
mod1=lm(DM~A50,databan)
summary(mod1)
##
## Call:
## lm(formula = DM ~ A50, data = databan)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6676 -1.0634 -0.0506 1.1241 3.2293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.372811 0.698360 -6.262 5.1e-07 ***
## A50 0.052451 0.002341 22.404 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.784 on 32 degrees of freedom
## Multiple R-squared: 0.9401, Adjusted R-squared: 0.9382
## F-statistic: 501.9 on 1 and 32 DF, p-value: < 2.2e-16
#DM=-4.37+0.05*A50
pred=mod1$fitted.values
plot(databan$DM,pred,pch=16)
cor(databan$DM,pred)
## [1] 0.9695714
cor(databan$DM,pred)^2
## [1] 0.9400687
cor(databan$DM,pred)**2
## [1] 0.9400687
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
set.seed(123)
data2 = databan %>%
mutate(id = seq(nrow(databan)),
pred = pred)
data_train = data2 %>%
sample_frac(0.7)
data_test = data2 %>%
filter(!id %in% data_train$id)
mod2 = lm(DM ~ A50, data_train)
summary(mod2)
##
## Call:
## lm(formula = DM ~ A50, data = data_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1577 -1.2208 0.0675 1.4601 2.7815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.613740 0.876939 -5.261 2.81e-05 ***
## A50 0.054438 0.002857 19.053 3.66e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.83 on 22 degrees of freedom
## Multiple R-squared: 0.9429, Adjusted R-squared: 0.9403
## F-statistic: 363 on 1 and 22 DF, p-value: 3.662e-15
pred_train = mod2$fitted.values
plot(data_train$DM, pred_train,
pch=16, cex=2)
pred_test = predict(mod2, data_test)
plot(data_test$DM, pred_test,
pch=16, cex=2)
cor(data_test$DM, pred_test)**2
## [1] 0.966838
Machine Learning Debe ajustar los datos a lo que se solicita Discriminiant analysis for estimating meristematic differentiation point based on banana