Genomic prediction of wheat traits using transfer learning approach with different sets of SNPs between subpopulations

A summary of variance components, SNP effects, transfer learning, and cross-validation of wheat traits

Author

Blessing Olabosoye

Published

May 13, 2024

1 Raw phenotypes file

Code
raw_pheno <- read_xlsx("Raw_phenotypes.xlsx")
dim(raw_pheno)
[1] 11424    19
Code
#raw_pheno[1:5, 1:5]

2 Ajusted phenotypes file

Code
adj_pheno <- read.csv("adj_phenotypes.csv")
adj_pheno <- adj_pheno[,-c(8:20)]
dim(adj_pheno)
[1] 11424    20
Code
#Unique phenotype
unique_pheno <- adj_pheno[!duplicated(adj_pheno$ID), ]
dim(unique_pheno)
[1] 10375    20

3 Quality control

  • Phenotypes with duplicates
Code
Pheno_dup <- adj_pheno|>
  group_by(ID) |>
  summarise(count = n()) |>
  filter((count >1)) #adj_phenotypes w/ dupllicates
Pheno_dup
# A tibble: 8 × 2
  ID         count
  <chr>      <int>
1 AYT-O-1994   150
2 AYT-O-2008   150
3 AYT-O-2021   150
4 AYT-O-2031   150
5 AYT-O-2033   150
6 AYT-O-2048   150
7 AYT-O-2052     7
8 AYT-O-2062   150
Code
#Phenotype without IDs in Pheno_dup(ID==NAs)
y_check <- adj_pheno[!(adj_pheno$ID%in%Pheno_dup$ID),]
dim(y_check)
[1] 10367    20
Code
summary(y_check)
       X              ID                Range            Row       
 Min.   :    1   Length:10367       Min.   : 1.00   Min.   :  1.0  
 1st Qu.: 2854   Class :character   1st Qu.: 6.00   1st Qu.:120.0  
 Median : 5707   Mode  :character   Median :12.00   Median :238.0  
 Mean   : 5709                      Mean   :12.49   Mean   :238.5  
 3rd Qu.: 8564                      3rd Qu.:18.00   3rd Qu.:357.0  
 Max.   :11423                      Max.   :24.00   Max.   :476.0  
                                                                   
   Location              Year       GrowthHabit       adj_NDVI        
 Length:10367       Min.   :2014   Min.   :1.000   Min.   :-0.495391  
 Class :character   1st Qu.:2014   1st Qu.:2.000   1st Qu.:-0.048276  
 Mode  :character   Median :2014   Median :2.000   Median : 0.004227  
                    Mean   :2014   Mean   :2.365   Mean   :-0.000125  
                    3rd Qu.:2014   3rd Qu.:3.000   3rd Qu.: 0.050554  
                    Max.   :2014   Max.   :8.000   Max.   : 0.266729  
                                   NA's   :9       NA's   :6          
 adj_PlantHeight      adj_LeafLoss       adj_Glaucousness   
 Min.   :-4.064010   Min.   :-4.538660   Min.   :-4.027074  
 1st Qu.:-0.627596   1st Qu.:-1.150594   1st Qu.:-1.485844  
 Median :-0.026288   Median :-0.113439   Median :-0.373189  
 Mean   :-0.000589   Mean   : 0.000217   Mean   : 0.001182  
 3rd Qu.: 0.727739   3rd Qu.: 1.175993   3rd Qu.: 1.251936  
 Max.   : 3.513630   Max.   : 4.686780   Max.   : 6.144128  
                                                            
 adj_Greenness       adj_LeafWidth        adj_Biomass        adj_Yellows       
 Min.   :-4.768577   Min.   :-3.942787   Min.   :-4.87902   Min.   :-1.417275  
 1st Qu.:-1.097327   1st Qu.:-0.949922   1st Qu.:-0.69445   1st Qu.:-0.605954  
 Median : 0.084101   Median : 0.018754   Median : 0.04168   Median :-0.402283  
 Mean   : 0.000695   Mean   : 0.000468   Mean   :-0.00068   Mean   : 0.000276  
 3rd Qu.: 1.049599   3rd Qu.: 0.950542   3rd Qu.: 0.72300   3rd Qu.: 0.415370  
 Max.   : 3.755015   Max.   : 3.388017   Max.   : 3.33733   Max.   : 7.033529  
                                         NA's   :107                           
  adj_Maturity        adj_GrainYield      adj_TestWeight      
 Min.   :-22.252293   Min.   :-2643.450   Min.   :-10.367262  
 1st Qu.: -4.352765   1st Qu.: -365.178   1st Qu.: -0.984365  
 Median :  0.584405   Median :   27.237   Median :  0.185297  
 Mean   : -0.001202   Mean   :    0.168   Mean   : -0.000197  
 3rd Qu.:  4.641852   3rd Qu.:  389.405   3rd Qu.:  1.165534  
 Max.   : 15.284852   Max.   : 2080.314   Max.   :  4.942738  
                      NA's   :123         NA's   :26          
 adj_GrainSizeTKW     adj_GrainProtein  
 Min.   :-14.646842   Min.   :-2.85376  
 1st Qu.: -2.922147   1st Qu.:-0.51878  
 Median : -0.136602   Median :-0.02669  
 Mean   :  0.000636   Mean   : 0.00060  
 3rd Qu.:  2.790503   3rd Qu.: 0.49063  
 Max.   : 20.142392   Max.   : 3.73942  
 NA's   :27           NA's   :72        

4 Genotypes file

Code
R_file <- readRDS("Genotypes.rds")
R_file[1:5, 1:5] #non-standardized markers
           AXAFFSNP-00001 AXAFFSNP-00002 AXAFFSNP-00003 AXAFFSNP-00004
AYT-O-1848              1             -1             -1              1
AYT-O-0974             -1             -1             -1              1
AYT-S-0579              1             -1             -1              1
AYT-S-0299              1             -1             -1              1
AYT-S-0664              1             -1             -1              1
           AXAFFSNP-00005
AYT-O-1848              1
AYT-O-0974              1
AYT-S-0579              1
AYT-S-0299              1
AYT-S-0664              1

5 Compare variables between phenotypes and genomic marker

Code
length(unique(adj_pheno$ID))
[1] 10375
Code
all(rownames(R_file)%in%adj_pheno$ID)
[1] TRUE
Code
all(unique(adj_pheno$ID)%in%rownames(R_file))
[1] TRUE
Code
table(table(adj_pheno$ID))

    1     7   150 
10367     1     7 
Code
table(table(rownames(R_file)))

    1 
10375 
Code
#Genotype with phenotypes that contain some variables
Geno_n_pheo <- R_file[(rownames(R_file)%in%y_check$ID),]
dim(Geno_n_pheo)
[1] 10367 17181

6 Sample 2000 individuals and 1000 markers to this study

Code
set.seed(234)
smp <- sample(1:nrow(y_check), 2000)
yp <- y_check[smp, ]

smp2 <- sample(1:ncol(Geno_n_pheo), 1000)
Gp <- Geno_n_pheo[yp$ID, smp2]
all.equal(rownames(Gp), yp$ID)
[1] TRUE

7 Genomic relationship matrices

7.1 Obtain Z and G by group

7.2 Using all individuals, i.e, combination of all unique elements from all groups

Code
Pc <- colMeans(Gp+1)/2
summary(Pc)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.1940  0.5208  0.5048  0.8110  1.0000 
Code
Qc <- 1 - Pc
summary(Qc)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.1890  0.4792  0.4952  0.8060  1.0000 
Code
MFc <- pmin(Pc, 1-Pc)
MFc_1 <- MFc > 0.05

Gpc <- Gp[, MFc_1]
Pc_1 <- colMeans(Gpc+1)/2
summary(Pc_1)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.05033 0.26970 0.51969 0.50428 0.74035 0.94971 
Code
Qc_1 <- 1 - Pc_1
Pz <- 2*(Pc_1 - 0.5)

Zc <- sweep(Gpc, 2, Pz, FUN = "-")
Zt <- tcrossprod(Zc)
s2pq <- sum(2*Pc_1*Qc_1)

G_all <- Zt/s2pq
summary(diag(G_all))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.8887  1.6445  1.9015  1.8592  2.0794  2.5086 

7.2.1 Splitting rows of Z matrix into group using k-means

Code
set.seed(78)
ks <- kmeans(Zc, centers = 3, nstart = 10)
mk_rand1 <- sample(ncol(Zc), 350)
mk_rand2 <- sample(ncol(Zc), 670)
mk_rand3 <- sample(ncol(Zc), 490)

gh1 <- Zc[ks$cluster ==1,mk_rand1]
gh2 <- Zc[ks$cluster ==2,mk_rand2]
gh3 <- Zc[ks$cluster ==3,mk_rand3]

cat("Zcombined:", dim(Zc), "\n") #Z complete
Zcombined: 2000 762 
Code
cat("Z for pop 1:", dim(gh1), "\n") #Z1 for population 1
Z for pop 1: 523 350 
Code
cat("Z2:", dim(gh2), "\n") #Z2 for population 2
Z2: 781 670 
Code
cat("Z3:", dim(gh3), "\n") #Z3 for population 3
Z3: 696 490 

7.3 Computing T matrices

  • \[T_i = (Z'_iZ_i)^{-1}Z'_iZ_c\]

  • \[Z_c = Z_iT_i\]

Code
T <- solve(t(gh1)%*%gh1)%*%t(gh1)%*%Zc[rownames(gh1),]   #T for population 1
cat("T1:", dim(T), "\n") #T1 for population 1
T1: 350 762 
Code
T2 <- solve(t(gh2)%*%gh2)%*%t(gh2)%*%Zc[rownames(gh2),]   #T for population 2
cat("T2:", dim(T2), "\n") #T2 for population 2
T2: 670 762 
Code
T3 <- solve(t(gh3)%*%gh3)%*%t(gh3)%*%Zc[rownames(gh3),]   #T for population 3
cat("T3:", dim(T3), "\n") #T3 for population 3
T3: 490 762 
Code
#Check
Z_c <- gh1%*%T
cat("Z_complete for individuals in pop 1:", dim(Z_c), "\n") #Z complete
Z_complete for individuals in pop 1: 523 762 
Code
Z_c2 <- gh2%*%T2
Z_c3 <- gh3%*%T3

7.3.1 Splitting p-allele frequency into groups and obtain their GRM

Code
P1 <- Pc_1[mk_rand1]
P2 <- Pc_1[mk_rand2]
P3 <- Pc_1[mk_rand3]

Gm1 <- tcrossprod(gh1)/sum(2*P1*(1-P1))
Gm2 <- tcrossprod(gh2)/sum(2*P2*(1-P2))
Gm3 <- tcrossprod(gh3)/sum(2*P3*(1-P3))

Gm1t <- tcrossprod(Z_c)/sum(2*P1*(1-P1))
Gm2t <- tcrossprod(Z_c2)/sum(2*P2*(1-P2))
Gm3t <- tcrossprod(Z_c3)/sum(2*P3*(1-P3))

8 Genomic Prediction

  • Fit Growth Habit as random variable
  • Fit Plant height as response variable
  • Output predicted breeding values by group
  • Output variance components by group

8.1 Phenotype by group

Code
y_pheno <- data.frame(yp[, -1], row.names = 1)
y_pheno$GrowthHabit <- as.factor(y_pheno$GrowthHabit)

yc <- y_pheno[ks$cluster ==1,]
yc2 <- y_pheno[ks$cluster ==2,]
yc3 <- y_pheno[ks$cluster ==3,]

8.2 Fit GBLUP in each group

Code
gpred <- model_gb(Gm1, yc, y_name = 'adj_PlantHeight',random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -282.1665 converged in: 13 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 520 first 5 animals shown:
                  [,1]
AYT-O-0747 -0.44074298
PYT-S-4057  0.00796764
PYT-S-5267  0.12953880
PYT-S-3457  0.34140883
PYT-S-5009  0.07658197
PYT-S-1578  0.42420852
.......................................................... 
Code
gpred2 <- model_gb(Gm2, yc2, y_name = 'adj_PlantHeight',random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -326.0185 converged in: 8 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 781 first 5 animals shown:
                  [,1]
AYT-S-0214  0.03764242
PYT-S-0872  0.45664402
PYT-S-0310 -0.12313513
PYT-S-0643  0.02194633
PYT-S-2639  0.28402193
PYT-S-0675  0.09190713
.......................................................... 
Code
gpred3 <- model_gb(Gm3, yc3, y_name = 'adj_PlantHeight',random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -298.8649 converged in: 7 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 696 first 5 animals shown:
                  [,1]
PYT-S-2817 -1.19681095
PYT-S-3520 -0.36418352
PYT-S-4630 -0.57914896
PYT-S-3700 -0.76634901
PYT-S-3915 -0.02313317
PYT-S-2556  1.08935045
.......................................................... 
Code
gpred_A <- model_gb(G_all , y_pheno, y_name = 'adj_PlantHeight', random = 'GrowthHabit') #combined
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -909.2252 converged in: 7 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 1997 first 5 animals shown:
                  [,1]
AYT-O-0747 -0.54099792
AYT-S-0214  0.04623541
PYT-S-4057  0.67847766
PYT-S-0872  0.30137969
PYT-S-5267 -0.19742503
PYT-S-0310 -0.40657306
.......................................................... 

8.3 Variance components by group

Code
gpd <- list(gpred, gpred2, gpred3, gpred_A)
lapply(gpd, varcomp)
[[1]]
                Estimate    StdError     prop.var          se
G            0.179209063 0.048377077  0.167278925 0.043521348
GrowthHabit -0.005157945 0.001443757 -0.004814576 0.001325488
In           0.897267722 0.068530585  0.837535651 0.058339582

[[2]]
              Estimate   StdError   prop.var         se
G           0.25888580 0.04361968 0.27168586 0.04533680
GrowthHabit 0.08656524 0.07154837 0.09084528 0.06855478
In          0.60743548 0.04005889 0.63746886 0.06591133

[[3]]
              Estimate   StdError   prop.var         se
G           0.23405155 0.05035106 0.24339387 0.04685682
GrowthHabit 0.03816556 0.03608073 0.03968896 0.03617382
In          0.68939934 0.04504291 0.71691717 0.05962710

[[4]]
              Estimate   StdError   prop.var         se
G           0.19344007 0.02597810 0.19490686 0.02405537
GrowthHabit 0.03947198 0.03029067 0.03977128 0.02935576
In          0.75956235 0.02729989 0.76532186 0.03570723

8.4 Estimation of SNP effects and variances

  • \(\hat{g}_i = Z'_iG^{-1}_i\hat{u}_i\)
Code
gwa_c1 <- gwas(gpred, t(gh1))
gwa_c2 <- gwas(gpred2, t(gh2))
gwa_c3 <- gwas(gpred3, t(gh3))

gwa_cA <- gwas(gpred_A, t(Zc))

9 With T matrix

9.1 Fit GBLUP in each group

Code
gpredt <- model_gb(Gm1t, yc, y_name = 'adj_PlantHeight',random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -279.0025 converged in: 21 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 520 first 5 animals shown:
                   [,1]
AYT-O-0747 -0.246604850
PYT-S-4057  0.385375632
PYT-S-5267 -0.003119352
PYT-S-3457  0.934870667
PYT-S-5009  0.131613753
PYT-S-1578  0.398211385
.......................................................... 
Code
gpred2t <- model_gb(Gm2t, yc2, y_name = 'adj_PlantHeight',random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -324.685 converged in: 8 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 781 first 5 animals shown:
                    [,1]
AYT-S-0214  0.0002249325
PYT-S-0872  0.4971828702
PYT-S-0310 -0.1587074104
PYT-S-0643 -0.0049202925
PYT-S-2639  0.2260634653
PYT-S-0675  0.0557953340
.......................................................... 
Code
gpred3t <- model_gb(Gm3t, yc3, y_name = 'adj_PlantHeight',random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ 1

random effects equation:
~G + GrowthHabit + In

log-likelihood: -295.5092 converged in: 7 iterations 

estimated fixed effects:
[1] test.st p.value
<0 rows> (or 0-length row.names)

estimated variance components:
[1] prop.var
<0 rows> (or 0-length row.names)

breeding value predicted for 696 first 5 animals shown:
                  [,1]
PYT-S-2817 -1.33857179
PYT-S-3520 -0.24700235
PYT-S-4630 -0.70179429
PYT-S-3700 -0.74068871
PYT-S-3915  0.09333683
PYT-S-2556  1.06080448
.......................................................... 

9.2 Variance components by group

Code
gpd <- list(gpredt, gpred2t, gpred3t)
lapply(gpd, varcomp)
[[1]]
                Estimate     StdError     prop.var          se
G            0.107359293 0.0212844839  0.112238399 0.022932896
GrowthHabit -0.006068053 0.0003846691 -0.006343825 0.000183565
In           0.855237849 0.0629864682  0.894105426 0.033623549

[[2]]
              Estimate   StdError   prop.var         se
G           0.24323157 0.03994221 0.26213591 0.04427342
GrowthHabit 0.09122674 0.07490737 0.09831703 0.07309824
In          0.59342513 0.03976594 0.63954706 0.06799320

[[3]]
              Estimate   StdError   prop.var         se
G           0.17814204 0.03592232 0.20390673 0.03951195
GrowthHabit 0.03610586 0.03440114 0.04132785 0.03788353
In          0.65939683 0.04450621 0.75476542 0.05671778

9.3 Estimation of SNP effects and variances

  • \(\hat{g}_i = Z'_iG^{-1}_i\hat{u}_i\)
Code
gwa_1 <- gwas(gpredt, t(Z_c))
gwa_2 <- gwas(gpred2t, t(Z_c2))
gwa_3 <- gwas(gpred3t, t(Z_c3))

10 Transfer learning

  • \[y_i = X_i \beta_i + Z_i\Bigg(\sum_{i \neq j}\alpha_{ij}\hat{g}_j + g_i \Bigg) + e_i \tag{1}\]

10.1 From Population 2 & 3 to 1

Code
P1_2 <- get_polys(gh1, gwa_c2, scale = FALSE)
P1_3 <- get_polys(gh1, gwa_c3, scale = FALSE)
yp1 <- add_poly(yc, P1_2, P1_3)


gpred1_2 <- model_gb(Gm1, yp1, y_name = 'adj_PlantHeight',fixed = c('poly1', 'poly2'), random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ poly1 + poly2

random effects equation:
~G + GrowthHabit + In

log-likelihood: -280.6772 converged in: 13 iterations 

estimated fixed effects:
                Estimate    StdError    test.st   p.value
(Intercept) 1.544094e-01 0.125050015 1.23478143 0.2169119
poly1       3.652112e-05 0.001049638 0.03479401 0.9722440
poly2       1.571549e-03 0.001668810 0.94171851 0.3463368

estimated variance components:
                Estimate    StdError     prop.var
G            0.183759671 0.049644864  0.171103869
GrowthHabit -0.004934129 0.001773975 -0.004594308
In           0.895140065 0.068670058  0.833490439

breeding value predicted for 520 first 5 animals shown:
                  [,1]
AYT-O-0747 -0.44167260
PYT-S-4057 -0.03756636
PYT-S-5267  0.16189930
PYT-S-3457  0.34638072
PYT-S-5009  0.05965172
PYT-S-1578  0.43251649
.......................................................... 

10.2 From Population 2 & 3 to 1 with T matrix

  • \[y_i = X_i \beta_i + Z_i\Bigg(\sum_{i \neq j}\alpha_{ij}T_i\hat{g}_j + g_i \Bigg) + e_i \tag{2}\]
Code
P1_2t <- get_polys(Z_c, gwa_2, scale = FALSE)
P1_3t <- get_polys(Z_c, gwa_3, scale = FALSE)
ypt <- add_poly(yc, P1_2t, P1_3t)


gpred1_2t <- model_gb(Gm1t, ypt, y_name = 'adj_PlantHeight',fixed = c('poly1', 'poly2'), random =  'GrowthHabit')
.......................................................... 
summary gblup analysis of trait: adj_PlantHeight 

fixed effects equation:
y ~ poly1 + poly2

random effects equation:
~G + GrowthHabit + In

log-likelihood: -277.165 converged in: 15 iterations 

estimated fixed effects:
                Estimate     StdError   test.st   p.value
(Intercept) 0.0954107382 0.1046169949 0.9120004 0.3617685
poly1       0.0005411233 0.0006501459 0.8323105 0.4052337
poly2       0.0011662603 0.0010952842 1.0648015 0.2869657

estimated variance components:
                Estimate     StdError     prop.var
G            0.093744758 0.0240417164  0.097367522
GrowthHabit -0.005976068 0.0004393901 -0.006207013
In           0.875024200 0.0679212324  0.908839491

breeding value predicted for 520 first 5 animals shown:
                 [,1]
AYT-O-0747 -0.2577357
PYT-S-4057  0.2294913
PYT-S-5267  0.1271897
PYT-S-3457  0.6905762
PYT-S-5009  0.2131915
PYT-S-1578  0.3875762
.......................................................... 

10.3 SNP effects for TL

Code
gw1_2t <- gwas(gpred1_2t, t(Z_c))

gw_TL1 <- gpred1_2t$coefm[2]*gwa_2$ghat + gpred1_2t$coefm[3]*gwa_3$ghat + gwa_1$ghat

10.4 Plot

Code
 cr1 <- cor(gwa_1$ghat, gw_TL1)
 tibble(x= gwa_1[,1], y = gw_TL1) %>%
   ggplot(., aes(x,y))+
   geom_point(color = "gold")+
   geom_abline(color = "firebrick")+
   geom_label(x = -3.7, y = 4, label = paste0("r: ", round(cr1, 2)))+
   labs(title = "Individual complete SNP effects with Transfer complete SNP Effects",
        subtitle = "Group 1",
        x = expression(hat(g)[Ind]),
        y = expression(hat(g)[Transfer]))+
   theme(plot.title = element_text(color = "firebrick"), plot.subtitle = element_text(color = "goldenrod4"), axis.title.x = element_text(colour = "firebrick"),axis.title.y = element_text(colour = "goldenrod4"))