#For at home /
fit_table_purity_complete<-readRDS("~/fit_table_purity_complete.rds")
normalised_PC_TCGA <-readRDS("~/compress_normalised_narm_TCGA.rds")
log_fold_gene_change_sig<-readRDS("~/log_fold_gene_change_sig.rds")
#Required Packages
library(dplyr)
library(ggplot2)
library(plotly)
library(tidyr)
library(broom)
library(tibble)
fit_table_purity_complete<- fit_table_purity_complete %>% rename(mean_value = mean)
For all genes
#Plot slope mean vs inflection mean for all genes
fit_table_purity_complete %>% #Data Table of all fit data
filter(parameters == "beta[2,1]"|parameters == "inflection[1]") %>% #Select required parameters
select(parameters, mean_value, Gene_name) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different val for TUBP10 / possible dup?
pivot_wider(names_from = parameters, values_from = mean_value) %>% #Reshape table for plotting
rename(Inflection_mean="inflection[1]", Slope_mean ="beta[2,1]")%>%
ggplot(aes(x=Inflection_mean, y= Slope_mean)) +
geom_point(col="dodgerblue", alpha=0.7) +
stat_smooth(se = FALSE, method = 'lm', col="red") + #Plot fitted linear eq
labs(title = "Slope mean vs Inflection vs (For Complete TABI Run)")
#SUmmary Statistics on Linear Relationship
summary(lm(Slope_mean~Inflection_mean, data = fit_table_purity_complete %>% #Data Table of all fit data
filter(parameters == "beta[2,1]"|parameters == "inflection[1]") %>%
select(parameters, mean_value, Gene_name) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different valfor TUBP10 / possible dup?
pivot_wider(names_from = parameters, values_from = mean_value) %>%
rename(Inflection_mean="inflection[1]", Slope_mean ="beta[2,1]")))
##
## Call:
## lm(formula = Slope_mean ~ Inflection_mean, data = fit_table_purity_complete %>%
## filter(parameters == "beta[2,1]" | parameters == "inflection[1]") %>%
## select(parameters, mean_value, Gene_name) %>% filter(!Gene_name ==
## "TUBBP10") %>% pivot_wider(names_from = parameters, values_from = mean_value) %>%
## rename(Inflection_mean = "inflection[1]", Slope_mean = "beta[2,1]"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.01080 -0.20024 -0.01188 0.18781 3.05010
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.018679 0.001978 9.442 <2e-16 ***
## Inflection_mean -0.202334 0.006071 -33.330 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3683 on 34859 degrees of freedom
## Multiple R-squared: 0.03088, Adjusted R-squared: 0.03086
## F-statistic: 1111 on 1 and 34859 DF, p-value: < 2.2e-16
Comment: In general there is a negative trend between the calculated slope mean and inflection mean (i.e. genes which change expression levels late in disease progression tend to be those which decrease in gene expression over disease progression). Potential outliers (genes with large absolute values of mean slope, in the top right hand corner of the graph and the bottom left hand corner of the graph) may be skewing the plotting of a linear relationship between mean slope and inflection, hence the true trend may be larger and stronger than that calculated above.
For genes with differential gene expression
#Isolate genes with significant slope / non-zero-slope at 5% level of sig
#i.e changes in gene expression
non_zero_slope_genes<- fit_table_purity_complete %>%
filter(parameters == "beta[2,1]") %>%
filter(`2.5%`<0&`97.5%`<0|`2.5%`>0&`97.5%`>0) %>% #Selecting either all > 0 for 95% interval / or all < 0
select(Gene_name) #Which genes
fit_table_purity_complete_sig <-inner_join(non_zero_slope_genes, fit_table_purity_complete)
#Plot only those genes with statistically significant changes in gene expression
sig_genes_plot<-fit_table_purity_complete_sig %>% #Data Table of fit data for significant genes
filter(parameters == "beta[2,1]"|parameters == "inflection[1]") %>% #Select required parameters
select(parameters, "mean_value", Gene_name) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different val / causing problems with pivot wider
pivot_wider(names_from = parameters, values_from = "mean_value") %>% #Reshape for plotting
rename(Inflection_mean="inflection[1]", Slope_mean ="beta[2,1]")%>% #Rename more informative name
ggplot(aes(x=Inflection_mean, y= Slope_mean, Gene=Gene_name)) +
geom_point(alpha = 0.7, col = "dodgerblue") +
stat_smooth(se = FALSE, method = 'lm', col="red") +
labs(title = "Slope mean vs Inflection mean (For genes with non-zero slopes)")
ggplotly(sig_genes_plot)
#Summary Statistics on Linear Relationship (only for statisitically signficant slopes)
summary(lm(Slope_mean~Inflection_mean, data = fit_table_purity_complete_sig %>% #Data Table of fit data of significant Genes
filter(parameters == "beta[2,1]"|parameters == "inflection[1]") %>%
select(parameters, mean_value, Gene_name) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different val / causing problems with pivot wider
pivot_wider(names_from = parameters, values_from = mean_value) %>% #Reshape for lm test
rename(Inflection_mean="inflection[1]", Slope_mean ="beta[2,1]")))
##
## Call:
## lm(formula = Slope_mean ~ Inflection_mean, data = fit_table_purity_complete_sig %>%
## filter(parameters == "beta[2,1]" | parameters == "inflection[1]") %>%
## select(parameters, mean_value, Gene_name) %>% filter(!Gene_name ==
## "TUBBP10") %>% pivot_wider(names_from = parameters, values_from = mean_value) %>%
## rename(Inflection_mean = "inflection[1]", Slope_mean = "beta[2,1]"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9790 -0.4956 -0.1245 0.5147 2.8790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.06514 0.01129 5.769 8.65e-09 ***
## Inflection_mean -0.15437 0.01630 -9.472 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6699 on 3693 degrees of freedom
## Multiple R-squared: 0.02372, Adjusted R-squared: 0.02345
## F-statistic: 89.71 on 1 and 3693 DF, p-value: < 2.2e-16
fit_sig_log_mean<-left_join(non_zero_slope_genes, #Join table of genes with non-zero slopes with log mean data
normalised_PC_TCGA %>% #Create table of log mean data
select(transcript, `read_count normalised`) %>%
group_by(transcript) %>%
summarise(log_mean = mean(log(`read_count normalised`+1))) %>% #Calculate mean log read count + 1 for each gene
rename(Gene_name = transcript)) #Rename gene_name to enable joining
log_plot<-inner_join(fit_sig_log_mean, fit_table_purity_complete_sig) %>% #Data Table of fit data with log mean data
filter(parameters == "beta[2,1]"|parameters == "inflection[1]") %>%
select(parameters, "mean_value", Gene_name, log_mean) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different val / causing problems with pivot wider
pivot_wider(names_from = parameters, values_from = "mean_value") %>% #Reshape for plotting
rename(Inflection_mean="inflection[1]", Slope_mean ="beta[2,1]")%>%
ggplot(aes(x=Inflection_mean, y= Slope_mean, col=log_mean, Gene=Gene_name)) +
geom_point(alpha = 0.7) +
labs(title = "Slope mean vs Inflection mean (For genes with non-zero slopes)")
ggplotly(log_plot)
Comment: Even when only genes which have non-zero slopes are considering, the negative trend discussed above continues. As the plot where genes are coloured by mean log read count shows, it is likely that genes in the top right hand corner and bottom left of the plot are caused by outliers, shown by the low mean read count (suggesting that it is likely and the trend is being skewed by a few large non-zero gene counts).
log_fold_change<-left_join(fit_table_purity_complete_sig, #Combine fit data with log fold gene change data
log_fold_gene_change_sig %>%
rename(Gene_name = gene_name) %>% #Rename to combine by Gene_name
select(log_fold_change, log_fold_2_change, Gene_name)) %>%
distinct() %>%
filter(parameters == "inflection[1]"|parameters == "beta[2,1]") %>%
select(parameters, "mean_value", Gene_name, log_fold_change) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different val / causing problems with pivot wider
filter(!Gene_name == "HNRNPA1P54") %>%
pivot_wider(names_from = parameters, values_from = "mean_value") %>% #Reshape for plotting
rename(Inflection_mean="inflection[1]", Slope_mean ="beta[2,1]")%>%
ggplot(aes(x=Inflection_mean, y= Slope_mean, Gene = Gene_name, col=log_fold_change)) +
geom_point(alpha = 0.7)
ggplotly(log_fold_change)
#Plot of log fold gene change vs slope mean
slope_v_log_change<-left_join(fit_table_purity_complete_sig, #Combine fit data with log fold gene change data
log_fold_gene_change_sig %>%
rename(Gene_name = gene_name) %>% #Rename to combine by Gene_name
select(log_fold_change, log_fold_2_change, Gene_name)) %>%
distinct() %>%
filter(parameters == "beta[2,1]") %>%
select(parameters, "mean_value", Gene_name, log_fold_change) %>%
filter(!Gene_name == "TUBBP10") %>% #Two different val / causing problems with pivot wider
ggplot(aes(x=mean_value, y= log_fold_change, Gene = Gene_name)) +
geom_point(alpha = 0.7, col="dodgerblue") +
labs(title="Log fold gene change vs slope mean")
ggplotly(slope_v_log_change)
### Effect of outliers
#Plot normalised read conts on log scale, against scale CAPRA_S
#Overlay TABI sigmoid equation (using mean values for each parameter value in the equation)
#Details of plotting function in appendix
plot_ed_with_line("HOXA1", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("MIR210HG", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("RBM44", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("SLC24A1", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("HNRNPA1P54", normalised_PC_TCGA, fit_table_purity_complete_sig)
Comment:
sd_plot<-fit_table_purity_complete_sig %>%
filter(parameters =="inflection[1]") %>%
ggplot(aes(y=mean_value, x=sd, Gene=Gene_name)) +
geom_point(alpha=0.5, col="dodgerblue")
ggplotly(sd_plot)
#Boxplot of inflection standard deviation
fit_table_purity_complete_sig %>%
filter(parameters =="inflection[1]") %>%
ggplot(aes(y=sd)) +
geom_boxplot()
summary((fit_table_purity_complete_sig %>%
filter(parameters =="inflection[1]"))$sd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0785 0.7357 0.8848 0.8445 0.9749 1.9903
Comment: As can be seen in the above, the mean and standard deviation of inflection cluster around zero and one respectively (may be a reflection of our prior on inflection of N(0,1). Low standard deviation for inflection is a indication of a sharper inflection (a gene with a more sigmoidal trend), so points towards the left of the plot are good candidates for sigmoidal trends. From this point on, localised inflection refers to genes with an inflection sd in the first quartile (i.e. <0.7375) and wide inflection refers to genes with an inflection sd in the fourth quantile (i.e. >0.9749).
(Inflection sd <0.7375, in the first quartile)
#Plot normalised read conts on log scale, against scale CAPRA_S
#Overlay TABI sigmoid equation (using mean values for each parameter value in the equation)
#Details of plotting function in appendix
plot_ed_with_line("SPOCK2", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("NEK2", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("MAFTRR", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("CCDC178", normalised_PC_TCGA, fit_table_purity_complete_sig)
plot_ed_with_line("KIF20A", normalised_PC_TCGA, fit_table_purity_complete_sig)
(Inflection sd >0.9749, in the fourth quartile)
#Plot normalised read conts on log scale, against scale CAPRA_S
#Overlay TABI sigmoid equation (using mean values for each parameter value in the equation)
#Details of plotting function in appendix
plot_ed_with_line("ACPP", normalised_PC_TCGA, fit_table_purity_complete)
plot_ed_with_line("SPATA5L1", normalised_PC_TCGA, fit_table_purity_complete)
plot_ed_with_line("JRKL", normalised_PC_TCGA, fit_table_purity_complete)
plot_ed_with_line("ABHD17B", normalised_PC_TCGA, fit_table_purity_complete)
Of all declared statistically significant overlap between genes with localised early changes and genes sets (shown in table below) noticeable is the recurrent instance of larger than expected numbers of genes from gene sets involved with cell cycle and division (e.g mitotic spindle assembly, sister chromatid segregation, organelle fission, cell cycle gene groups etc.) as well as those involved in immunological processes (phagocytosis, response to bacterium, B cell receptor signalling pathway, complement activation etc.). In comparison, genes with localised late changes were recurrently significantly associated with overlap in genes sets involved in cellular, muscular and neural development and contraction (muscle filament sliding, sarcomere organization, actomyosin structure organization, neurotransmitter transport, neuron differentiation, neurogenesis, long-term synaptic potentiation etc.).
When this analysis was reapplied using Molecular Signatures Database (MSigDB, with collections C1, C3, C4, C5, C6, C7, H) a similar trend of gene set overlap was found, now with localised early changes also showing associations with genes sets of hallmark changes found in other cancers (such as lung and breast) and late changes showed association between known genes sets of hallmarks of prostate cancer.
From: http://geneontology.org/
Early Changes Table:
#Table of overlap for early localised changes
(read.csv("overlap_early_pantha.csv") %>%
as_tibble %>%
rename(Gene_Function_Set = "ï..GO.biological.process.complete",
Total_Number_Genes_Set="Homo.sapiens...REFLIST..20996.",
Number_Overlap="upload_1..203.",
Expected_Number_overlap = "upload_1..expected.",
Fewer_or_Greater_Than_Expected= "upload_1..over.under.",
Fold_Enrichment = "upload_1..fold.Enrichment.",
Raw_Pval = "upload_1..raw.P.value.",
FDR = "upload_1..FDR."))[1:20,]
## # A tibble: 20 x 8
## Gene_Function_S~ Total_Number_Ge~ Number_Overlap Expected_Number~
## <fct> <int> <int> <dbl>
## 1 mitotic spindle~ 37 5 0.36
## 2 phagocytosis, r~ 106 13 1.02
## 3 mitotic sister ~ 106 12 1.02
## 4 mitotic nuclear~ 142 15 1.37
## 5 B cell receptor~ 126 13 1.22
## 6 phagocytosis, e~ 128 13 1.24
## 7 sister chromati~ 139 14 1.34
## 8 meiotic chromos~ 90 9 0.87
## 9 plasma membrane~ 137 13 1.32
## 10 membrane invagi~ 144 13 1.39
## 11 complement acti~ 161 14 1.56
## 12 humoral immune ~ 166 14 1.6
## 13 nuclear chromos~ 217 18 2.1
## 14 nuclear divisio~ 282 23 2.73
## 15 complement acti~ 176 14 1.7
## 16 meiotic nuclear~ 155 12 1.5
## 17 organelle fissi~ 311 24 3.01
## 18 positive regula~ 169 13 1.63
## 19 meiotic cell cy~ 169 12 1.63
## 20 immunoglobulin ~ 204 14 1.97
## # ... with 4 more variables: Fewer_or_Greater_Than_Expected <fct>,
## # Fold_Enrichment <dbl>, Raw_Pval <dbl>, FDR <dbl>
Late changes Table:
#Table of overlap for late localised changes
(read.csv("overlap_late_pantha.csv") %>%
as_tibble() %>%
rename(Gene_Function_Set = "ï..GO.biological.process.complete",
Total_Number_Genes_Set="Homo.sapiens...REFLIST..20996.",
Number_Overlap="upload_1..522.",
Expected_Number_overlap = "upload_1..expected.",
Fewer_or_Greater_Than_Expected= "upload_1..over.under.",
Fold_Enrichment = "upload_1..fold.Enrichment.",
Raw_Pval = "upload_1..raw.P.value.",
FDR = "upload_1..FDR."))[1:20,]
## # A tibble: 20 x 8
## Gene_Function_S~ Total_Number_Ge~ Number_Overlap Expected_Number~
## <fct> <int> <int> <dbl>
## 1 skeletal myofib~ 11 5 0.27
## 2 skeletal muscle~ 9 4 0.22
## 3 actin-myosin fi~ 38 10 0.94
## 4 muscle filament~ 38 10 0.94
## 5 myofibril assem~ 61 15 1.52
## 6 cardiac myofibr~ 21 5 0.52
## 7 muscle fiber de~ 56 11 1.39
## 8 skeletal muscle~ 31 6 0.77
## 9 regulation of p~ 33 6 0.82
## 10 sarcomere organ~ 40 7 0.99
## 11 smooth muscle c~ 52 9 1.29
## 12 long-term synap~ 47 8 1.17
## 13 actomyosin stru~ 110 18 2.73
## 14 negative regula~ 45 7 1.12
## 15 musculoskeletal~ 45 7 1.12
## 16 multicellular o~ 45 7 1.12
## 17 muscle contract~ 246 38 6.12
## 18 cellular compon~ 102 15 2.54
## 19 actin-mediated ~ 90 13 2.24
## 20 cardiac muscle ~ 65 9 1.62
## # ... with 4 more variables: Fewer_or_Greater_Than_Expected <fct>,
## # Fold_Enrichment <fct>, Raw_Pval <dbl>, FDR <dbl>
From: https://www.gsea-msigdb.org/gsea/msigdb/index.jsp
Early Changes Table:
#Table of Gene Set Overlap with Early Localised Changes
early_overlap_non_pantha<-read.csv("~/Overlap_Early_Non_Pantha.csv") %>%
as_tibble()
(early_overlap_non_pantha %>%
select(-"Description") %>%
rename(Gene_set = "ï..Gene.Set.Name",
Total_Genes_In_Gene_Set = X..Genes.in.Gene.Set..K.,
Genes_In_Overlap = X..Genes.in.Overlap..k.,
Expected_Number = k.K))[1:20,]
## # A tibble: 20 x 6
## Gene_set Total_Genes_In_~ Genes_In_Overlap Expected_Number p.value
## <fct> <int> <int> <dbl> <dbl>
## 1 GSE1575~ 200 25 0.125 3.84e-23
## 2 GSE3911~ 200 25 0.125 3.84e-23
## 3 MODULE_~ 262 27 0.103 1.22e-22
## 4 GSE1575~ 200 23 0.115 1.53e-20
## 5 GNF2_CD~ 62 15 0.242 6.46e-19
## 6 GNF2_CC~ 68 15 0.221 2.96e-18
## 7 GNF2_CD~ 56 14 0.25 5.83e-18
## 8 GNF2_CC~ 57 14 0.246 7.68e-18
## 9 GSE1354~ 193 19 0.0984 6.55e-16
## 10 GNF2_CE~ 62 13 0.210 1.11e-15
## 11 HALLMAR~ 200 19 0.095 1.27e-15
## 12 GSE3096~ 200 18 0.09 1.87e-14
## 13 GO_ORGA~ 463 25 0.054 2.15e-14
## 14 GSE1354~ 184 17 0.0924 6.54e-14
## 15 GSE1441~ 190 17 0.0895 1.11e-13
## 16 GNF2_PC~ 68 12 0.176 1.29e-13
## 17 GSE2724~ 175 16 0.0914 4.26e-13
## 18 GSE1441~ 188 16 0.0851 1.29e-12
## 19 GO_SIST~ 189 16 0.0847 1.40e-12
## 20 GO_NUCL~ 262 18 0.0687 1.93e-12
## # ... with 1 more variable: FDR.q.value <dbl>
Late changes Table:
late_overlap_non_pantha<-read.csv("~/Overlap_Late_Non.csv") %>%
as_tibble()
(late_overlap_non_pantha %>%
select(-"Description") %>%
rename(Gene_set = "ï..Gene.Set.Name",
Total_Genes_In_Gene_Set = X..Genes.in.Gene.Set..K.,
Genes_In_Overlap = X..Genes.in.Overlap..k.,
Expected_Number = k.K))[1:20,]
## # A tibble: 20 x 6
## Gene_set Total_Genes_In_~ Genes_In_Overlap Expected_Number p.value
## <fct> <int> <int> <dbl> <dbl>
## 1 GO_MUSC~ 360 43 0.119 1.68e-23
## 2 GO_MUSC~ 473 48 0.102 5.37e-23
## 3 CAGCTG_~ 1537 82 0.0534 7.73e-20
## 4 GO_MUSC~ 659 48 0.0728 4.77e-17
## 5 GO_CONT~ 229 29 0.127 5.94e-17
## 6 HALLMAR~ 200 26 0.13 1.28e-15
## 7 SRF_C 215 26 0.121 7.50e-15
## 8 GO_MUSC~ 410 35 0.0854 8.65e-15
## 9 SRF_Q6 247 27 0.109 2.80e-14
## 10 GO_MYOF~ 70 16 0.229 4.62e-14
## 11 GO_SUPR~ 946 51 0.0539 7.48e-13
## 12 TATAAA_~ 1313 62 0.0472 7.59e-13
## 13 MODULE_~ 333 29 0.0871 1.07e-12
## 14 GO_MUSC~ 408 32 0.0784 1.23e-12
## 15 GO_CELL~ 868 48 0.0553 1.48e-12
## 16 SRF_Q4 229 24 0.105 1.90e-12
## 17 GO_MUSC~ 378 30 0.0794 4.64e-12
## 18 GO_ACTI~ 744 43 0.0578 5.43e-12
## 19 GO_INTR~ 1708 71 0.0416 6.04e-12
## 20 GO_NEUR~ 1599 68 0.0425 6.45e-12
## # ... with 1 more variable: FDR.q.value <dbl>
#Definition Function used to plot single gene read counts vs CAPRA_S
#Plot normalised read conts on log scale, against scale CAPRA_S
#Overlay TABI sigmoid equation (using mean values for each parameter value in the equation)
#Details of plotting function in appendix
plot_ed_with_line <- function(gene_name, data, fit_table) {
#Required packages / functions
require(ggplot2)
require(plotly)
#Requires scale_design and parse_formula from TABI to create scaled design matrix
#Select only paramters for specified gene
Gene_table <- fit_table %>%
filter(Gene_name == gene_name)
plot_line <- function(x) {
eta <- as.numeric(Gene_table %>%
filter(parameters == "inflection[1]") %>%
select(mean_value))
beta <- as.numeric(Gene_table %>%
filter(parameters == "beta[2,1]") %>%
select(mean_value))
y_0 <- as.numeric(Gene_table %>%
filter(parameters == "y_cross[1]") %>%
select(mean_value))
A<- as.numeric(Gene_table %>%
filter(parameters == "A[1]") %>%
select(mean_value))
top<-(y_0-A)*(1+exp(eta*beta))
bottom<-(1+exp(-x*beta+eta*beta))
return(log10(exp(A+(top/bottom))))}
parse_formula <- function(fm) {
if(attr(terms(fm), "response") == 1) stop("The formula must be of the kind \"~ covariates\" ")
else as.character(attr(terms(fm), "variables"))[-1]
}
scale_design = function(df, formula){
df %>%
setNames(c("sample_idx", "(Intercept)", parse_formula(formula))) %>%
gather(cov, value, -sample_idx) %>%
group_by(cov) %>%
mutate( value = ifelse( !grepl("Intercept", cov) & length(union(c(0,1), value)) != 2, scale(value), value )) %>%
ungroup() %>%
spread(cov, value) %>%
arrange(as.integer(sample_idx)) %>%
select(`(Intercept)`, one_of(parse_formula(formula)))
}
#Create table with scaled CAPRA_S and normalised gene count for graphing
graphing_table <- data.frame( #Scale CAPRA_S using TABI method
CAPRA_S = as.vector(model.matrix(object = ~CAPRA_S, data = data %>% filter(is.na(CAPRA_S)==F, is.na(transcript)==F) %>% filter(transcript ==gene_name)) %>%
as_tibble(rownames="sample_idx") %>%
scale_design(~CAPRA_S) %>% select(CAPRA_S)),
#Combine with read counts
read_count_normalised = as.vector(data %>% filter(is.na(CAPRA_S)==F) %>% filter(transcript ==gene_name) %>% select(`read_count normalised`)) )
#Plot curve of specified parameters and of count points
graphing_table %>%
ggplot(aes(x=CAPRA_S, y=read_count.normalised)) +
geom_point(col="dodgerblue") +
stat_function(fun=plot_line, geom="line") +
scale_y_log10() +
labs(title=gene_name) #Label what gene chosen
}
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18362)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
## [3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Australia.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tibble_2.1.3 broom_0.5.2 tidyr_1.0.0 plotly_4.9.1 ggplot2_3.2.1
## [6] dplyr_0.8.3
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.10 ttBulk_0.1.0
## [4] purrr_0.3.3 lattice_0.20-38 colorspace_1.4-1
## [7] widyr_0.1.2 vctrs_0.2.0 generics_0.0.2
## [10] htmltools_0.4.0 viridisLite_0.3.0 yaml_2.2.0
## [13] utf8_1.1.4 rlang_0.4.2 pillar_1.4.2
## [16] later_1.0.0 glue_1.3.1 withr_2.1.2
## [19] lifecycle_0.1.0 stringr_1.4.0 munsell_0.5.0
## [22] gtable_0.3.0 htmlwidgets_1.5.1 evaluate_0.14
## [25] labeling_0.3 knitr_1.25 fastmap_1.0.1
## [28] httpuv_1.5.2 crosstalk_1.0.0 parallel_3.6.1
## [31] fansi_0.4.0 preprocessCore_1.46.0 Rcpp_1.0.3
## [34] xtable_1.8-4 scales_1.0.0 backports_1.1.5
## [37] promises_1.1.0 jsonlite_1.6 mime_0.7
## [40] digest_0.6.22 stringi_1.4.3 shiny_1.4.0
## [43] grid_3.6.1 cli_1.1.0 tools_3.6.1
## [46] magrittr_1.5 lazyeval_0.2.2 crayon_1.3.4
## [49] pkgconfig_2.0.3 zeallot_0.1.0 ellipsis_0.3.0
## [52] Matrix_1.2-17 data.table_1.12.4 assertthat_0.2.1
## [55] rmarkdown_1.16 httr_1.4.1 R6_2.4.1
## [58] nlme_3.1-140 compiler_3.6.1