---
title: "infercnv_run"
author: "Brian Haas"
date: "9/5/2018"
output:
  html_document: default
  pdf_document: default
---
# Build the infercnv object from the counts matrix, the sample annotations
# and the gene position file; "normal" is the reference group.
infercnv_obj <- CreateInfercnvObject(
  raw_counts_matrix = "sim.data",
  annotations_file = "sim.sample.annots.txt",
  delim = "\t",
  gene_order_file = "gencode_v19_gene_pos.txt",
  ref_group_names = c("normal")
)
Remove genes that are expressed at very low levels or that are detected in very few cells.
# drop genes whose mean expression falls below the cutoff
cutoff <- 2
infercnv_obj <- require_above_min_mean_expr_cutoff(
  infercnv_obj,
  cutoff
)
## INFO [2018-09-13 16:43:09] ::above_min_mean_expr_cutoff:Start
## INFO [2018-09-13 16:43:09] ::process_data:Averages (counts).
## INFO [2018-09-13 16:43:09] Removing 16138 genes from matrix as below mean expr threshold: 2
# filter out genes that fail the min cells/gene requirement
# (this step filters genes, not cells)
min_cells_per_gene <- 3
infercnv_obj <- require_above_min_cells_ref(
  infercnv_obj,
  min_cells_per_gene = min_cells_per_gene
)
## INFO [2018-09-13 16:43:09] no genes removed due to min cells/gene filter
## keep a copy of the filtered object for safe keeping
infercnv_orig_filtered <- infercnv_obj
#plot_mean_chr_expr_lineplot(infercnv_obj)
# checkpoint the filtered object to disk
save(infercnv_obj, file = "infercnv_obj.orig_filtered")
# normalize the counts by sequencing depth
infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(
  infercnv_obj
)
Apply the Anscombe transform (suggested by Matan) to remove noisy variation at low counts.
# Anscombe transform, then checkpoint
infercnv_obj <- infercnv:::anscombe_transform(
  infercnv_obj
)
save(infercnv_obj, file = "infercnv_obj.anscombe")
#plot_mean_chr_expr_lineplot(infercnv_obj)
# log transform, then checkpoint
infercnv_obj <- log2xplus1(
  infercnv_obj
)
save(infercnv_obj, file = "infercnv_obj.log_transformed")
# cap the data at +/- the mean absolute value of the average bounds
threshold <- mean(
  abs(get_average_bounds(infercnv_obj))
)
infercnv_obj <- apply_max_threshold_bounds(
  infercnv_obj,
  threshold = threshold
)
## INFO [2018-09-13 16:43:11] ::process_data:setting max centered expr, threshold set to: +/-: 4.13549794979821
# heatmap of the filtered, log-transformed data, centered on the overall mean
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.logtransf",
  x.range = "auto",
  title = "Before InferCNV (filtered & log2 transformed)",
  color_safe_pal = FALSE,
  x.center = mean(infercnv_obj@expr.data)
)
## INFO [2018-09-13 16:43:11] ::plot_cnv:Start
## INFO [2018-09-13 16:43:11] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=2617406.32784413 Min=1.15363990062636 Max=4.13549794979821.
## INFO [2018-09-13 16:43:11] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-13 16:43:11] plot_cnv(): auto thresholding at: (-4.135498 , 4.135498)
## INFO [2018-09-13 16:43:11] plot_cnv_observation:Start
## INFO [2018-09-13 16:43:11] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:43:11] clustering observations via method: average
## INFO [2018-09-13 16:43:11] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:43:25] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:43:25] Quantiles of plotted data range: 1.15363990062636,1.15363990062636,1.15363990062636,2.98713371418024,4.13549794979821
## INFO [2018-09-13 16:43:26] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:43:27] plot_cnv_references:Start
## INFO [2018-09-13 16:43:27] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:43:27] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:43:27] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:43:31] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:43:31] Quantiles of plotted data range: 1.15363990062636,1.15363990062636,1.15363990062636,3.00668875715859,4.13549794979821
## INFO [2018-09-13 16:43:31] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.logtransf heatmap image
knitr::include_graphics(path = "infercnv.logtransf.png")
# smooth along each chromosome with window_length 101, smoothing the ends too
infercnv_obj <- smooth_by_chromosome(
  infercnv_obj,
  window_length = 101,
  smooth_ends = TRUE
)
## INFO [2018-09-13 16:43:33] ::smooth_window:Start.
save(infercnv_obj, file = "infercnv_obj.smooth_by_chr")
# re-center each cell using its median
infercnv_obj <- center_cell_expr_across_chromosome(
  infercnv_obj,
  method = "median"
)
## INFO [2018-09-13 16:43:36] ::center_smooth across chromosomes per cell
save(infercnv_obj, file = "infercnv_obj.cells_recentered")
#plot_mean_chr_expr_lineplot(infercnv_obj)
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.chr_smoothed",
  x.range = "auto",
  title = "chr smoothed",
  color_safe_pal = FALSE
)
## INFO [2018-09-13 16:43:36] ::plot_cnv:Start
## INFO [2018-09-13 16:43:36] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=-1261.60403741342 Min=-1.09939888679842 Max=2.26375113985155.
## INFO [2018-09-13 16:43:36] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-13 16:43:37] plot_cnv(): auto thresholding at: (-0.195838 , 0.195838)
## INFO [2018-09-13 16:43:37] plot_cnv_observation:Start
## INFO [2018-09-13 16:43:37] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:43:37] clustering observations via method: average
## INFO [2018-09-13 16:43:37] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:43:52] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:43:52] Quantiles of plotted data range: -0.195837709771411,-0.0781972877791719,0,0.0772662938365037,0.195837709771411
## INFO [2018-09-13 16:43:53] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:43:55] plot_cnv_references:Start
## INFO [2018-09-13 16:43:55] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:43:55] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:43:55] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:43:59] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:43:59] Quantiles of plotted data range: -0.195837709771411,-0.0723005458110942,0,0.0744799958044051,0.195837709771411
## INFO [2018-09-13 16:43:59] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.chr_smoothed heatmap image
knitr::include_graphics(path = "infercnv.chr_smoothed.png")
# subtract the reference expression from the observations
infercnv_obj <- subtract_ref_expr_from_obs(
  infercnv_obj
)
## INFO [2018-09-13 16:44:01] ::subtract_ref_expr_from_obs:Start
save(infercnv_obj, file = "infercnv_obj.ref_subtracted")
#plot_mean_chr_expr_lineplot(infercnv_obj, sep_obs_types = TRUE)
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.ref_subtracted",
  x.range = "auto",
  title = "ref subtracted",
  color_safe_pal = FALSE
)
## INFO [2018-09-13 16:44:26] ::plot_cnv:Start
## INFO [2018-09-13 16:44:26] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=-4373.91939505554 Min=-0.991876041263262 Max=2.37127398538671.
## INFO [2018-09-13 16:44:26] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-13 16:44:26] plot_cnv(): auto thresholding at: (-0.173751 , 0.173751)
## INFO [2018-09-13 16:44:26] plot_cnv_observation:Start
## INFO [2018-09-13 16:44:26] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:44:26] clustering observations via method: average
## INFO [2018-09-13 16:44:27] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:44:43] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:44:43] Quantiles of plotted data range: -0.173750601889055,-0.0666733306790223,-0.000280362646438466,0.0646892154115356,0.173750601889055
## INFO [2018-09-13 16:44:43] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:44:44] plot_cnv_references:Start
## INFO [2018-09-13 16:44:44] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:44:44] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:44:44] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:44:50] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:44:50] Quantiles of plotted data range: -0.173750601889055,-0.0610413162755091,-0.000485920238029097,0.0607364987304976,0.173750601889055
## INFO [2018-09-13 16:44:50] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.ref_subtracted heatmap image
knitr::include_graphics(path = "infercnv.ref_subtracted.png")
Convert the log(FC) values to regular fold-change values, centered at 1 (no fold change).
This is important because we want 0.5x and 1.5x to be symmetric about 1, representing the loss or gain of one copy of a chromosomal region.
# convert log2 values back to fold change, then checkpoint
infercnv_obj <- invert_log2(
  infercnv_obj
)
save(infercnv_obj, file = "infercnv_obj.inverted_log")
#plot_mean_chr_expr_lineplot(infercnv_obj, sep_obs_types = TRUE)
# fold-change data are centered at 1, so center the color scale there
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.inverted",
  color_safe_pal = FALSE,
  x.range = "auto",
  x.center = 1,
  title = "inverted log FC to FC"
)
## INFO [2018-09-13 16:44:53] ::plot_cnv:Start
## INFO [2018-09-13 16:44:53] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=1316088.95692313 Min=0.502823491766739 Max=5.17397823583264.
## INFO [2018-09-13 16:44:53] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-13 16:44:53] plot_cnv(): auto thresholding at: (-1.116681 , 1.116681)
## INFO [2018-09-13 16:44:53] plot_cnv_observation:Start
## INFO [2018-09-13 16:44:53] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:44:53] clustering observations via method: average
## INFO [2018-09-13 16:44:53] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:45:08] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:45:08] Quantiles of plotted data range: 0.502823491766739,0.954837193372243,0.99980568630344,1.0458596170087,1.1166807716154
## INFO [2018-09-13 16:45:08] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:45:09] plot_cnv_references:Start
## INFO [2018-09-13 16:45:09] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:45:09] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:45:09] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:45:13] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:45:13] Quantiles of plotted data range: 0.506179263185287,0.95857198626623,0.999663242472583,1.0429980775791,1.1166807716154
## INFO [2018-09-13 16:45:13] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.inverted heatmap image
knitr::include_graphics(path = "infercnv.inverted.png")
# clear noise using the reference mean and sd, with sd_amplifier 1
infercnv_obj <- clear_noise_via_ref_mean_sd(
  infercnv_obj,
  sd_amplifier = 1
)
## INFO [2018-09-13 16:45:15] :: **** clear_noise_via_ref_quantiles **** : removing noise between bounds: 0.93512495701109 - 1.06918844799331
save(infercnv_obj, file = "infercnv_obj.denoised")
#plot_mean_chr_expr_lineplot(infercnv_obj, sep_obs_types = TRUE)
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.denoised",
  x.range = "auto",
  x.center = 1,
  title = "denoised",
  color_safe_pal = FALSE
)
## INFO [2018-09-13 16:45:16] ::plot_cnv:Start
## INFO [2018-09-13 16:45:16] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=1317082.399583 Min=0.502823491766739 Max=5.17397823583264.
## INFO [2018-09-13 16:45:16] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-13 16:45:16] plot_cnv(): auto thresholding at: (-1.116681 , 1.116681)
## INFO [2018-09-13 16:45:16] plot_cnv_observation:Start
## INFO [2018-09-13 16:45:16] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:45:16] clustering observations via method: average
## INFO [2018-09-13 16:45:16] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:45:31] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:45:31] Quantiles of plotted data range: 0.502823491766739,1.0021567025022,1.0021567025022,1.0021567025022,1.1166807716154
## INFO [2018-09-13 16:45:31] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:45:32] plot_cnv_references:Start
## INFO [2018-09-13 16:45:32] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:45:32] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:45:32] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:45:36] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:45:36] Quantiles of plotted data range: 0.506179263185287,1.0021567025022,1.0021567025022,1.0021567025022,1.1166807716154
## INFO [2018-09-13 16:45:36] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.denoised heatmap image
knitr::include_graphics(path = "infercnv.denoised.png")
Removing outliers generally improves the visualization.
# remove outlier values (the log below reports the average_bound method)
infercnv_obj <- remove_outliers_norm(
  infercnv_obj
)
## INFO [2018-09-13 16:45:38] ::remove_outlier_norm:Start out_method: average_bound lower_bound: NA upper_bound: NA
## INFO [2018-09-13 16:45:38] ::remove_outlier_norm using method: average_bound for defining outliers.
save(infercnv_obj, file = "infercnv_obj.outliers_removed")
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.outliers_removed",
  color_safe_pal = FALSE,
  x.range = "auto",
  x.center = 1,
  title = "outliers removed"
)
## INFO [2018-09-13 16:45:39] ::plot_cnv:Start
## INFO [2018-09-13 16:45:39] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=1316994.67533397 Min=0.635456670448359 Max=2.05752707171781.
## INFO [2018-09-13 16:45:39] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-13 16:45:39] plot_cnv(): auto thresholding at: (-1.116681 , 1.116681)
## INFO [2018-09-13 16:45:39] plot_cnv_observation:Start
## INFO [2018-09-13 16:45:39] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:45:39] clustering observations via method: average
## INFO [2018-09-13 16:45:39] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:45:53] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:45:53] Quantiles of plotted data range: 0.635456670448359,1.0021567025022,1.0021567025022,1.0021567025022,1.1166807716154
## INFO [2018-09-13 16:45:53] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:45:54] plot_cnv_references:Start
## INFO [2018-09-13 16:45:54] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:45:54] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:45:54] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:45:57] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:45:57] Quantiles of plotted data range: 0.635456670448359,1.0021567025022,1.0021567025022,1.0021567025022,1.1166807716154
## INFO [2018-09-13 16:45:57] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.outliers_removed heatmap image
knitr::include_graphics(path = "infercnv.outliers_removed.png")
# checkpoint the final processed object
save(infercnv_obj, file = "infercnv_obj.final")
Run a t-test comparing each tumor sample against the normal reference, and mask out the genes that are not significantly differentially expressed (DE).
# reload the object stored in infercnv_obj.final
load("infercnv_obj.final")
plot_data <- infercnv_obj@expr.data
# plotting bounds: largest magnitude of the 5% / 95% quantiles of the
# nonzero values, mirrored to the negative side
high_threshold <- max(
  abs(quantile(plot_data[plot_data != 0], c(0.05, 0.95)))
)
low_threshold <- -high_threshold
# NOTE(review): center_val is 1 here, yet low/high thresholds are symmetric
# about 0 rather than about 1 -- confirm this range is intended.
infercnv_obj2 <- infercnv:::mask_non_DE_genes_basic(
  infercnv_obj,
  test.use = "t",
  center_val = 1
)
## INFO [2018-09-13 16:46:00] Finding DE genes between Apt25 and normal
## INFO [2018-09-13 16:46:02] Found 604 genes as DE
## INFO [2018-09-13 16:46:02] Finding DE genes between Bpt5 and normal
## INFO [2018-09-13 16:46:03] Found 553 genes as DE
## INFO [2018-09-13 16:46:03] Finding DE genes between C1pt5 and normal
## INFO [2018-09-13 16:46:04] Found 498 genes as DE
## INFO [2018-09-13 16:46:04] Finding DE genes between D1pt75 and normal
## INFO [2018-09-13 16:46:05] Found 568 genes as DE
# heatmap of the DE-masked object using the explicit low/high plotting range
plot_cnv(
  infercnv_obj2,
  output_filename = "infercnv.non-DE-genes-masked",
  color_safe_pal = FALSE,
  x.range = c(low_threshold, high_threshold),
  x.center = 1,
  title = "non-DE-genes-masked"
)
## INFO [2018-09-13 16:46:05] ::plot_cnv:Start
## INFO [2018-09-13 16:46:05] ::plot_cnv:Current data dimensions (r,c)=5847,225 Total=1312898.99853646 Min=0.635456670448359 Max=2.05752707171781.
## INFO [2018-09-13 16:46:05] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## Warning in if (length(x.range) == 1 & x.range == "auto") {: the condition
## has length > 1 and only the first element will be used
## INFO [2018-09-13 16:46:06] plot_cnv_observation:Start
## INFO [2018-09-13 16:46:06] Observation data size: Cells= 180 Genes= 5847
## INFO [2018-09-13 16:46:06] clustering observations via method: average
## INFO [2018-09-13 16:46:06] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-13 16:46:18] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:46:18] Quantiles of plotted data range: 0.680715055892036,1,1,1,1.1166807716154
## INFO [2018-09-13 16:46:18] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-13 16:46:19] plot_cnv_references:Start
## INFO [2018-09-13 16:46:19] Reference data size: Cells= 45 Genes= 5847
## INFO [2018-09-13 16:46:19] plot_cnv_references:Number reference groups= 1
## INFO [2018-09-13 16:46:19] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-13 16:46:22] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2018-09-13 16:46:22] Quantiles of plotted data range: 0.635456670448359,1.0021567025022,1.0021567025022,1.0021567025022,1.1166807716154
## INFO [2018-09-13 16:46:22] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# show the infercnv.non-DE-genes-masked heatmap image
knitr::include_graphics(path = "infercnv.non-DE-genes-masked.png")