Reading in the raw counts matrix and metadata, and populating the infercnv object
# Build the infercnv object from the counts matrix, the cell annotation file
# and the gene ordering file. The two groups named in ref_group_names are
# passed as the reference groups.
infercnv_obj <- CreateInfercnvObject(
  raw_counts_matrix = "oligodendroglioma_expression_downsampled.counts.matrix",
  annotations_file = "oligodendroglioma_annotations_downsampled.txt",
  delim = "\t",
  gene_order_file = "gencode_downsampled.txt",
  ref_group_names = c(
    "Microglia/Macrophage",
    "Oligodendrocytes (non-malignant)"
  )
)
Removing genes that are expressed at very low levels or are present in very few cells
# Drop genes whose mean expression is below the cutoff.
cutoff <- 2
infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff)
## INFO [2018-09-11 12:27:16] ::above_min_mean_expr_cutoff:Start
## INFO [2018-09-11 12:27:16] ::process_data:Averages (counts).
## INFO [2018-09-11 12:27:16] Removing 4585 genes from matrix as below mean expr threshold: 2
## INFO [2018-09-11 12:27:16] validating infercnv_obj
# Apply the minimum cells-per-gene filter. Note this filters genes, not cells.
# NOTE(review): the `_ref` suffix suggests cells are counted among the
# reference cells only -- confirm against the infercnv documentation.
min_cells_per_gene <- 3
infercnv_obj <- require_above_min_cells_ref(
  infercnv_obj,
  min_cells_per_gene = min_cells_per_gene
)
## INFO [2018-09-11 12:27:16] no genes removed due to min cells/gene filter
# Keep a copy of the filtered object before any transformation is applied.
infercnv_orig_filtered <- infercnv_obj
plot_mean_chr_expr_lineplot(infercnv_obj)
# save() is given the object's name, so `infercnv_obj` itself is written out.
save(list = "infercnv_obj", file = "infercnv_obj.orig_filtered")
# normalize_counts_by_seq_depth is reached through `:::` (package-internal
# access), so it is called with the explicit infercnv namespace.
infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(infercnv_obj)
Suggested by Matan for removing noisy variation at low counts
# Anscombe transform (reached through `:::`), then checkpoint and plot.
infercnv_obj <- infercnv:::anscombe_transform(infercnv_obj)
save(list = "infercnv_obj", file = "infercnv_obj.anscombe")
plot_mean_chr_expr_lineplot(infercnv_obj)
# Log transform via log2xplus1, then checkpoint again.
infercnv_obj <- log2xplus1(infercnv_obj)
save(list = "infercnv_obj", file = "infercnv_obj.log_transformed")
# The threshold is the mean absolute value of the average bounds; it is then
# applied as the max threshold bound.
threshold <- mean(abs(get_average_bounds(infercnv_obj)))
infercnv_obj <- apply_max_threshold_bounds(
  infercnv_obj,
  threshold = threshold
)
## INFO [2018-09-11 12:27:20] ::process_data:setting max centered expr, threshold set to: +/-: 4.35428798484577
# Smooth along each chromosome with window_length 101, including the ends.
infercnv_obj <- smooth_by_chromosome(
  infercnv_obj,
  window_length = 101,
  smooth_ends = TRUE
)
## INFO [2018-09-11 12:27:20] ::smooth_window:Start.
save(list = "infercnv_obj", file = "infercnv_obj.smooth_by_chr")
# Re-center each cell, using the median as the centering method.
infercnv_obj <- center_cell_expr_across_chromosome(
  infercnv_obj,
  method = "median"
)
## INFO [2018-09-11 12:27:22] ::center_smooth across chromosomes per cell
save(list = "infercnv_obj", file = "infercnv_obj.cells_recentered")
#plot_mean_chr_expr_lineplot(infercnv_obj)
# Draw the heatmap for the smoothed, re-centered data.
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.chr_smoothed",
  x.range = "auto",
  title = "chr smoothed"
)
## INFO [2018-09-11 12:27:23] ::plot_cnv:Start
## INFO [2018-09-11 12:27:23] ::plot_cnv:Current data dimensions (r,c)=5753,184 Total=-9564.10623616772 Min=-1.17632839525088 Max=2.50205880548827.
## INFO [2018-09-11 12:27:23] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-11 12:27:23] plot_cnv_observation:Start
## INFO [2018-09-11 12:27:23] Observation data size: Cells= 142 Genes= 5753
## INFO [2018-09-11 12:27:23] clustering observations via method: average
## INFO [2018-09-11 12:27:24] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-11 12:27:36] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-11 12:27:37] plot_cnv_references:Start
## INFO [2018-09-11 12:27:37] Reference data size: Cells= 42 Genes= 5753
## INFO [2018-09-11 12:27:37] plot_cnv_references:Number reference groups= 2
## INFO [2018-09-11 12:27:37] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-11 12:27:41] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# Embed the image; presumably the PNG written by plot_cnv() above
# (output_filename plus ".png").
knitr::include_graphics("infercnv.chr_smoothed.png")
# Subtract the reference expression from the observations, then checkpoint.
infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj)
## INFO [2018-09-11 12:27:43] ::subtract_ref_expr_from_obs:Start
save(list = "infercnv_obj", file = "infercnv_obj.ref_subtracted")
# Line plot with sep_obs_types switched on.
plot_mean_chr_expr_lineplot(
  infercnv_obj,
  sep_obs_types = TRUE
)
# Draw the heatmap after reference subtraction.
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.ref_subtracted",
  x.range = "auto",
  title = "ref subtracted"
)
## INFO [2018-09-11 12:28:02] ::plot_cnv:Start
## INFO [2018-09-11 12:28:02] ::plot_cnv:Current data dimensions (r,c)=5753,184 Total=-8593.80079726078 Min=-0.842089154571815 Max=2.60286965503209.
## INFO [2018-09-11 12:28:02] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-11 12:28:02] plot_cnv_observation:Start
## INFO [2018-09-11 12:28:02] Observation data size: Cells= 142 Genes= 5753
## INFO [2018-09-11 12:28:02] clustering observations via method: average
## INFO [2018-09-11 12:28:02] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-11 12:28:13] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-11 12:28:14] plot_cnv_references:Start
## INFO [2018-09-11 12:28:14] Reference data size: Cells= 42 Genes= 5753
## INFO [2018-09-11 12:28:14] plot_cnv_references:Number reference groups= 2
## INFO [2018-09-11 12:28:14] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-11 12:28:18] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# Embed the image; presumably the PNG written by plot_cnv() above
# (output_filename plus ".png").
knitr::include_graphics("infercnv.ref_subtracted.png")
Converting the log(FC) values to regular fold change values, centered at 1 (no fold change)
This is important because we want 0.5x and 1.5x to be symmetrical around 1, representing the loss or gain of one copy of a chromosomal region.
# Undo the log2 transform, then checkpoint.
infercnv_obj <- invert_log2(infercnv_obj)
save(list = "infercnv_obj", file = "infercnv_obj.inverted_log")
# Line plot with sep_obs_types switched on.
plot_mean_chr_expr_lineplot(
  infercnv_obj,
  sep_obs_types = TRUE
)
# Draw the heatmap on the fold-change scale; the colour scale is centered at 1
# and the colour-safe palette is turned off.
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.inverted",
  color_safe_pal = FALSE,
  x.range = "auto",
  x.center = 1,
  title = "inverted log FC to FC"
)
## INFO [2018-09-11 12:28:20] ::plot_cnv:Start
## INFO [2018-09-11 12:28:20] ::plot_cnv:Current data dimensions (r,c)=5753,184 Total=1056169.98990042 Min=0.557835187422296 Max=6.07493787447548.
## INFO [2018-09-11 12:28:20] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-11 12:28:20] plot_cnv_observation:Start
## INFO [2018-09-11 12:28:20] Observation data size: Cells= 142 Genes= 5753
## INFO [2018-09-11 12:28:20] clustering observations via method: average
## INFO [2018-09-11 12:28:20] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-11 12:28:31] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-11 12:28:32] plot_cnv_references:Start
## INFO [2018-09-11 12:28:32] Reference data size: Cells= 42 Genes= 5753
## INFO [2018-09-11 12:28:32] plot_cnv_references:Number reference groups= 2
## INFO [2018-09-11 12:28:32] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-11 12:28:35] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# Embed the image; presumably the PNG written by plot_cnv() above
# (output_filename plus ".png").
knitr::include_graphics("infercnv.inverted.png")
# Denoise with sd_amplifier 1.5, then checkpoint.
# NOTE: the log line below is tagged clear_noise_via_ref_quantiles, although
# the function called here is clear_noise_via_ref_mean_sd.
infercnv_obj <- clear_noise_via_ref_mean_sd(
  infercnv_obj,
  sd_amplifier = 1.5
)
## INFO [2018-09-11 12:28:37] :: **** clear_noise_via_ref_quantiles **** : removing noise between bounds: 0.920654261003624 - 1.0829412024659
save(list = "infercnv_obj", file = "infercnv_obj.denoised")
#plot_mean_chr_expr_lineplot(infercnv_obj, sep_obs_types = TRUE)
# Draw the heatmap of the denoised data, with the colour scale centered at 1.
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.denoised",
  x.range = "auto",
  x.center = 1,
  title = "denoised"
)
## INFO [2018-09-11 12:28:37] ::plot_cnv:Start
## INFO [2018-09-11 12:28:37] ::plot_cnv:Current data dimensions (r,c)=5753,184 Total=1056199.99050381 Min=0.557835187422296 Max=6.07493787447548.
## INFO [2018-09-11 12:28:37] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-11 12:28:38] plot_cnv_observation:Start
## INFO [2018-09-11 12:28:38] Observation data size: Cells= 142 Genes= 5753
## INFO [2018-09-11 12:28:38] clustering observations via method: average
## INFO [2018-09-11 12:28:38] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-11 12:28:48] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-11 12:28:49] plot_cnv_references:Start
## INFO [2018-09-11 12:28:49] Reference data size: Cells= 42 Genes= 5753
## INFO [2018-09-11 12:28:49] plot_cnv_references:Number reference groups= 2
## INFO [2018-09-11 12:28:49] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-11 12:28:52] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# Embed the image; presumably the PNG written by plot_cnv() above
# (output_filename plus ".png").
knitr::include_graphics("infercnv.denoised.png")
Removing outliers generally improves the visualization
# Remove outliers using the function's defaults; per the log below, that is
# out_method average_bound with no explicit lower/upper bound.
infercnv_obj <- remove_outliers_norm(infercnv_obj)
## INFO [2018-09-11 12:28:54] ::remove_outlier_norm:Start out_method: average_bound lower_bound: NA upper_bound: NA
## INFO [2018-09-11 12:28:54] ::remove_outlier_norm using method: average_bound for defining outliers.
save(list = "infercnv_obj", file = "infercnv_obj.outliers_removed")
# Draw the heatmap after outlier removal; the colour scale is centered at 1
# and the colour-safe palette is turned off.
plot_cnv(
  infercnv_obj,
  output_filename = "infercnv.outliers_removed",
  color_safe_pal = FALSE,
  x.range = "auto",
  x.center = 1,
  title = "outliers removed"
)
## INFO [2018-09-11 12:28:54] ::plot_cnv:Start
## INFO [2018-09-11 12:28:54] ::plot_cnv:Current data dimensions (r,c)=5753,184 Total=1056137.9046635 Min=0.687883225678836 Max=1.85002718108154.
## INFO [2018-09-11 12:28:54] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2018-09-11 12:28:55] plot_cnv_observation:Start
## INFO [2018-09-11 12:28:55] Observation data size: Cells= 142 Genes= 5753
## INFO [2018-09-11 12:28:55] clustering observations via method: average
## INFO [2018-09-11 12:28:55] plot_cnv_observation:Writing observation groupings/color.
## INFO [2018-09-11 12:29:05] plot_cnv_references:Writing observation data to ./observations.txt
## INFO [2018-09-11 12:29:06] plot_cnv_references:Start
## INFO [2018-09-11 12:29:06] Reference data size: Cells= 42 Genes= 5753
## INFO [2018-09-11 12:29:06] plot_cnv_references:Number reference groups= 2
## INFO [2018-09-11 12:29:06] plot_cnv_references:Plotting heatmap.
## INFO [2018-09-11 12:29:09] plot_cnv_references:Writing reference data to ./references.txt
## quartz_off_screen
## 2
# Embed the image; presumably the PNG written by plot_cnv() above
# (output_filename plus ".png").
knitr::include_graphics("infercnv.outliers_removed.png")