suffix = ""
data_to_read = ""
“/sci/labs/yotamd/lab_share/lung_sc/bulk/RoxaOsi/02.Bam/”
counts <- read.delim("/sci/labs/yotamd/lab_share/lung_sc/bulk/RoxaOsi/02.Bam/RoxaOsi_noMTGLKI",skip=1,header=T,sep="\t",stringsAsFactors=F,row.names=1,check.names = F)
tpm <- read.delim("/sci/labs/yotamd/lab_share/lung_sc/bulk/RoxaOsi/02.Bam/RoxaOsi_noMTGLKI_tpm.txt",skip=1,header=T,sep="\t",stringsAsFactors=F,row.names=1,check.names = F)
#save before omitting
genes_names = counts$gene_name
genes_lengths = counts[,5]
#omit non relevant cols
counts=counts[,7:ncol(counts)]
#create genes names
genes_names=make.unique(genes_names) %>% replace_na('NA')
rownames(counts) = genes_names
#omit non relevant genes
omitgenes= startsWith(rownames(counts),"NA")
counts=counts[!omitgenes,]
genes_lengths = genes_lengths[!omitgenes] #update genes_lengths
#calcualte gene length and MT genes
mt_genes = startsWith(rownames(counts),"MT-")| startsWith(rownames(counts),"ERCC-")
#get colnames
cell.labels <- gsub(pattern = ".bam",replacement = "",colnames(counts))
#change colnames
colnames(counts) <- cell.labels
counts_seurat <- CreateSeuratObject(counts = counts, project = "egfr_counts", min.cells = 0, min.features = 1000)
cell_type = str_extract(cell.labels, "^[A-Z]{1,3}[0-9]{3}")
condition = str_extract(cell.labels, "cp|osi|ctrl|roxa|op")
replicate = str_extract(cell.labels, ".$")
metadata = data.frame(cell_type = cell_type, condition = condition, replicate = replicate, row.names = colnames(counts))
dds <- DESeqDataSetFromMatrix(countData = round(counts),
colData = metadata,
design = ~condition)
converting counts to integer mode
Warning in DESeqDataSet(se, design = design, ignoreRank) :
some variables in design formula are characters, converting to factors
nrow(dds)
[1] 60445
dds1 <- dds[ rowSums(counts(dds)) >= 3, ]
nrow(dds1)
[1] 33579
vst = vst(dds1, blind=FALSE)
-- note: fitType='parametric', but the dispersion trend was not well captured by the
function: y = a/x + b, and a local regression fit was automatically substituted.
specify fitType='local' or 'mean' to avoid this message next time.
PCAdata <- prcomp(t(assay(vst)))
autoplot(PCAdata, data = metadata,colour = "condition",label = FALSE, main="PCA")
VlnPlot(counts_seurat, features = c("nCount_RNA"),group.by = "orig.ident",pt.size = 3)
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Computation failed in `stat_ydensity()`:
replacement has 1 row, data has 0
counts_filtered = counts[, colnames(counts) != "HCC827cp3"]
metadata_filtered = metadata[ rownames(metadata) != "HCC827cp3",]
dds <- DESeqDataSetFromMatrix(countData = round(counts_filtered),
colData = metadata_filtered,
design = ~condition)
converting counts to integer mode
Warning in DESeqDataSet(se, design = design, ignoreRank) :
some variables in design formula are characters, converting to factors
nrow(dds)
[1] 60445
dds1 <- dds[ rowSums(counts(dds)) >= 3, ]
nrow(dds1)
[1] 33503
vst = vst(dds1, blind=FALSE)
sampleDists <- dist( t( assay(vst) ) )
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- colnames(counts_filtered)
colnames(sampleDistMatrix) <- colnames(counts_filtered)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors, main = "Sample Distance Matrix ",show_colnames = T)