#read in table
setwd("~/Desktop/Classes/Bioinformatics/final_project/human")
r1 = read.table ("human_LGN_2018-06-14_exon-matrix.csv", header= TRUE, sep= ",", row.names = 1)
#remove genes in less than 30 cells
rm_genes = which(rowSums(r1 > 0) <30)
r2 = r1 [-rm_genes,]
library(scran)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unsplit, which,
## which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
#create sce object
sce = SingleCellExperiment(list(counts = data.matrix(r2)))
#do clustering to reduce heterogeneity
clusters = quickCluster(sce, min.size=100)
## Warning: Setting 'use.ranks=TRUE' for the old defaults.
## Set 'use.ranks=FALSE' for the new defaults.
sce = computeSumFactors (sce, cluster = clusters)
#normalize, don't return log2
sce = normalize (sce, return_log = FALSE)
library(Seurat)
## Registered S3 method overwritten by 'R.oo':
## method from
## throw.default R.methodsS3
#create Seurat object using scran object
s_obj= CreateSeuratObject(counts = log(counts(sce) +1))
#find variable features to reduce run time
s_obj=FindVariableFeatures(s_obj)
#regress out batch
s_obj_nobatch = ScaleData(s_obj)
## Centering and scaling data matrix
s_obj = RunPCA(s_obj_nobatch, features = VariableFeatures(s_obj))
## PC_ 1
## Positive: 4340, 4336, 5129, 4099, 51148, 57571, 115584, 9725, 55314, 60484
## 9705, 933, 745, 84735, 101929249, 2065, 5354, 94015, 1287, 57471
## 6035, 5376, 2628, 54443, 1307, 347, 79152, 10397, 5653, 1298
## Negative: 55384, 692218, 157627, 9699, 100130155, 1159, 440823, 285175, 6326, 6323
## 4133, 22854, 4897, 65009, 8745, 359822, 10451, 2561, 6857, 817
## 80144, 490, 54839, 6616, 151835, 9118, 2977, 3716, 2903, 115827
## PC_ 2
## Positive: 2572, 3815, 2571, 5649, 80309, 594855, 3479, 654790, 11122, 23236
## 6092, 1012, 2890, 10769, 132204, 53826, 9547, 57689, 148, 56937
## 116443, 30820, 84959, 11249, 10752, 400120, 2897, 129684, 3356, 6529
## Negative: 283131, 6474, 105369345, 57084, 6934, 128553, 1286, 253650, 6696, 153579
## 4741, 1285, 151835, 5774, 10777, 5816, 4744, 2620, 3736, 105377862
## 4747, 5168, 11113, 4124, 8404, 266722, 10451, 5121, 80144, 7018
## PC_ 3
## Positive: 477, 64344, 5625, 5803, 493, 2261, 361, 3371, 80036, 7026
## 254295, 2697, 5015, 165, 2152, 2202, 27151, 6563, 9628, 3280
## 50509, 2670, 54739, 6538, 10410, 59352, 57326, 3133, 718, 80162
## Negative: 285987, 117177, 64919, 5318, 54715, 3925, 24141, 85445, 6695, 11075
## 2554, 388121, 1404, 105375415, 11249, 56892, 27145, 115827, 885, 2561
## 230, 2786, 10368, 347689, 53942, 100287225, 100996645, 758, 1268, 22895
## PC_ 4
## Positive: 5015, 8403, 4212, 2625, 23414, 148281, 7026, 613212, 55515, 4880
## 100309464, 3356, 104355219, 644192, 56934, 105369212, 56243, 846, 132204, 5617
## 100131897, 401498, 6096, 777, 9628, 80059, 1010, 1008, 5334, 493
## Negative: 3398, 7164, 4781, 477, 285987, 64919, 24141, 5318, 64344, 10485
## 1462, 6925, 5625, 2261, 105375415, 1404, 2670, 885, 2066, 388121
## 361, 339789, 3937, 11281, 1268, 3730, 11170, 165, 27145, 30010
## PC_ 5
## Positive: 5625, 2261, 477, 64344, 361, 3371, 6563, 50509, 2697, 59352
## 5803, 2202, 27151, 3280, 165, 10840, 4502, 6385, 254295, 57326
## 2152, 10485, 80036, 54762, 4776, 123041, 3955, 6263, 7837, 3691
## Negative: 3127, 3687, 1524, 3128, 7305, 3123, 713, 1436, 7805, 718
## 3122, 2207, 81704, 963, 1535, 54518, 3903, 3394, 64581, 64092
## 1536, 4973, 6916, 3635, 6850, 3109, 6001, 10578, 10859, 8832
s_obj = FindNeighbors(s_obj)
## Computing nearest neighbor graph
## Computing SNN
s_obj = FindClusters(s_obj, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1576
## Number of edges: 48040
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9445
## Number of communities: 7
## Elapsed time: 0 seconds
table(Idents(s_obj))
##
## 0 1 2 3 4 5 6
## 918 185 139 120 94 92 28
s_obj = RunTSNE(s_obj)
DimPlot (s_obj, reduction = "tsne", label = TRUE)

#2571 = GAD1, GABAergic neuron; 4340 = MOG, oligodendrocyte; 361 = AQP4, astrocyte
#determine cell types
features <- c("2571")
RidgePlot(object = s_obj, features = features, ncol = 2)
## Picking joint bandwidth of 0.0866

VlnPlot(object = s_obj, features = features)

FeaturePlot(object = s_obj, features = features)

DotPlot(object = s_obj, features = features) + RotatedAxis()

#label clusters based on marker genes
current.cluster.ids = c(0, 1, 2, 3, 4, 5, 6)
new.cluster.ids = c("Glutamatergic neuron", "Oligodendrocyte", "GABAergic neuron", "Glutamatergic neuron",
"Oligodendrocyte precursor ", "GABAergic neuron", "\n Astrocyte")
names(x = new.cluster.ids) <- levels(x = s_obj)
pbmc <- RenameIdents(object = s_obj, new.cluster.ids)
#plot again
DimPlot(object = pbmc, reduction = "tsne", label = TRUE, pt.size = 0.5) + NoLegend()
