Solution Tobias Schmidt

Start the analysis

## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist
## 
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

data(leukemiasEset)

Summarize what information you can extract from the ExpressionSet (leukemiasEset)?

leuData <- exprs(leukemiasEset)
summary(leukemiasEset$LeukemiaType)
## ALL AML CLL CML NoL 
##  12  12  12  12  12
phenotypeData <- pData(leukemiasEset)

How many samples/patients are there in this study?

length(colnames(leuData))
## [1] 60

How many patients in each type of leukemia or normal group?

summary(leukemiasEset$LeukemiaType)
## ALL AML CLL CML NoL 
##  12  12  12  12  12

How many genes are profiled?

length(rownames(leuData))
## [1] 20172

What are the gene identifiers?

ENSG00000157851 is an Ensembl indentifier

Quality control

Extract the phenotype data and expression matrix from “leukemiasEset”. Answer the following questions by manipulating the expression matrix and phenotype data

Plot the histogram of the whole expression matrix and check the data distribution.

hist(leuData, breaks = 100)

plot of chunk unnamed-chunk-7

Can you characterize the distribution (in terms of normality and data scale)?

###Draw vertical lines on the histogram to indicate the median 25% quantile, 75% quantile and mean (use legend and include the plot in your report).

quanData <- quantile(leuData)
med <- quanData[3]
q25 <- quanData[2]
q75 <- quanData[4]
me <- mean(leuData)

hist(leuData, breaks = 100)
abline(v = med, col = "red")
abline(v = q25, col = "green")
abline(v = q75, col = "blue")
abline(v = me, col = "yellow")
legend("topright", col = c("red", "green", "blue", "yellow"), legend = c("median", 
    "25 quantile", "75 quantile", "mean"), pch = 20)

plot of chunk unnamed-chunk-8


t <- melt(leuData)
m <- ggplot(t, aes(x = value))
m <- m + geom_histogram(binwidth = 0.1, aes(fill = ..count..)) + scale_fill_gradient("Count", 
    low = "yellow", high = "blue")

thresh <- data.frame(boundary = c("median", "25 quantile", "75 quantile", "mean"), 
    vals = c(med, q25, q75, me))

m + geom_vline(data = thresh, aes(xintercept = vals, linetype = boundary, colour = boundary), 
    size = 2, show_guide = TRUE)

plot of chunk unnamed-chunk-10

Is there any alternative function in R to check the data distribution?

qqnorm(leuData)

plot of chunk unnamed-chunk-11

This works fine but it takes forever

You may noticed that the data distribution is not a strictly normal distribution. But this is based on the expression of all genes, we can still assume that the expression follows normal distribution on single gene level.

Examine data distributions for all individual patients using boxplot. To make your boxplots a bit nicer and more informative, use different colors to represent cancer types or normal tissue.

boxplot(leuData, horizontal = T, las = 2, col = leukemiasEset$LeukemiaType)
legend("topright", col = unique(as.numeric(leukemiasEset$LeukemiaType)), legend = unique(leukemiasEset$LeukemiaType), 
    pch = 20)

plot of chunk unnamed-chunk-12

Unsupervised exploratory analysis

Hierarchical clustering

Perform hierarchical clustering to cluster patients, using distance measurement: 1. Spearman correlation 2. Euclidean distance and agglomeration methods: 1. ward 2. complete

deuc <- dist(t(leuData), method = "euclidean")
hc.euc.ward <- hclust(deuc, method = "ward")
hc.euc.complete <- hclust(deuc, method = "complete")
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")

Show the plots. Use different colors indicate different types of cancer and use title to show which distance measurement and agglomeration method you employed. You can use R function loaded by:

source(“ftp://129.187.44.58/share/chen/R_Functions/plothclust.R”)

plothclust(hc.euc.ward, col = leukemiasEset$LeukemiaType)
title(main = "Euclidean distance with 'ward' clustering")
legend("topright", legend = unique(leukemiasEset$LeukemiaType), pch = 15, col = unique(as.numeric(leukemiasEset$LeukemiaType)))

plot of chunk unnamed-chunk-14


plothclust(hc.euc.complete, col = leukemiasEset$LeukemiaType)
title(main = "euclidean distance with 'complete' clustering")
legend("topright", legend = unique(leukemiasEset$LeukemiaType), pch = 15, col = unique(as.numeric(leukemiasEset$LeukemiaType)))

plot of chunk unnamed-chunk-15

dsp <- 1 - cor(leuData, method = "spearman")
dsp.ward <- hclust(as.dist(dsp), method = "ward")
dsp.complete <- hclust(as.dist(dsp), method = "complete")
plothclust(dsp.ward, col = leukemiasEset$LeukemiaType)
title(main = "spearman distance with 'ward' clustering")
legend("topright", legend = unique(leukemiasEset$LeukemiaType), pch = 15, col = unique(as.numeric(leukemiasEset$LeukemiaType)))

plot of chunk unnamed-chunk-17

plothclust(dsp.complete, col = leukemiasEset$LeukemiaType)
title(main = "spearman distance with 'complete' clustering")
legend("topright", legend = unique(leukemiasEset$LeukemiaType), pch = 15, col = unique(as.numeric(leukemiasEset$LeukemiaType)))

plot of chunk unnamed-chunk-18

Which combination of distance method and agglomeration method outperforms others in distinguishing types of cancer or normal tissue, why?

A perfect clustering should show 5 perfectly seperated cluster. Ward clustering based on an euclidean distance works best for us. The reason is, that ward clustering tends to create clusters with same size. (perfect for uns because we have groups with same sizes.)

Complete clustering is not so useful because it tends to create smaller groups (the distance of all elements between to clusters should be maximum).

PCA

Now let’s characterize the transcriptome between different types of leukemia or normal tissues.

Perform PCA to explore similarities and dissimilarities between patients.

expr_scale <- scale(t(leuData), center = TRUE, scale = TRUE)
pca <- prcomp(expr_scale, center = FALSE, scale. = FALSE)

How much variance is explained by the first two PCs? (Eigen value)

(pca$sdev^2)[1:2]/sum(pca$sdev^2)
## [1] 0.2144 0.1287
sum((pca$sdev^2)[1:2]/sum(pca$sdev^2))
## [1] 0.3432

Plot patients on the first two principal components, use different colors to mark types of cancer and normal tissues. Use legends to show which color indicates which type.

plot(pca$x[, 1:2], col = leukemiasEset$LeukemiaType, pch = 20)
legend("topleft", legend = unique(leukemiasEset$LeukemiaType), pch = 15, col = unique(as.numeric(leukemiasEset$LeukemiaType)))

plot of chunk unnamed-chunk-21

Try to interpret the plot briefly in a short paragraph in your report (for example, which types of cancers are more distinct or similar, which type of leukemia is closer to the normal, which types of cancer are separated by which axis, etc).

It shows that the first principal component is not very useful to seperate the cancer types, but the second principal component can seperate the different cancer types.

All (Acute lymphoblastic leukemia) und CLL (chronic lymphoid leukemia) are placed very near to each other, but still can be seperated by a straigt line. This suits to their same biological origin.

CML (Chronic myelogenous leukemia) and NoL (Non-leukemia and healthy bone marrow) are also placed very near to each other.

Differential expression between ALL and CLL

You already see that the ALL an CLL tissues are distinguishable from our dataset. At this step, let’s only consider genes that are changing significantly between ALL vs. CLL.

Based on the data distribution we assumed, pick a right statistical test to come up with list of p- values associated with each gene.


#### annotation stuff
colnames(leuData) <- paste(leukemiasEset$LeukemiaType, sampleNames(leukemiasEset), 
    sep = "_")
t <- colnames(leuData)


### find position
pos_all <- grep("ALL", t)
pos_cll <- grep("CLL", t)

#### subsetting
df_all_cll <- leuData[, c(pos_all, pos_cll)]

factorlvl <- gl(2, 12, labels = c("ALL", "CLL"))
pval <- rowttests(df_all_cll, factorlvl)

Using FDR correction to adjust the p-values.


fdr <- p.adjust(pval$p.value, method = "fdr")

Make a volcano plot that x-axis shows the log fold change of medians between the two groups,y-axis shows the negative log transform of FDR. How do you interpret the volcano plot?

cal_fold_change <- function(x) {
    apply(x[, 1:12], 1, median)/apply(x[, 13:24], 1, median)
}
fold_change = cal_fold_change(df_all_cll)


plot(y = -log10(fdr), x = log2(fold_change))

plot of chunk unnamed-chunk-26


df <- data.frame(y = -log10(fdr), x = log2(fold_change))
df$significant = as.factor(abs(df$x) > log2(2) & df$y > -log10(0.05))

g = ggplot(data = df, aes(x = x, y = y, colour = significant)) + geom_point(alpha = 0.4, 
    size = 1.75)
g = g + xlab("log2 fold change") + ylab("-log10 p-value") + ggtitle(" min fold 2/0.5 and significant (blue) vs rest p = 0.05 (red)")


# dd_text = df[which((abs(df$x) > log2(1.5) & df$y > -log10(0.05))),] g +
# geom_text(data=dd_text, aes(x=x, y=y, label=name), colour='blue')
g

plot of chunk unnamed-chunk-28

A volcano plot shows for every gene if the differential expression is significant (y - axis; a higher value means a smaller p-value) and how big the differential expression is (x - axis; the value 1 means it is two times higher expressed / a value of -1 means it is 50% expressed)

How many genes are differentially expressed (FDR < 0.01)?

table(fdr < 0.01)
## 
## FALSE  TRUE 
## 17848  2324

2324 genes are differentially expressed

Plot a heatmap using differentially expressed genes in ALL and CLL samples. Cluster both rows and columns, and then interpret the result.

## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess

heatmat <- df_all_cll[fdr < 0.01, ]

heatmap.2(heatmat, trace = "none", ColSideColors = as.character(as.numeric(factorlvl)))

plot of chunk unnamed-chunk-31

After selecting just the differentially expressed genes the heatmap shows that the patients are perfectly matched according their cancer types. It confirms the results from the p-value calculation.

Fisher’s exact test for enrichment analysis

In this step, we will have a functional annotation about these differentially expressed genes via enrichment analysis (Fisher’s exact test). You already know how to create a two way contingency table, which is the basis of Fisher’s exact test. Many programs can do enrichment analysis. Here, you do not need to write your own commands to create a contingency table. This task could be done by function loaded by:

source("ftp://129.187.44.58/share/chen/R_Functions/GOFisher.R")

After execute the command, there should be a function in your workspace called GOFisher(). For more details about the function, read the supplements at the end of this document. Now, you have the function to perform the enrichment analysis. There are three pieces of important information required in the analysis: 1. Genes differentially expressed 2. GO term or other gene set information 3. Background information

load("/Users/tobias/Dropbox/Uni/Master/HT_Course/Report/GOterms.RData")

After loading, you should have an object named GOterms containing a list of GO terms.

How many GO terms in “GOterms”?

length(GOterms)
## [1] 229

The last important piece of information we need is background. Because the data we are working with have been generated on a Microarray platform (HGU133PLUS2) and our data have been mapped to unique ensembl gene IDs, all possible genes can be detected are predefined by the array. Therefore, we can use the total number of unique ensembl IDs on that platform as the background, that is, n=20899. To do Fisher’s exact on one GO term you can do:

Perform the Fisher’s exact test (using function GOFisher) on all the GO terms in the GOterms, then extract p-values and adjust them with FDR correction?

result_fisher <- lapply(GOterms, GOFisher, diff.genes = rownames(df_all_cll[fdr < 
    0.01, ]), background = 20899)
extract_p <- function(x) {
    x$p.value
}
p_val_fisher <- lapply(result_fisher, extract_p)
fdr_fisher <- p.adjust(p_val_fisher, method = "fdr")

List the GO terms that are enriched by the differentially expressed genes between ALL and CLL.

interestingGO <- names(which(fdr_fisher < 0.01))
print(interestingGO)
##  [1] "GO:0000086" "GO:0000278" "GO:0000287" "GO:0003682" "GO:0004713"
##  [6] "GO:0005515" "GO:0005524" "GO:0005543" "GO:0005634" "GO:0005654"
## [11] "GO:0005737" "GO:0005764" "GO:0005794" "GO:0005813" "GO:0005819"
## [16] "GO:0005829" "GO:0005874" "GO:0006260" "GO:0006281" "GO:0006310"
## [21] "GO:0006468" "GO:0006915" "GO:0007049" "GO:0007067" "GO:0007264"
## [26] "GO:0007596" "GO:0008283" "GO:0009897" "GO:0019901" "GO:0035556"
## [31] "GO:0042981" "GO:0043065" "GO:0043123" "GO:0044281" "GO:0051056"
## [36] "GO:0051301" "GO:0097190"

Retrieve GO name and definition using biomaRt

You already found a list of GO terms of interest from enrichment analysis, but you won’t know anything from the IDs. We need to know the name and description of those GO terms. You can retrieve the information from BioMart:

Construct a query to retrieve the names and definitions of the over-represented GO terms identified before.

Hint: You can use the biomart/database “ensembl” and dataset “hsapiens_gene_ensembl”. This can be defined by:


ensembl_hsap <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
listFilters(ensembl_hsap)[grep("go", listFilters(ensembl_hsap)$name), ]
##                     name                                description
## 43            with_go_id                  with GO Term Accession(s)
## 88    with_ox_goslim_goa                         with GOSlim GOA(s)
## 100           with_go_go                              with GO ID(s)
## 128                go_id     GO Term Accession(s) [e.g. GO:0005515]
## 129 goslim_goa_accession GOSlim GOA Accessions(s) [e.g. GO:0005623]
## 199     go_evidence_code                           GO Evidence code
## 200       go_parent_term                      Parent term accession
## 201       go_parent_name                           Parent term name
## 235    with_homolog_ggor                  Orthologous Gorilla Genes
la <- listAttributes(ensembl_hsap)
la[grep("definition", la$name), ]
##               name        description
## 30 definition_1006 GO Term Definition
final_result <- getBM(attributes = c("go_id", "definition_1006", "name_1006"), 
    filters = "go_id", values = interestingGO, mart = ensembl_hsap)
final_result
##         go_id
## 1  GO:0000086
## 2  GO:0000278
## 3  GO:0000287
## 4  GO:0003682
## 5  GO:0004713
## 6  GO:0005515
## 7  GO:0005524
## 8  GO:0005543
## 9  GO:0005634
## 10 GO:0005654
## 11 GO:0005737
## 12 GO:0005764
## 13 GO:0005794
## 14 GO:0005813
## 15 GO:0005819
## 16 GO:0005829
## 17 GO:0005874
## 18 GO:0006260
## 19 GO:0006281
## 20 GO:0006310
## 21 GO:0006468
## 22 GO:0006915
## 23 GO:0007049
## 24 GO:0007067
## 25 GO:0007264
## 26 GO:0007596
## 27 GO:0008283
## 28 GO:0009897
## 29 GO:0019901
## 30 GO:0035556
## 31 GO:0042981
## 32 GO:0043065
## 33 GO:0043123
## 34 GO:0044281
## 35 GO:0051056
## 36 GO:0051301
## 37 GO:0097190
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                definition_1006
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        The mitotic cell cycle transition by which a cell in G2 commits to M phase. The process begins when the kinase activity of M cyclin/CDK complex reaches a threshold high enough for the cell cycle to proceed. This is accomplished by activating a positive feedback loop that results in the accumulation of unphosphorylated and active M cyclin/CDK complex. [GOC:mtg_cell_cycle]
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Progression through the phases of the mitotic cell cycle, the most common eukaryotic cell cycle, which canonically comprises four successive phases called G1, S, G2, and M and includes replication of the genome and the subsequent segregation of chromosomes into daughter cells. In some variant cell cycles nuclear replication or nuclear division may not be followed by cell division, or G1 and G2 phases may be absent. [GOC:mah, ISBN:0815316194, Reactome:69278]
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Interacting selectively and non-covalently with magnesium (Mg) ions. [GOC:ai]
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Interacting selectively and non-covalently with chromatin, the network of fibers of DNA, protein, and sometimes RNA, that make up the chromosomes of the eukaryotic nucleus during interphase. [GOC:jl, ISBN:0198506732, PMID:20404130]
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Catalysis of the reaction: ATP + a protein tyrosine = ADP + protein tyrosine phosphate. [EC:2.7.10]
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Interacting selectively and non-covalently with any protein or protein complex (a complex of two or more proteins that may include other nonprotein molecules). [GOC:go_curators]
## 7                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Interacting selectively and non-covalently with ATP, adenosine 5'-triphosphate, a universally important coenzyme and enzyme regulator. [ISBN:0198506732]
## 8                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         Interacting selectively and non-covalently with phospholipids, a class of lipids containing phosphoric acid as a mono- or diester. [ISBN:0198506732]
## 9                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated. In most cells, the nucleus contains all of the cell's chromosomes except the organellar chromosomes, and is the site of RNA synthesis and processing. In some species, or in specialized cell types, RNA metabolism or DNA replication may be absent. [GOC:go_curators]
## 10                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     That part of the nuclear content other than the chromosomes or the nucleolus. [GOC:ma, ISBN:0124325653]
## 11                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      All of the contents of a cell excluding the plasma membrane and nucleus, but including other subcellular structures. [ISBN:0198547684]
## 12                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   A small lytic vacuole that has cell cycle-independent morphology and is found in most animal cells and that contains a variety of hydrolases, most of which have their maximal activities in the pH range 5-6. The contained enzymes display latency if properly isolated. About 40 different lysosomal hydrolases are known and lysosomes have a great variety of morphologies and functions. [GOC:mah, ISBN:0198506732]
## 13 A compound membranous cytoplasmic organelle of eukaryotic cells, consisting of flattened, ribosome-free vesicles arranged in a more or less regular stack. The Golgi apparatus differs from the endoplasmic reticulum in often having slightly thicker membranes, appearing in sections as a characteristic shallow semicircle so that the convex side (cis or entry face) abuts the endoplasmic reticulum, secretory vesicles emerging from the concave side (trans or exit face). In vertebrate cells there is usually one such organelle, while in invertebrates and plants, where they are known usually as dictyosomes, there may be several scattered in the cytoplasm. The Golgi apparatus processes proteins produced on the ribosomes of the rough endoplasmic reticulum; such processing includes modification of the core oligosaccharides of glycoproteins, and the sorting and packaging of proteins for transport to a variety of cellular locations. Three different regions of the Golgi are now recognized both in terms of structure and function: cis, in the vicinity of the cis face, trans, in the vicinity of the trans face, and medial, lying between the cis and trans regions. [ISBN:0198506732]
## 14                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                A structure comprised of a core structure (in most organisms, a pair of centrioles) and peripheral material from which a microtubule-based structure, such as a spindle apparatus, is organized. Centrosomes occur close to the nucleus during interphase in many eukaryotic cells, though in animal cells it changes continually during the cell-division cycle. [GOC:mah, ISBN:0198547684]
## 15                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  The array of microtubules and associated molecules that forms between opposite poles of a eukaryotic cell during mitosis or meiosis and serves to move the duplicated chromosomes apart. [ISBN:0198547684]
## 16                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    The part of the cytoplasm that does not contain organelles but which does contain other particulate matter, such as protein complexes. [GOC:hgd, GOC:jl]
## 17                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Any of the long, generally straight, hollow tubes of internal diameter 12-15 nm and external diameter 24 nm found in a wide variety of eukaryotic cells; each consists (usually) of 13 protofilaments of polymeric tubulin, staggered in such a manner that the tubulin monomers are arranged in a helical pattern on the microtubular surface, and with the alpha/beta axes of the tubulin subunits parallel to the long axis of the tubule; exist in equilibrium with pool of tubulin monomers and can be rapidly assembled or disassembled in response to physiological stimuli; concerned with force generation, e.g. in the spindle. [ISBN:0879693568]
## 18                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             The cellular metabolic process in which a cell duplicates one or more molecules of DNA. DNA replication begins when specific sequences, known as origins of replication, are recognized and bound by initiation proteins, and ends when the original DNA molecule has been completely duplicated and the copies topologically separated. The unit of replication usually corresponds to the genome of the cell, an organelle, or a virus. The template for replication can either be an existing DNA molecule or RNA. [GOC:mah]
## 19                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 The process of restoring DNA after damage. Genomes are subject to damage by chemical and physical agents in the environment (e.g. UV and ionizing radiations, chemical mutagens, fungal and bacterial toxins, etc.) and by free radicals or alkylating agents endogenously generated in metabolism. DNA is also damaged because of errors during its replication. A variety of different DNA repair pathways have been reported that include direct reversal, base excision repair, nucleotide excision repair, photoreactivation, bypass, double-strand break repair pathway, and mismatch repair pathway. [PMID:11563486]
## 20                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Any process in which a new genotype is formed by reassortment of genes resulting in gene combinations different from those that were present in the parents. In eukaryotes genetic recombination can occur by chromosome assortment, intrachromosomal recombination, or nonreciprocal interchromosomal recombination. Intrachromosomal recombination occurs by crossing over. In bacteria it may occur by genetic transformation, conjugation, transduction, or F-duction. [ISBN:0198506732]
## 21                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      The process of introducing a phosphate group on to a protein. [GOC:hb]
## 22                                                                                                                                                                                                                                                                                                                                                                                                                                                  A programmed cell death process which begins when a cell receives an internal (e.g. DNA damage) or external signal (e.g. an extracellular death ligand), and proceeds through a series of biochemical events (signaling pathways) which typically lead to rounding-up of the cell, retraction of pseudopodes, reduction of cellular volume (pyknosis), chromatin condensation, nuclear fragmentation (karyorrhexis), plasma membrane blebbing and fragmentation of the cell into apoptotic bodies. The process ends when the cell has died. The process is divided into a signaling pathway phase, and an execution phase, which is triggered by the former. [GOC:dhl, GOC:ecd, GOC:go_curators, GOC:mtg_apoptosis, GOC:tb, ISBN:0198506732, PMID:18846107, PMID:21494263]
## 23                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          The progression of biochemical and morphological phases and events that occur in a cell during successive cell replication or nuclear replication events. Canonically, the cell cycle comprises the replication and segregation of genetic material followed by the division of the cell, but in endocycles or syncytial cells nuclear replication or nuclear division may not be followed by cell division. [GOC:go_curators, GOC:mtg_cell_cycle]
## 24                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          A cell cycle process comprising the steps by which the nucleus of a eukaryotic cell divides; the process involves condensation of chromosomal DNA into a highly compacted form. Canonically, mitosis produces two daughter nuclei whose chromosome complement is identical to that of the mother cell. [GOC:dph, GOC:ma, GOC:mah, ISBN:0198547684]
## 25                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Any series of molecular signals in which a small monomeric GTPase relays one or more of the signals. [GOC:mah]
## 26                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  The sequential process in which the multiple coagulation factors of the blood interact, ultimately resulting in the formation of an insoluble fibrin clot; it may be divided into three stages: stage 1, the formation of intrinsic and extrinsic prothrombin converting principle; stage 2, the formation of thrombin; stage 3, the formation of stable fibrin polymers. [http://www.graylab.ac.uk/omd/, ISBN:0198506732]
## 27                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             The multiplication or reproduction of cells, resulting in the expansion of a cell population. [GOC:mah, GOC:mb]
## 28                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   The side (leaflet) of the plasma membrane that is opposite to the side that faces the cytoplasm. [GOC:tb]
## 29                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Interacting selectively and non-covalently with a protein kinase, any enzyme that catalyzes the transfer of a phosphate group, usually from ATP, to a protein substrate. [GOC:jl]
## 30                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  The process in which a signal is passed on to downstream components within the cell, which become activated themselves to further propagate the signal and finally trigger a change in the function or state of the cell. [GOC:bf, GOC:jl, GOC:signaling, ISBN:3527303782]
## 31                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Any process that modulates the occurrence or rate of cell death by apoptotic process. [GOC:jl, GOC:mtg_apoptosis]
## 32                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Any process that activates or increases the frequency, rate or extent of cell death by apoptotic process. [GOC:jl, GOC:mtg_apoptosis]
## 33                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Any process that activates or increases the frequency, rate or extent of I-kappaB kinase/NF-kappaB signaling. [GOC:jl]
## 34                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   The chemical reactions and pathways involving small molecules, any low molecular weight, monomeric, non-encoded molecule. [GOC:curators, GOC:pde, GOC:vw]
## 35                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    Any process that modulates the frequency, rate or extent of small GTPase mediated signal transduction. [GOC:go_curators]
## 36                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          The process resulting in the physical partitioning and separation of a cell into daughter cells. [GOC:go_curators]
## 37                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 A series of molecular signals which triggers the apoptotic death of a cell. The pathway starts with reception of a signal, and ends when the execution phase of apoptosis is triggered. [GOC:mtg_apoptosis]
##                                                     name_1006
## 1                       G2/M transition of mitotic cell cycle
## 2                                          mitotic cell cycle
## 3                                       magnesium ion binding
## 4                                           chromatin binding
## 5                            protein tyrosine kinase activity
## 6                                             protein binding
## 7                                                 ATP binding
## 8                                        phospholipid binding
## 9                                                     nucleus
## 10                                                nucleoplasm
## 11                                                  cytoplasm
## 12                                                   lysosome
## 13                                            Golgi apparatus
## 14                                                 centrosome
## 15                                                    spindle
## 16                                                    cytosol
## 17                                                microtubule
## 18                                            DNA replication
## 19                                                 DNA repair
## 20                                          DNA recombination
## 21                                    protein phosphorylation
## 22                                          apoptotic process
## 23                                                 cell cycle
## 24                                                    mitosis
## 25                  small GTPase mediated signal transduction
## 26                                          blood coagulation
## 27                                         cell proliferation
## 28                           external side of plasma membrane
## 29                                     protein kinase binding
## 30                          intracellular signal transduction
## 31                            regulation of apoptotic process
## 32                   positive regulation of apoptotic process
## 33 positive regulation of I-kappaB kinase/NF-kappaB signaling
## 34                           small molecule metabolic process
## 35    regulation of small GTPase mediated signal transduction
## 36                                              cell division
## 37                                apoptotic signaling pathway