A singly linked list is a sequence of nodes, where each node stores an element and a link to the next node
Definitions of the related functions:
## Create an empty environment
create_emptyenv <- function() {
emptyenv()
}
# Check whether the list is empty
isEmpty <- function(llist) {
if (class(llist) != "linkList")
warning("Not linkList class") # check that the object is a linked list
identical(llist, create_emptyenv()) # identical() tests whether two objects are exactly equal
}
# Create a linked list node
linkListNode <- function(val, node = NULL) {
llist <- new.env(parent = create_emptyenv())
llist$element <- val
llist$nextnode <- node
class(llist) <- "linkList"
llist
}
# Example
LList <- linkListNode(5, linkListNode(2, create_emptyenv()))
# Get the next node
setNextNode <- function(llist) {
llist$nextnode
}
# Get the element of a node
setNextElement <- function(llist) {
llist$element
}
# Get the size of the list, using recursion
sizeLinkList <- function(llist, size = 0) {
if (isEmpty(llist))
{
return(size)
} else
{
size <- size + 1L
sizeLinkList(llist$nextnode, size)
}
}
# Add an item at the front of the list
addElement <- function(new, llist)
{
  if (isEmpty(llist)) {
    llist <- linkListNode(new, create_emptyenv())
  } else
  {
    llist <- linkListNode(new, llist)   # the new node points at the existing list
  }
  llist
}
# Delete the item at position pos
delElement <- function(llist, pos = NULL) {
  if (is.null(pos))
    stop("Nothing to delete")
  if (isEmpty(llist)) {
    warning("Empty List")
    return(llist)
  }
  listsize <- sizeLinkList(llist)
  if (pos > listsize)
    stop("Position greater than size of list")
  if (pos == 1)
    return(llist$nextnode)              # drop the head node
  # Walk to the node just before position pos, then bypass the node at pos
  # (nodes are environments, so the change is visible from the returned head)
  current <- llist
  for (i in seq_len(pos - 2))
    current <- current$nextnode
  current$nextnode <- setNextNode(current$nextnode)
  llist
}
# Find an item
findItem <- function(llist,
item,
pos = 0,
itemFound = FALSE) {
if (itemFound == TRUE)
{
return(itemFound)
} else if (isEmpty(llist)) {
return(FALSE)
} else
{
pos <- pos + 1L
if (llist$element == item)
itemFound <- TRUE
findItem(llist$nextnode, item, pos, itemFound)
}
}
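These helpers can be exercised on the two-node example list LList defined above (isEmpty() raises a warning when it reaches the empty tail environment):
sizeLinkList(LList)   # 2
findItem(LList, 2)    # TRUE
findItem(LList, 9)    # FALSE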
A doubly linked list is derived from the singly linked list; it also allows access to the previous element.
dlinkListNode <- function(val,
prevnode = NULL,
node = NULL) {
llist <- new.env(parent = create_emptyenv())
llist$prevnode <- prevnode
llist$element <- val # node value
llist$nextnode <- node # next node
class(llist) <- "dlinkList"
llist
}
# Insert a value into a circular linked list (llist is the head node)
cicularLinkList <- function(llist, val) {
  if (isEmpty(llist)) {
    llist <- linkListNode(val)
    llist$nextnode <- llist               # a single node points back to itself
  } else
  {
    llistNew <- linkListNode(val)
    llistNew$nextnode <- llist$nextnode   # new node takes over the head's old link
    llist$nextnode <- llistNew            # head now links to the new node
  }
  llist
}
Array-based list:
ALinkList <- setRefClass(
Class = "ALinkList",
fields = list(
Alist = "array",
listsize = "integer",
arraySize = "integer",
maxSize = "integer"
),
methods = list(
initialize = function(...) {
listsize <<- 0L
arraySize <<- 100L
Alist <<- array(dim = arraySize)
maxSize <<- arraySize
}
)
)
listlen = function()
{
return(listsize)
}
updateArrayList = function() {
Alist <<- c(Alist, array(dim = arraySize))
maxSize <<- maxSize + arraySize
}
addItem = function(item) {
  if (maxSize <= listsize) {
    updateArrayList()
  }
  listsize <<- listsize + 1L
  Alist[listsize] <<- item   # assign into the field, not a local copy
  return(listsize)
}
removeItem = function(i)
{
  Alist[i] <<- NA            # an array slot cannot be set to NULL; mark it as empty
  listsize <<- listsize - 1L
}
searchItem = function(val) {
pointer <- 1L
while (pointer != listsize) {
if (Alist[pointer] == val) {
break
}
pointer <- pointer + 1L
}
return(pointer)
}
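Assuming the five methods above are added to the methods list of ALinkList (they are shown separately here), a minimal usage sketch looks like this:
arr_list <- ALinkList$new()
arr_list$addItem(15)
arr_list$addItem(7)
arr_list$listlen()      # 2
arr_list$searchItem(7)  # 2, the position of the value 7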
A stack is last in, first out (LIFO).
Related functions:
Astack <- setRefClass(
Class = "Astack",
fields = list(
Maxsize = "integer",
topPos = "integer",
ArrayStack = "array"
),
methods = list(
# Initialization function
initialize = function(defaultSize = 100L, ...)
{
topPos <<- 0L
Maxsize <<- defaultSize # 100L
ArrayStack <<- array(dim = Maxsize)
},
# Check if stack is empty
isEmpty = function() {
},
# push value to stack
push = function(pushval) {
},
# Pop value from stack
pop = function() {
},
# Function to get size of stack
stacksize = function() {
},
# Function to get top value of stack
top = function() {
}
)
)
isEmpty = function() {
if (topPos == 0) {
cat("Empty Stack!")
return(TRUE)
} else
{
return(FALSE)
}
}
push = function(pushval) {
if ((topPos + 1L) > Maxsize)
stop("Stack is OUT OF MEMORY!")
topPos <<- topPos + 1L
ArrayStack[topPos] <<- pushval
}
pop = function() {
if (isEmpty()) return(NULL) # nothing to pop from an empty stack
popval <- ArrayStack[topPos]
ArrayStack[topPos] <<- NA
topPos <<- topPos - 1L
return(popval)
}
stacksize = function() {
stackIsEmpty <- isEmpty()
ifelse(stackIsEmpty, return(0), return(topPos))
}
top = function() {
stackIsEmpty <- isEmpty()
if (stackIsEmpty) {
cat("Empty Stack")
} else
{
return(ArrayStack[topPos])
}
}
# array_stack_ex<- Astack$new()
# array_stack_ex$push(1)
# array_stack_ex$push(2)
# array_stack_ex$push(3)
# array_stack_ex$pop()
# array_stack_ex$push(5)
# array_stack_ex$pop()
# array_stack_ex$pop()
# array_stack_ex$top()
# array_stack_ex
Linkstack <- setRefClass(
Class = "Linkstack",
fields = list(Lsize = "integer",
Lstacktop = "environment"),
methods = list(
# Initialization function
initialize = function(...) {
Lsize <<- 0L
},
# Check if stack is empty
isEmpty = function() {
},
# Function to create empty R environment
create_emptyenv = function() {
},
# Function to create node
Node = function(val, node = NULL) {
},
# push value to stack
push = function(pushval) {
},
# Pop value from stack
pop = function() {
},
# Function to get top value of stack
top = function() {
}
)
)
isEmpty = function() {
if (Lsize == 0) {
cat("Empty Stack!")
return(TRUE)
} else
{
return(FALSE)
}
}
create_emptyenv = function() {
emptyenv()
}
Node = function(val, node = NULL) {
llist <- new.env(parent = create_emptyenv())
llist$element <- val
llist$nextnode <- node
llist
}
push = function(val) {
stackIsEmpty <- isEmpty()
if (stackIsEmpty) {
Lstacktop <<- Node(val)
Lsize <<- Lsize + 1L
} else
{
Lstacktop <<- Node(val, Lstacktop)
Lsize <<- Lsize + 1L
}
}
pop = function() {
stackIsEmpty <- isEmpty()
if (stackIsEmpty) {
cat("Empty Stack")
} else
{
Lstacktop <<- Lstacktop$nextnode
Lsize <<- Lsize - 1L
}
}
topVal = function() {
stackIsEmpty <- isEmpty()
if (stackIsEmpty) {
cat("Empty Stack")
} else
{
return(Lstacktop$element)
}
}
# link_stack_ex <- Linkstack$new()
# link_stack_ex$push(1)
# link_stack_ex$push(2)
# link_stack_ex$push(3)
# link_stack_ex$pop()
# link_stack_ex$push(5)
# link_stack_ex$pop()
# link_stack_ex$pop()
# link_stack_ex$topVal()
# link_stack_ex
A queue is first in, first out (FIFO).
The implementation below uses R Reference Classes (RC).
aqueue <- setRefClass(
Class = "aqueue",
fields = list(
Alist = "array",
queuesize = "integer",
maxSize = "integer",
rear = "integer",
top = "integer"
),
methods = list(
initialize = function(qSize, ...) {
queuesize <<- 0L
rear <<- 1L
top <<- 0L
maxSize <<- as.integer(qSize)
Alist <<- array(dim = maxSize)
},
# Queue is empty
isEmpty = function() {
return(queuesize == 0L)
},
# Add element to the queue
enqueue = function(val) {
if (queuesize < maxSize) {
if (top == maxSize)
top <<- 0L
top <<- top + 1L
Alist[top] <<- val
queuesize <<- queuesize + 1L
} else{
cat("Queue Full!")
}
},
# remove element from queue
dequeue = function() {
if (queuesize > 0L) {
Alist[rear] <<- NA
ifelse(rear == maxSize, rear <<-
1L, rear <<- rear + 1L)
queuesize <<- queuesize - 1L
} else{
cat("Empty Queue!")
}
},
# size of queue
size = function() {
queuesize
}
)
)
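A short usage sketch of the array-based queue:
array_queue <- aqueue$new(qSize = 5)
array_queue$enqueue(10)
array_queue$enqueue(20)
array_queue$size()      # 2
array_queue$dequeue()   # removes 10 from the front
array_queue$isEmpty()   # FALSE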
ListQueue <- setRefClass(
Class = "ListQueue",
fields = list(
Lsize = "integer",
front = "environment",
rear = "environment",
Lqueue = "environment"
),
methods = list(
initialize = function(...) {
Lsize <<- 0L
},
# Check if list is empty
isEmpty = function() {
if (Lsize == 0) {
cat("Empty Stack!")
return(TRUE)
} else
{
return(FALSE)
}
},
# create empty environment
create_emptyenv = function() {
emptyenv()
} ,
# Create node
Node = function(val, node = NULL) {
llist <- new.env(parent = create_emptyenv())
llist$element <- val
llist$nextnode <- node
llist
},
# Function to add value to link list
enqueue = function(val) {
ListIsEmpty <- isEmpty()
if (ListIsEmpty) {
Lqueue <<- Node(val)
Lsize <<- Lsize + 1L
rear <<- Lqueue
} else
{
newNode <- Node(val)
assign("nextnode", newNode, envir = rear)
rear <<- newNode
Lsize <<- Lsize + 1L
}
},
# Function to remove node from link list
dequeue = function() {
stackIsEmpty <- isEmpty()
if (stackIsEmpty) {
cat("Empty Queue")
} else
{
Lqueue <<- Lqueue$nextnode
Lsize <<- Lsize - 1L
}
},
# Function to get link list size
size = function() {
Lsize
}
)
)
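A short usage sketch of the linked-list queue (the first enqueue also prints the empty-queue message from isEmpty()):
list_queue <- ListQueue$new()
list_queue$enqueue("a")
list_queue$enqueue("b")
list_queue$size()     # 2
list_queue$dequeue()  # removes "a"; "b" is now at the front
list_queue$size()     # 1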
Adict <- setRefClass(Class = "Adict",
fields = list(
Alist = "list",
listsize = "integer",
key = "integer"
),
methods = list(
# Re-initialize dictionary
initialize = function(...) {
listsize <<- 0L
Alist <<- list()
},
# Return the number of key-value pairs
size = function() {
return(listsize)
},
# Add the given key-value pair to the dictionary
addElement = function(key, val) {
Alist[[key]] <<- val
listsize <<- listsize + 1L
},
# Remove the value stored under the given key
removeElement = function(key) {
Alist[[key]] <<- NULL
listsize <<- listsize - 1L
},
# Check whether the given key is present
findElement = function(key) {
return(key %in% names(Alist))
}
)
)
# dictvar <- Adict$new()
# dictvar$addElement("key1", 1)
# dictvar$addElement("key2", 1)
# dictvar
# dictvar$size()
# dictvar$findElement("key1")
# dictvar$removeElement("key1")
Insertion_Sort <- function(V,n)
{
if(n==0) stop("No elements to sort")
for(i in 2:(length(V)))
{
val <- V[i]
j <- i - 1
while (j >= 1 && val <= V[j])
{
V[j+1] <- V[j]
j <- j-1
}
V[j+1] <- val
}
return(V)
}
Insertion_Sort(V=c(20,12,65,8,10,16,43,35),n=8)
## [1] 8 10 12 16 20 35 43 65
Bubble_Sort <- function(V,n) {
if(n==0) stop("No elements to sort")
for(i in 1:length(V)) {
flag <- 0
for(j in 1:(length(V)-i)) {
if ( V[j] > V[j+1] ) {
val <- V[j]
V[j] <- V[j+1]
V[j+1] <- val
flag <- 1
}
}
if(!flag) break
}
return(V)
}
Bubble_Sort(c(20,12,65,8,10,16,43,35),n=8)
## [1] 8 10 12 16 20 35 43 65
Selection_Sort_loop <- function(V,n) {
if(n==0) stop("No elements to sort")
keys <- seq_along(V)
for(i in keys) {
small_pos <- (i - 1) + which.min(V[i:length(V)])
temp <- V[i]
V[i] <- V[small_pos]
V[small_pos] <-temp
}
return(V)
}
Selection_Sort_loop(c(20,12,65,8,10,16,43,35),n=8)
## [1] 8 10 12 16 20 35 43 65
Shell_Sort <- function(V,n) {
if(n==0) stop("No elements to sort")
increment=round(n/2) ## as.integer
while(increment>0) {
for(i in (increment+1):n) {
temp <- V[i]
j=i
while(j >= (increment+1) && V[j-increment] > temp) {
V[j] <- V[j-increment]
j <- j-increment
}
V[j] <- temp
}
if(increment==2) {
increment <- 1} else{
increment <- round(increment/2.2)
}
}
return(V)
}
Shell_Sort(c(20,12,65,8,10,16,43,35,23,88,2,56,41,27,67,56),n=16)
## [1] 2 8 10 12 16 20 23 27 35 41 43 56 56 65 67 88
Merge_Sort <- function(V) {
if(length(V) == 0) stop("Not enough elements to sort")
## Merge function to sort two halves or sub-vectors
merge_fn <- function(first_half, second_half) {
result <- c()
while(length(first_half) > 0 && length(second_half) > 0) {
if(first_half[1] <= second_half[1]) {
result <- c(result, first_half[1])
first_half <- first_half[-1]
} else {
result <- c(result, second_half[1])
second_half <- second_half[-1]
}
}
if(length(first_half) > 0) result <- c(result, first_half)
if(length(second_half) > 0) result <- c(result, second_half)
return(result)
}
## Recursively split the parent vector into two halves (sub-vectors)
if(length(V) <= 1) V else {
middle <- length(V) / 2
first_half <- V[1:floor(middle)]
second_half <- V[floor(middle+1):length(V)]
first_half <- Merge_Sort(first_half)
second_half <- Merge_Sort(second_half)
if(first_half[length(first_half)] <= second_half[1]) {
c(first_half, second_half)
} else {
merge_fn(first_half, second_half)
}
}
}
Merge_Sort(c(20,12,65,8,10,16,43,35,23,88,2,56,41,27,67,56))
## [1] 2 8 10 12 16 20 23 27 35 41 43 56 56 65 67 88
Quick_Sort <- function(V,n) {
if (n <= 1) return(V)
left <- 0 ##start from left prior first element
right <- n ##start from rightmost element
v <- V[n] ## initialize last element as pivot element
## Partition implementation
repeat {
while (left < n && V[left+1] < v) left <- left+1
while (right > 1 && V[right-1] >= v) right <- right-1
if (left >= right-1) break
## Swap elements to put pivot in place
temp <- V[left+1]
V[left+1] <- V[right-1]
V[right-1] <- temp
}
## Recursive implementation of Quick sort
if (left == 0) return(c(V[n], Quick_Sort(V[1:(n-1)],n=(n-1))))
if (right == n) return(c(Quick_Sort(V[1:(n-1)],n=(n-1)), V[n]))
return( c(Quick_Sort(V[1:left],n=left), V[n], Quick_Sort(V[(left+1):(n-1)],n=(n-left-1))))
}
Quick_Sort(V= c(20,12,65,8,10,16,43,35,23,88,2,56,41,27,67,55),n=16)
## [1] 2 8 10 12 16 20 23 27 35 41 43 55 56 65 67 88
V <- c(20,12,65,8,10,16,43,35,23,88,2,56,41,27,67,56)
Heap_Sort <- function(V)
{
heapsize <- length(V)
for (i in floor(length(V)/2):1)
V <- max_heap(V, i,heapsize)
for (i in length(V):2) {
temp <- V[i]
V[i] <- V[1]
V[1] <- temp
heapsize <- heapsize -1
V <- max_heap(V, 1,heapsize)
}
return(V)
}
max_heap <- function(V, i,heapsize) {
left <- 2*i
right <- 2*i+1
if (left<=heapsize && V[left]>V[i]){
largest <- left}else{
largest <- i
}
if (right<=heapsize && V[right]>V[largest])
largest <- right
if (largest != i) {
temp2 <- V[largest]
V[largest] <- V[i]
V[i] <- temp2
V <- max_heap(V, largest,heapsize)
}
return(V)
}
Heap_Sort(c(20,12,65,8,10,16,43,35,23,88,2))
## [1] 2 8 10 12 16 20 23 35 43 65 88
# add item to bin
addItem=function(V,bin,maxValue,n){
for(i in 1:n){
val<-V[i]
ix<-ceiling((val*n)/maxValue)
if(is.na(bin[["binValues"]][[ix]][1])){
bin[["binValues"]][[ix]][1]<-val
bin[["nElement"]][ix]<-1
} else
{
bin <- insertItem(val=val, ix=ix,bin=bin)
}
}
return(bin)
}
# insert an item into a bin, keeping the bin sorted
insertItem=function(val, ix, bin){
  nElement <- bin[["nElement"]][ix]
  pos <- NULL
  for(i in 1:nElement){
    if(val < bin[["binValues"]][[ix]][i]){
      pos <- i          # first position holding a larger value
      break
    }
  }
  if(is.null(pos)){
    bin[["binValues"]][[ix]][nElement+1] <- val   # largest so far: append at the end
  } else if(pos==1) {
    bin[["binValues"]][[ix]] <- c(val, bin[["binValues"]][[ix]][1:nElement])  # smallest: prepend
  } else
  {
    bin[["binValues"]][[ix]] <- c(bin[["binValues"]][[ix]][1:(pos-1)], val, bin[["binValues"]][[ix]][pos:nElement])
  }
  bin[["nElement"]][ix] <- nElement + 1
  return(bin)
}
# bind the list into a sorted vector
bindSorted_vec=function(bin,n){
output <- c()
currentIx<-1
for(i in 1:n){
if(!is.na(bin[["binValues"]][[i]][1])){
nElement<-bin[["nElement"]][i]
for(m in 1:nElement){
output[currentIx]<-bin[["binValues"]][[i]][m]
currentIx<-currentIx+1
}
}
}
return(output)
}
# binsort Algorithm
Bin_Sort=function(V,n,maxValue){
bin <-list("binValues"=list(), "nElement"=NA)
# create empty bins
for(i in 1:n){
bin[["binValues"]][[i]]<-NA
bin[["nElement"]][i]<-0
}
## Add elements into suitable bins
bin <- addItem(V=V,bin=bin,maxValue=maxValue,n=n)
## Bind all bins into a single sorted vector
output <- bindSorted_vec(bin=bin,n=n)
return(output)
}
## Example of bin sorting
V<-c(20,12,65,8,10,16,43,35,23,88,2,56,41,27,67,55)
n<-16
maxValue<-88
Bin_Sort(V=V,n=n,maxValue=maxValue)
## [1] 2 8 10 12 16 20 23 27 35 41 43 55 56 65 67 88
# add item to bin
addItem=function(V,bin,digLength,n){
for(i in 1:n){
val<-V[i]
## Extract the required digit from the number
ix<-floor((val/digLength) %% 10)+1
## Assign element to each bin
bin[["binValues"]][[ix]][bin[["nElement"]][ix]+1]<-val
## Track count of elements in each bin
bin[["nElement"]][ix]<-bin[["nElement"]][ix] + 1
}
return(bin)
}
# bind the list into a sorted vector
bindSorted_vec=function(bin){
output <- c()
currentIx<-1
for(i in 1:10){
if(!is.na(bin[["binValues"]][[i]][1])){
nElement<-bin[["nElement"]][i]
for(m in 1:nElement){
output[currentIx]<-bin[["binValues"]][[i]][m]
currentIx<-currentIx+1
}
}
}
return(output)
}
# radixsort Algorithm
radix_Sort=function(V,n,maxValue,digLength){
for(digLength in c(10^(0:digLength)))
{
bin <-list("binValues"=list(), "nElement"=NA)
# create empty bins
for(i in 1:10){
bin[["binValues"]][[i]]<-NA
bin[["nElement"]][i]<-0
}
bin <- addItem(V=V,bin=bin,digLength=digLength,n=n)
V <- bindSorted_vec(bin=bin)
}
return(V)
}
## Example of radix sorting
V<-c(67,54,10,988,15,5,16,43,35,23,88,2,103,83)
n<-14
maxValue<-988
digLength <- 2
radix_Sort(V=V,n=n,maxValue=maxValue,digLength=digLength)
## [1] 2 5 10 15 16 23 35 43 54 67 83 88 103 988
Searching is a process used widely in computer applications: it determines whether an element with a particular value is present in a vector or list of elements. It also acts as a prerequisite for deletion, because a delete operation cannot take place without first searching for the element with the given value. A search can look for a single element with a given value (an exact match) or for a set of elements whose values fall within a range (a range match). During a search the position of the element is also determined, and this position can be used later in a delete operation. A search is said to be successful if an element with the given key value is found in the vector (or list), and unsuccessful if no such element is found.
V <- c(12,15,8,4,56,32,0,3,20,28,32,25,36,18)
n=length(V)
S=11
Sequential_search <- function(V,S,n)
{
i=1
present = FALSE
while(i <= n & present==FALSE)
{
if(V[i] == S)
present=TRUE else i = i+1
}
return(list(present=present,key=i))
}
Sequential_search(V,S,n)
## $present
## [1] FALSE
##
## $key
## [1] 15
V <- c(12,15,8,4,56,32,0,3,20,28,32,25,36,18)
n=length(V)
S=11
V <- sort(V)
Seq_ord_search <- function(V, S=11, n)
{
i = 1
present = FALSE
while (i <= n & present == FALSE)
{
if (V[i] == S){
present = TRUE
}else if (V[i] > S){
stop("element S not found")
}else{
i = i + 1
}
}
return(present)
}
# Seq_ord_search(V, S, n)
V <- sort(V)
Jump_search <- function(V,S,n)
{
jump <- floor(sqrt(n))
present = FALSE
i=1
while(jump < n & V[jump] < S)
{
i=jump
jump = jump+floor(sqrt(n))
if(jump>=n)
stop("element S not found")
}
while(V[i] < S & i <= jump)
i = i+1
if(V[i]==S)
present=TRUE
return(present)
}
Jump_search(V,7,length(V))
## [1] FALSE
## Recursive
V <- c(12,15,8,4,56,32,0,3,20,28,32,25,36,18)
S=32
l=1
h=length(V)
Bin_search_recursive <- function(V,S,l,h) {
if ( h < l ) {
stop("h should be more than l")
} else {
m <- floor((l + h) / 2)
if ( V[m] > S )
Bin_search_recursive(V, S,l,m-1)
else if ( V[m] < S )
Bin_search_recursive(V, S, m+1, h)
else
return(m)
}
}
Bin_search_recursive(V,S,1,14)
## [1] 11
## iterative
Bin_search_iterative <- function(V, S,n) {
l=1
h=n
i = 0
while ( l <= h ) {
m <- floor((l + h)/2)
if ( V[m] > S )
h <- m - 1
else if ( V[m] < S )
l <- m + 1
else if(V[m]==S)
return(TRUE)
}
return(FALSE)
}
Bin_search_iterative(V,S,n)
## [1] TRUE
Interpolation_search <- function(V,S,n)
{
i=1; j=n; l=V[1]; h=V[j];
if(S<l | S>h) return(FALSE)
while(i < j)
{
k = floor(i+((j-i)*(S-l))/(h-l))
print(k)
split = V[k]
if(S>split){
i=k+1; l=split
}else if(S < split){
j=k-1; h=split
}else if(V[k]==S){
return(TRUE)}
}
return(FALSE)
}
Interpolation_search(V,S,n)
## [1] FALSE
V <- c(1,2,3,4,5,6,7,8)
S <- c(6,4,6,7,5,7,6,1,4,6,7,5)
n_search = length(S)
n=length(V)
SOL_count <- function(V,S,n_search,n)
{
if(is.null(V)) stop("NO elements in input vector")
if(is.null(S)) stop("NO elemens to search")
i=1
count <- as.list(sapply(1:n,function(x) 0))
names(count) <- V
while(i<=n_search)
{
if(Sequential_search(V,S[i],n)$present){
key <- Sequential_search(V,S[i],n)$key
count[key][[1]] <- count[key][[1]] + 1
count <- count[order(-unlist(count))]
V <- as.numeric(names(count))
}
i=i+1
}
return(V)
}
SOL_count(V,S,n_search,n)
## [1] 6 7 4 5 1 2 3 8
SOL_move <- function(V,S,n_search,n)
{
if(is.null(V)) stop("NO elements in input vector")
if(is.null(S)) stop("NO elemens to search")
i=1
while(i<=n_search)
{
if(Sequential_search(V,S[i],n)$present){
if(Sequential_search(V,S[i],n)$key !=1){
key <- Sequential_search(V,S[i],n)$key
temp <- V[key]
V <- V[-key]
V <- c(temp,V)
}
}
i <- i+1
}
return(V)
}
SOL_move(V,S,n_search,n)
## [1] 5 7 6 4 1 2 3 8
SOL_transpose <- function(V,S,n_search,n)
{
if(is.null(V)) stop("NO elements in input vector")
if(is.null(S)) stop("NO elemens to search")
i=1
while(i<=n_search)
{
if(Sequential_search(V,S[i],n)$present){
if(Sequential_search(V,S[i],n)$key !=1){
key <- Sequential_search(V,S[i],n)$key
temp <- V[key-1]
V[key-1] <- V[key]
V[key] <- temp
}
}
i <- i+1
}
return(V)
}
SOL_transpose(V,S,n_search,n)
## [1] 1 2 6 4 7 5 3 8
hash_int <- function(K)
{
return (K %% 18)
}
hash_string <- function(K,n)
{
hashValue <- 0
for(i in 1:n){
hashValue <- hashValue+as.numeric(charToRaw(substr(K,i,i)))
}
return(hashValue)
}
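For example, applied to an integer key and a four-character string key:
hash_int(37)           # 37 %% 18 = 1
hash_string("key1", 4) # 378, the sum of the ASCII codes of "k", "e", "y", "1"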
We now introduce the concept of indexing, an essential file structure for organizing large data on disk. Indexing also helps improve the efficiency of data access, data search, and memory allocation.
Tree-based indexing: BST node structure
bstnode <- function(key, value) {
node <- new.env(hash = FALSE, parent = emptyenv())
node$key <- key # Node key
node$value <- value # Node Value
node$left <- NULL # left children key
node$right <- NULL # Right children key
class(node) <- "bstnode"
return(node)
}
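A BST node created this way only stores a key, a value, and empty child slots; for illustration, a tiny three-node tree can be wired by hand:
root <- bstnode(10, "ten")           # root node
root$left <- bstnode(5, "five")      # left child holds a smaller key
root$right <- bstnode(20, "twenty")  # right child holds a larger key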
tttnode <- function(lkey=NULL, lvalue=NULL, rkey=NULL, rvalue=NULL) {
node <- new.env(hash = FALSE, parent = emptyenv())
node$lkey <- lkey # left key
node$lvalue <- lvalue # left value
node$rkey <- rkey # right key
node$rvalue <- rvalue # right value
node$left <- NULL # left child
node$center <- NULL # center child
node$right <- NULL # right child
class(node) <- "tttnode"
return(node)
}
extttree <- tttnode(70, 70)
Function to check if node is empty
check_empty<-function(node){
ifelse((is.null(node$lkey) & is.null(node$rkey)), T, F)
}
Function to insert if the node has empty space
leaf_insert<-function(node, key, val){
if(check_empty(node)) return(tttnode(lkey=key, lvalue=val))
if(is.null(node$rkey)){
if(key>node$lkey){
node$rkey<-key
node$rvalue<-val
} else
{
node$rkey<-node$lkey
node$rvalue<-node$lvalue
node$lkey<-key
node$lvalue<-val
}
} else
{
node$left<-tttnode(key, val)
}
return(node)
}
The generalized pseudocode for element insertion in a 2-3 tree is as follows:
ttinsert<-function(node=NULL, key, val){
if(check_empty(node)) return(tttnode(lkey=key, lvalue=val))
if(is.null(node$left)) return(leaf_insert(node, key, val)) # leaf node: insert here
## Add element to internal nodes
if(key<node$lkey){
subtree = ttinsert(node$left, key, val)
if (identical(subtree, node$left))
{
return(node);
} else
{
assign("left", subtree, envir = node)
return(node)
}
} else if(ifelse(is.null(node$rkey), T, key<node$rkey)){
subtree = ttinsert(node$center, key, val)
if(identical(subtree, node$center))
{
return(node)
} else
{
assign("center", subtree, envir = node)
return(node)
}
} else
{
subtree = ttinsert(node$right, key, val)
if(identical(subtree, node$right)) {
return(node)
} else
{
assign("right", subtree, envir = node)
return(node)
}
}
}
The pseudocode for searching a 2-3 tree is as follows:
search_keys<-function(node, key){
if (is.null(node)) return(NULL) # empty node
if (node$lkey== key) return(node$lvalue)
if(!is.null(node$rkey) & node$rkey==key) return(node$rvalue)
if(key<node$lkey) {
sort_keys(node$left, key)
} else if(is.null(node$rkey)){
sort_keys(node$center, key)
} else if(key<node$rkey) {
sort_keys(node$center, key)
} else
{
sort_keys(node$right, key)
}
}
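Assuming the corrected ttinsert() and search_keys() above, a minimal two-key example behaves as follows:
tree <- ttinsert(NULL, 50, 50)  # first key creates the node
tree <- ttinsert(tree, 30, 30)  # second key joins the same leaf; keys are reordered
search_keys(tree, 30)           # 30
search_keys(tree, 50)           # 50
search_keys(tree, 80)           # NULL, key not present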
Memory efficiency is an important aspect that must be considered when designing data structures and algorithms. Memory can be broadly divided into two types:
1. Primary memory (RAM) 2. External memory, such as hard disks, CD-ROMs, and magnetic tapes
Data stored in primary memory (RAM) has the smallest access time and is therefore preferred by most algorithms. However, if the data is stored on an external drive, access time becomes critical, since fetching data from external storage generally takes much longer, and retrieval becomes a problem as the data size grows. To handle this, data is stored in pages, blocks, or allocation units on the external storage device, and indexes are used to retrieve these blocks efficiently. The B-tree is one of the most widely used data structures for accessing data on external storage devices. It was proposed by R. Bayer and E. M. McCreight in 1972 and performs better than a binary search tree, especially when data resides in external memory.
bplusnode<-function(node=NULL, key, val){
node <- new.env(hash = FALSE, parent = emptyenv())
node$keys<-keys
node$child<-NULL
node$isleaf<-NULL
node$d<-NULL
class(node) <- "bplustree"
return(node)
}
dlinkchildNode <- function(val, prevnode=NULL, node=NULL) {
llist <- new.env(parent=create_emptyenv())
llist$prevnode <- prevnode
llist$element <- val
llist$nextnode <- node
class(llist) <- " dlinkchildNode"
llist
}
Range Query
querry_search <- function(node, key1, key2){
  ## Function to collect values within the leaf-level linked list
  search_range <- function(child, key1, key2, val = NULL){
    if (!is.null(child) && child$element >= key1 && child$element <= key2){
      val <- c(val, child$element)
      search_range(child$nextnode, key1, key2, val)
    } else
    {
      return(val)
    }
  }
  ## Ensure key1 <= key2
  if (key1 > key2){
    temp <- key2
    key2 <- key1
    key1 <- temp
  }
  child <- search_lower_key(node, key1)        # search lower leaf
  rangeVal <- search_range(child, key1, key2)  # return values in the range
  return(rangeVal)
}
A graph is a type of data structure that can handle networks, and graphs are widely used across many different fields.
A graph (G) is a network of vertices (V) interconnected by a set of edges (E).
Consider a graph (G) with n vertices; it can be represented in two forms, an adjacency matrix or an adjacency list, and both forms can be used directly for mathematical computation:
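As a minimal base-R sketch (the objects adj_mat and adj_list below are illustrative only and are not used by the ADT that follows), a small three-vertex graph can be written in both forms:
## Adjacency matrix: entry [i, j] holds the weight of edge i -> j (0 means no edge)
adj_mat <- matrix(0, nrow = 3, ncol = 3, dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
adj_mat["A", "B"] <- 2
adj_mat["B", "C"] <- 4
## Adjacency list: each vertex maps to a named vector of neighbours and edge weights
adj_list <- list(A = c(B = 2), B = c(C = 4), C = numeric(0))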
Graph_ADT <- setRefClass(
Class = "adjacency_Matrix",
fields = list(n = "integer"),
methods = list(
## Initialise a graph of n vertices
Initialize = function(n) {
},
## Return number of vertices and edges
num_vert = function() {
},
num_edges = function() {
},
## Return weight of an edge for a pair of connecting vertices v1 and v2
weightEdge = function(v1, v2) {
},
## Assign weight(wt) of an edge for a pair of connecting vertices v1 and v2
assignEdge = function(v1, v2, wt) {
},
## Delete weight of an edge for a pair of connecting vertices v1 and v2
deleteEdge = function(v1, v2) {
},
## Return first connecting vertex for a given vertex v
firstVertex = function(v) {
},
## Return next connecting vertex for a given v and its neighbour w
nextVertex = function(v, w) {
},
## Check for presence of an edge connection for a pair of vertices v1 and v2
isEdge = function(v1, v2) {
}
)
)
adjacencyMatrix <-
setRefClass(
Class = "adjacencyMatrix",
fields = list(n = "integer"),
methods = list(
## Initialise the graph of n vertices
Initialize <- function(n) {
numVertices <<- as.integer(n) ## with n vertices
numEdges <<- 0L ## with no connected edges
mark <<- list() ## initialise mark list
## initialse the mark of all vertices to 0 (unvisited)
for (i in 1:numVertices)
mark[[i]] <<- 0L
## generate a new n x n matrix with initial weights as 0
mat <<- matrix(0L, nrow = numVertices, ncol = numVertices)
},
## get number of vertices
num_vert <- function()
return(numVertices),
## get number of edges
num_edges <- function()
return(numEdges),
## return the first adjacent neighbout of vertex index v
firstVertex <- function(v) {
for (i in 1:numVertices)
if (mat[v, i] != 0)
return(i)
return(numVertices + 1)
},
## return next adjacent vertices of index v after
## getting index w using firstVertex
nextVertex <- function(v, w) {
for (i in (w + 1):numVertices)
if (mat[v, i] != 0)
return(i)
return(numVertices + 1)
},
## Assign weight (wt) to each connected edge of indices v1 and v2
assignEdge <- function(v1, v2, wt) {
if (wt < 0)
stop("Weight should be positive")
## increase the count of edges as the weights are assigned
if (mat[v1, v2] == 0)
numEdges <<- numEdges + 1L
## replace 0 with the wt
mat[v1, v2] <<- wt
},
## Delete a connected edge between indices v1 and v2
deleteEdge <- function(v1, v2) {
if (mat[v1, v2] != 0)
numEdges <<- numEdges - 1L
mat[v1, v2] <<- 0
},
## Check whether an edge exists between indices v1 and v2
isEdge <- function(v1, v2) {
return(mat[v1, v2] != 0)
},
## Get weight of the connected edge between indices v1 and v2
weightEdge <- function(v1, v2) {
return(mat[v1, v2])
},
## Get the mark of a vertex of index v1
getMark <- function(v1) {
return(mark[[v1]])
},
## initialise the mark of a vertex of index v1 with 1
initMark <- function(v1, val) {
mark[[v]] <<- val
}
)
)
adjacencyList <-
setRefClass(
Class = "adjacencyList",
fields = list(n = "integer"),
methods = list(
## Initialise the graph of n vertices
Initialize <- function(n) {
## with n vertices
numVertices <<- n
## with no connected edges
numEdges <<- 0L
## initialise mark list
mark <<- list()
## initialse the mark of all vertices to 0 (unvisited)
for (i in 1:numVertices)
mark[[i]] <<- 0L
## generate a linked list of edges each for
## each vertex in the list
vertex <- list()
for (i in 1:numVertices)
vertex[[i]] <<- llistofEdges()
},
## get number of vertices
num_vert <- function()
return(numVertices),
## get number of edges
num_edges <- function()
return(numEdges),
## return the first adjacent neighbout of vertex index v
firstVertex <- function(v) {
if (length(vertex[[v]]) == 0)
## indicates no adjacent neighbour
return(numVertices + 1)
## Move to the first adjacent vertex
adjVert <<- firstAdjVert(vertex[[v]])
## get the current position of AdjVert
pos <<- currentPos(vertex[[v]], adjVert)
## get value of connecting edge
adjEdge <<- getValue(vertex[[v]], adjVert)
return(adjVert)
},
## return next adjacent vertices of index v after
## getting index w using firstVertex
nextVertex <- function(v, w) {
if (isEdge(v, w)) {
if (pos + 1 > length(vertex[[v]])) {
## move the next adjcent vertex of w
adjVert <<- nextAdjVertex(vertex[[v]], w)
## get the current position of adjcent vertex
pos <<- currentPos(vertex[[v]], adjVert)
## get value of connecting edge
adjEdge <<- getValue(vertex[[v]], adjVert)
return(adjVert)
}
## no connecting edge
} else
return(numVertices + 1)
},
## Assign weight (wt) to each connected edge of indices v1 and v2
assignEdge <- function(v1, v2, wt) {
if (wt < 0)
stop("Weight should be positive")
##check whether edge exists between v1 and v2
if (isEdge(v1, v2)) {
## insert vertex v2 along with edge weight wt
insertVertex(vertex[[v1]], v2, wt)
}
},
## Delete a connected edge between indices v1 and v2
deleteEdge <- function(v1, v2) {
if (isEdge(v1, v2)) {
removeEdge(v1, v2)
numEdges <<- numEdges - 1L
}
},
## Check whether an edge exists between indices v1 and v2
isEdge <- function(v1, v2) {
pos <- currentPos(vertex[[v1]], firstAdjVert(vertex[[v1]]))
while (pos < length(vertex[[v1]])) {
adjVert <- nextAdjVertex(vertex[[v1]], vertex[[v1]][pos])
if (adjVert == v2) {
return(TRUE)
} else {
pos = pos + 1
}
}
},
## Get weight of the connected edge between indices v1 and v2
weightEdge <- function(v1, v2) {
if (isEdge(v1, v2)) {
adjEdge <- getValue(vertex[[v1]], v2)
return(adjEdge)
} else {
return (0)
}
},
## Get the mark of a vertex of index v1
getMark <- function(v1) {
return(mark[[v1]])
},
## initialise the mark of a vertex of index v1 with 1
initMark <- function(v1, val) {
mark[[v]] <<- val
}
)
)
graph_Traverse <- function(Graph_ADT, n, vertices)
{
## Initialise marks to zero
verticesMarks <- list()
for (i in 1:n)
verticesMarks[[i]] <-
Graph_ADT$initMark(vertices[i], 0) ## 0 means not visited
## Inititate traversing upon checking for unmarked nodes
for (i in 1:n)
if (Graph_ADT$getMark(vertices[i]) == 0)
initTraverse(verticesMarks, vertices[i])
}
DFS is a recursive graph traversal algorithm that works for both directed and undirected graphs. At each step of the traversal, DFS recursively examines and visits all unvisited nodes that are directly connected to the node under consideration, and the nodes visited along the path are pushed onto a stack in traversal order. When a node has no directly connected unvisited nodes left, it is popped from the stack, leaving the nodes that still determine the forward traversal path, so that every node is eventually visited. The following R code implements the DFS algorithm with three inputs: Graph_ADT, n (the number of nodes in the graph), and v (the node from which DFS starts):
DepthFirstSearch <- function(Graph_ADT, n, v)
{
## Ensure all nodes are visited and processed prior node v
preVisit(v)
## mark the node v under consideration as 1 (i.e. visited)
VerticesMarks <- list()
VerticesMarks[[v]] <- Graph_ADT$initMark(v, 1)
## Recursively visit all connected nodes of v till all are marked as 1
## get the first vertex
node <- Graph_ADT$firstVertex(v)
## check node belongs to neighbouring nodes using conVert function
while (node %in% conVert(v)) {
## check if the node is unvisited
if (Graph_ADT$getMark(node) == 0)
## recursively run DFS
DepthFirstSearch(Graph_ADT, n, node)
## assign next neighbouring vertex
node <- Graph_ADT$nextVertex(v, node)
}
## Run post processing remaining un-visited nodes
postVisit(v)
}
BFS works in a similar way to DFS, with the following differences: 1. Unlike DFS, BFS is not a recursive implementation. 2. To keep track of marked nodes, BFS uses a queue instead of the stack used by DFS. 3. Before moving to the next node, BFS visits all directly connected unmarked nodes, whereas DFS visits only one unmarked connected node per iteration. The following R code implements the BFS algorithm with four inputs: Graph_ADT, startVertex (the node from which the traversal starts), queue (an empty queue used to track connected nodes in visiting order), and n (the total number of nodes in the graph):
BreadthFirstSearch <- function(Graph_ADT, startVertex, queue, n)
{
## initialise an empty queue with a start vertex
queue <- initQueue(startVertex)
## Initialise first vertex by marking it as 1 (visited)
VerticesMarks <- list()
VerticesMarks[[v]] <- Graph_ADT$initMark(v, 1)
## Subsequently start processing in queues
while (length(queue) != 0) {
## extract first element in the queue
v <- extQueue(queue)
## Pre-Process all directly connected nodes of v
preVisit(v)
## Mark visited nodes with 1 and accordingly queue the nodes
node <- firstVertex(v)
while (node %in% conVert(v)) {
if (Graph_ADT$getMark(node) == 0) {
graph <- Graph_ADT$initMark(node, 1)
queue <- initQueue(node)
}
node <- Graph_ADT$nextVertex(startVertex, node)
}
}
}
Main function to perform topological sort
Topological_DFS_sort <- function(Graph_ADT, n, vertices)
{
## initialise all nodes with 0 (unvisited)
verticesMarks <- list()
for(i in 1:n)
verticesMarks[[i]] <<- Graph_ADT$initMark(vertices[i],0)
## Process all nodes by recursive traversing
for(i in 1:n)
if(Graph_ADT$getMark(vertices[i]) == 0)
topological_secondary(Graph_ADT,i)
}
recursive secondary function to help main function
topological_secondary <- function(Graph_ADT,i)
{
## Mark the node as 1 (visited)
verticesMarks[[i]] <<- Graph_ADT$initMark(vertices[i], 1)
## Perform traversing across connected nodes
v <- Graph_ADT$firstVertex(vertices[i])
while(v %in% conVert(vertices[i])){
if(Graph_ADT$getMark(v) == 0)
topological_secondary(Graph_ADT, v)
v <- Graph_ADT$nextVertex(vertices[i],v)
}
return(v)
}
Topological sort using Breadth First Search (BFS)
Topological_BFS_sort <- function(Graph_ADT, queue, n, vertices)
{
## Initialise a list to track count of inwards edges for each node
countEdge <- list()
## initialise count of each node to 0
for (i in vertices)
countEdge[[i]] <- 0
## Assign count (inward nodes) prerequisite to each node
for(i in vertices){
v <- Graph_ADT$firstVertex(vertices[i])
while(v %in% conVert(vertices[i])){
countEdge[[v]] <- countEdge[[v]] + 1
v <- Graph_ADT$nextVertex(vertices[[i]],v)
}
}
## Initialize queue with nodes which have zero count of inward edges
for(i in vertices)
if(countEdge[[i]] == 0)
queue <- Graph_ADT$initQueue(i)
## Process the nodes which are in the queue
while(length(queue) != 0){
v <- extQueue(queue)
print(v)
w <- Graph_ADT$firstVertex(v)
while(w %in% conVert(vertices[v])){
## Decrease the count prerequisite by 1
countEdge[[w]] <- countEdge[[w]] - 1
if(countEdge[[w]] == 0) ## no prerequisites
queue <- initQueue(w)
w <- Graph_ADT$nextVertex(vertices[v],w)
}
}
}
PriorityQueueInit <-
setRefClass("PriorityQueueInit",
fields = list(keys = "integer", values = "integer"),
methods = list(
push = function(key,value) {
keys <<- append(keys, key)
values <<- append(values, value)
},
extractMinVertex = function() {
minPos <- which.min(values)   # position of the minimum value
key <- keys[[minPos]]
value <- values[[minPos]]
keys <<- keys[-minPos]        # remove the extracted entry from the queue
values <<- values[-minPos]
return(list(key=key,value=value))
}
))
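With the corrected extractMinVertex() above (which removes the extracted entry), the priority queue can be exercised as follows:
pq <- PriorityQueueInit$new()
pq$push(1L, 7L)        # vertex 1 with distance 7
pq$push(2L, 3L)        # vertex 2 with distance 3
pq$extractMinVertex()  # list(key = 2, value = 3); the entry is removed
length(pq$keys)        # 1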
DijkstraShortestPath <- function(Graph_ADT, sourceVertex, vertices, n)
{
library(hashmap) ## To create new hashmap instances
## Initiate a new priority queue
## It can perform push, entractMinVertex
## push : add new vertices along with their distances from source node
## entractMinVertex : extract the vertex with minimum value
## key represents all the vertices of the graph
## value represents the vertex value or distance from the source vertex
priorityQueue <- PriorityQueueInit$new()
## Initiate a hashmap to store shortest distance from source vertex to every vertex
## keys are all the vertices of the graph other than source vertex
## values are the corresponding distances from the source node
## Dimension of hashmap is n, where n is total number of vertices in graph G
## Initialize all vertices with distance as 0 which will later be updated
distanceMap <- hashmap(keys=vertices,
values = rep(0,n))
## Initiate another hashmap to store the parent vertex to keep a track of shortest
## path from source vertex to every vertex
## key represents the child (to) vertex
## value represents the parent (from) vertex
## initialize key with source vertex and value with NULL
parentMap <- hashmap(keys = sourceVertex,
values = "NULL")
## initialize priority queue with value of all vertices to infinity
for( i in vertices)
priorityQueue$push(vertices[i],Inf)
## Set the distance of sourceVertex as zero
priorityQueue$values[which(priorityQueue$keys==sourceVertex)] <- 0
## Begin iteration till the all the vertices from priorityQueue becomes empty
while(length(priorityQueue$keys) != 0){
## Extract vertex with minimum value from priority queue along with its value
headVertex <- priorityQueue$extractMinVertex()
## Assign the key of the head vertex as current vertex
currentVertex <- headVertex$key
## Append distancemap with current key and its value
distanceMap[[currentVertex]] <- headVertex$value
## Check for all directly connected vertices for the current vertex
for(conVert in getConVertex(graph,currentVertex)){
## get the corresponding edge value
edgeValue <- getEdgeValue(graph,currentVertex,conVert)
## Check priority queue contains the adjacent connected vertex (conVert) or not
## If yes, then proceed ahead
## if no, conVert vertex already has shortest distance from source vertex
if(!(conVert %in% priorityQueue$keys)){
next
}
## Now evaluate the distance of the adjacent vertex (conVert) with source vertex
## via current vertex. Add the distance of current vertex with edge value of
## adjacent vertex (conVert)
updDistance <- distanceMap[[currentVertex]] + edgeValue
## Check whether the value of the adjacent vertex in priorityQueue is greater than
## the updated distance or not. If yes, then decrease the value in the priorityQueue
## to the updated distance and also update the parent map of the adjacent vertex
## current vertex
if(priorityQueue$values[which(priorityQueue$keys==conVert)] > updDistance){
priorityQueue$values[which(priorityQueue$keys==conVert)] <- updDistance
parentMap[[conVert]] <- currentVertex
}
}
}
}
Prim’s algorithm
Priority queue which supports push, extractMinVertex, containsVertex, and getWeight for Prim's algorithm
PriorityQueueInit <-
setRefClass("PriorityQueueInit",
fields = list(keys = "integer", values = "integer"),
methods = list(
push = function(key,value) {
keys <<- append(keys, key)
values <<- append(values, value)
},
extractMinVertex = function() {
minPos <- which.min(values)   # position of the minimum value
key <- keys[[minPos]]
value <- values[[minPos]]
keys <<- keys[-minPos]        # remove the extracted entry from the queue
values <<- values[-minPos]
return(list(key=key,value=value))
}
))
Implement Prim’s algorithm
primMST <- function(Graph_ADT,vertices,n)
{
library(hashmap) ## To create new hashmap instances
## Initiate a new priority queue
## It can perform push, entractMinVertex
## push : add new vertices along with their distances from source node
## entractMinVertex : extract the vertex with minimum value
## key represents all the vertices of the graph
## value represents the vertex value or distance from the source vertex
priorityQueue <- PriorityQueueInit$new()
## Initiate a hashmap to store shortest distance from source vertex to every vertex
## keys are all the vertices of the graph other than source vertex
## values are the corresponding distances from the source node
## Dimension of hashmap is n, where n is total number of vertices in graph G
## Initialize all vertices with distance as 0 which will later be updated
distanceMap <- hashmap(keys=vertices,
values = rep(0,n))
## Initialise a list to store final MST result
MSTResult <- list()
## initialize priority queue with value of all vertices to infinity
for( i in vertices)
priorityQueue$push(vertices[i],Inf)
## begin with a random vertex
startVertex <<- vertices[sample(1:n, 1)]
## Set the distance of startVertex as zero
priorityQueue$values[which(priorityQueue$keys==startVertex)] <- 0
## Begin iteration till the all the vertices from priorityQueue becomes empty
while(length(priorityQueue$keys) != 0){
## Extract vertex with minimum value from priority queue along with its value
headVertex <- priorityQueue$extractMinVertex()
## Assign the key of the head vertex as current vertex
currentVertex <- headVertex$key
## Append distancemap with current key and its value
distanceMap[[currentVertex]] <- headVertex$value
## Check for all directly connected vertices for the current vertex
for(conVert in getConVertex(graph,currentVertex)){
## get the corresponding edge value
edgeValue <- getEdgeValue(graph,currentVertex,conVert)
## Check priority queue contains the adjacent connected vertex (conVert) or not
## If yes, then proceed ahead
## if no, conVert vertex already has shortest distance from current vertex
if(!(conVert %in% priorityQueue$keys)){
next
}
## Now evaluate the distance of the adjacent vertex (conVert) with current vertex.
## Update the distance with the edge value
updDistance <- edgeValue
## Check whether the value of the adjacent vertex in priorityQueue is greater than
## the updated distance or not. If yes, then decrease the value in the priorityQueue
## to the updated distance.
if(priorityQueue$values[which(priorityQueue$keys==conVert)] > updDistance){
priorityQueue$values[which(priorityQueue$keys==conVert)] <- updDistance
MSTResult[[currentVertex]] <- conVert
}
}
}
}
Define a reference class which can perform disjoint set operations
The operations are UNION, DIFFER and FIND
Union: to merge two sets together
Differ: to check whether vertices are in different sets
Find: to find whether a vertex is in a set or not
disjoinSetPointer <- setRefClass("disjoinSetPointer",
fields = list(vertex = "vector",
set1 = "vector",
set2 = "vector",
currentVertex = "integer"),
methods = list(
## merge two sets
union = function(set1,set2){
return(c(set1,set2))
},
## check whether set1 and set 2 are disjoint
## return TRUE if they are disjoint
differ = function(set1,set2){
if(sum(set1 %in% set2) == 0){
return(TRUE)} else(return(FALSE))
},
## Find whether a vertex is in a set or not
## returns root of the currentVertex
## function ROOT returns root of the vector
find = function(currentVertex){
return(ROOT(vertex[currentVertex]))
}
))
Define a reference class to store the edges along with from and to vertices on min-heap
kruskalArray <- setRefClass("kruskalArray",
fields = list(fromVertex = "numeric",
toVertex = "numeric",
weight = "numeric"),
methods = list(
## insert new from and to vertices along with edge
push = function(f, t, w){
fromVertex <<- append(fromVertex,f)
toVertex <<- append(toVertex,t)
weight <<- append(weight,w)
},
## extract from and to vertices having minimum edge value
## also remove from, to and edge value from the array
extractMinEdge = function() {
minPos <- which.min(weight)
from <- fromVertex[[minPos]]
to <- toVertex[[minPos]]
fromVertex <<- fromVertex[-minPos]
toVertex <<- toVertex[-minPos]
weight <<- weight[-minPos]
return(list(from=from,to=to))
}
))
Implement Kruskal's algorithm
kruskalMST <- function(Graph_ADT, n, e)
{
## initialse reference classes disjoinSetPointer and kruskalArray
vertexArray <- disjoinSetPointer$new()
edgeArray <- kruskalArray$new()
## Initialise a list to store final MST result
MSTResult <- list()
## Put all the edges in the edgeArray
for(i in 1:n){
j <- Graph_ADT$firstVertex(i)
while(j <= n){
edgeArray$push(i,j,Graph_ADT$weightEdge(i,j))
j <- Graph_ADT$nextVertex(i,j)
}
}
## Initialise n equivalent sets
numMST <- n
## Iteratively combine equivalent sets based on edge weights
## edges are extracted based on their value. Smallest edges are extracted first
for(i in 1:e){
while(numMST >= 1){
# get the from and to vertices having minimum edge value
temp <- edgeArray$extractMinEdge()
fromVertex <- temp$from
toVertex <- temp$to
## Check whether two vertices are in different sets
if(vertexArray$differ(fromVertex,toVertex)){
## if yes, then combine from and to vertices into one set
vertexArray$union(fromVertex,toVertex)
## add this edge to MST
MSTResult[[i]] <- c(fromVertex,toVertex)
## decrease the sets by 1
numMST <- numMST - 1
}
}
}
return(MSTResult)
}
Dynamic programming can be defined as an approach that solves a complex problem using a recurrence formulation (breaking the problem into subproblems) and reconstructs the solution of a problem from the solutions of its previous states. Dynamic-programming-based approaches can achieve polynomial complexity and guarantee much faster computation than classical methods such as brute force.
Computing the Fibonacci sequence using recursion:
nfib<-function(n){
assertthat::assert_that(n>0) & assertthat::assert_that(n<50)
if(n==1 || n==2) return(1)
val<- nfib(n-1) + nfib(n-2)
return(val)
}
Computing the Fibonacci sequence using dynamic programming:
nfib_DP<-function(n){
assertthat::assert_that(n>0) & assertthat::assert_that(n<50)
if(n<=2) return(1)
lag2_val<-0
lag1_val<-1
nfibval<-1
# Loop to compute Fibonacci value
for(i in 3:n){
lag2_val<-lag1_val
lag1_val<-nfibval
nfibval<-lag2_val+lag1_val
}
return(nfibval)
}
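Both implementations return the same values; the dynamic programming version avoids the exponential recursion of nfib() (assertthat must be installed for the input checks):
nfib(10)    # 55
nfib_DP(10) # 55
nfib_DP(40) # 102334155, computed in a single loop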
The knapsack problem is a combinatorial optimization problem in which a subset of given items is selected to maximize profit without exceeding a capacity constraint. Depending on the number of items and the number of available knapsacks, several variants are described in the literature: the 0-1 knapsack problem, where each item is selected at most once; the bounded knapsack problem, which bounds the number of copies of each item; the multiple-choice knapsack problem, where several knapsacks are present and items are selected from multiple sets; and the multi-constrained knapsack problem, where the knapsack has constraints on both volume and weight.
In the 0-1 knapsack problem, you are given a knapsack with capacity W and n items, each with a weight and a value: item i has weight S[i] and value C[i]. The task is to determine the maximum total value the knapsack can carry.
The recurrence can be expressed as K[i, j] = K[i-1, j] when item i does not fit (S[i] > j), and K[i, j] = max(C[i] + K[i-1, j - S[i]], K[i-1, j]) otherwise, with K[0, j] = 0 for every capacity j.
Solving the 0-1 knapsack problem using dynamic programming:
knapsack_DP<-function(W, S, C, n){
require(pracma)
K<-zeros(n+1, W+1)
for(i in 1:(n+1)){
for(j in 1:(W+1)){
if(i==1 | j==1){
K[i,j]=0
} else if(S[i-1] <= j){
K[i, j] = max(C[i-1] + K[i-1, (j-S[i-1])], K[i-1, j])
} else
{
K[i, j] = K[i-1, j]
} }
}
return(K[n+1, W+1])
}
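For example, with a capacity of 10 and four items, the optimal value of 90 comes from selecting the items with weights 4 and 3 (the pracma package must be installed for zeros()):
knapsack_DP(W = 10, S = c(5, 4, 6, 3), C = c(10, 40, 30, 50), n = 4)
## [1] 90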
The all-pairs shortest path (APSP) problem focuses on finding the shortest paths between all pairs of vertices.
One way to solve the APSP problem is the Floyd-Warshall algorithm, which uses dynamic programming. The approach is based on the observation that any path connecting two vertices u and v may pass through zero or more intermediate vertices. The R implementation of the Floyd-Warshall algorithm is as follows:
floydWarshall<-function(graph){
nodes<-names(graph)
dist<-graph
for (n in nodes){
for(ni in nodes){
for(nj in nodes){
if((dist[[ni]][n]+dist[[n]][nj])<dist[[ni]][nj]){
dist[[ni]][nj]<-dist[[ni]][n]+dist[[n]][nj]
}
}
}
}
return(dist)
}
Example script for Floyd-Warshall algorithm
# Defining graph structure
graph<-list()
graph[["A"]]=c("A"=0, "B"=8, "C"=Inf, "D"=Inf, "E"=1, "F"=Inf)
graph[["B"]]=c("A"=Inf, "B"=0, "C"=7, "D"=6, "E"=Inf, "F"=Inf)
graph[["C"]]=c("A"=Inf, "B"=Inf, "C"=0, "D"=6, "E"=Inf, "F"=Inf)
graph[["D"]]=c("A"=Inf, "B"=Inf, "C"=Inf, "D"=0, "E"=Inf, "F"=4)
graph[["E"]]=c("A"=Inf, "B"=3, "C"=Inf, "D"=Inf, "E"=0, "F"=9)
graph[["F"]]=c("A"=Inf, "B"=3, "C"=Inf, "D"=4, "E"=9, "F"=0)
# get shortest pair distance
APSP_Dist<-floydWarshall(graph)
The graph is stored as an R list whose entries can be accessed by vertex name; Inf indicates that there is no direct link.
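Individual entries of the result give the shortest distance for a pair of vertices; for example, the shortest path from A to D (A -> E -> B -> D) has length 10:
APSP_Dist[["A"]]["D"]
##  D
## 10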
When computation is very expensive, introducing randomness can help reduce the computational effort at the cost of some accuracy.
Finding the maximum in an unsorted list costs O(n), and any deterministic algorithm needs O(n) effort to determine the maximum. However, in scenarios where time is critical and n is very large, an approximation algorithm can be used: instead of finding the exact solution, it determines a solution close to the exact one. Randomized algorithms can be classified by their goal as follows:
1. Las Vegas algorithms: a Las Vegas algorithm fails with some probability, so it can be rerun until a suitable result is obtained; however, this affects the running time, which becomes unbounded. Las Vegas algorithms are therefore usually run within a prescribed time budget and return a reasonable result (an error is raised if the algorithm fails). 2. Monte Carlo algorithms: with a Monte Carlo algorithm we cannot test when the algorithm may fail, but the failure probability can be reduced by increasing the number of iterations until the expected result is obtained.
Generally, Las Vegas algorithms are preferred. However, Monte Carlo algorithms can be useful in some situations, especially if the failure probability can be kept low and the data itself involves uncertainty. For example, suppose we want to determine the average height in a city using sample data: we run a Monte Carlo style estimate of the mean on the samples to obtain the expected value and its variability. If the sample does not represent the population well, we may get a wrong answer, but unless we have the full population data we will not be able to detect it.
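As a small sketch of the Monte Carlo idea (approx_max is a hypothetical helper, not part of the original text): the maximum of a large unsorted vector can be approximated by inspecting only a random sample of its elements, trading accuracy for computational effort; increasing the sample size k lowers the failure probability.
approx_max <- function(V, k = 100) {
  idx <- sample(seq_along(V), size = min(k, length(V)))  # inspect only k random positions
  max(V[idx])
}
set.seed(1)
x <- runif(1e6)
approx_max(x, k = 1000)  # close to, but possibly below, max(x)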
The skip list is a probabilistic data structure developed by Bill Pugh in 1990. It was developed to address the search limitations of linked lists and arrays. Skip lists provide an alternative to binary search trees (BSTs) and similar tree-based data structures, which tend to become unbalanced, and they are easier to implement than tree-based structures. However, they do not guarantee optimal performance, since they use randomization to arrange entries so that search and update take O(log n) time on average, where n is the number of entries in the dictionary. The average time in a skip list does not depend on the distribution of keys and inputs; it depends only on the random seed chosen during operations such as insertion. This is a good example of the trade-off between implementation complexity and algorithmic performance.
A skip list can be viewed as an extension of a sorted linked list.
In a two-level skip list there are two chains: S0 connects every node, while S1 connects every alternate node of the sorted list.
A skip list can be of order n; the example discussed here is an order-2 skip list.
Skip node data structure
skListNode<-function(val, height=1){
# function to create empty environment
create_emptyenv = function() {
emptyenv()
}
# Create skiplist node
skiplist <- new.env(parent=create_emptyenv())
skiplist$element <- val
skiplist$nextnode<-rep(list(NULL), height)
class(skiplist) <- "skiplist"
skiplist
}
Function to find a value in skip list
findvalue<-function(skiplist, searchkey){
for (i in level:1){
# Assign head values
skiplist<-skiplist$nextnode[[i]]
while(!is.null(node) && node$element>searchkey){
skiplist<-skiplist$nextnode[[i]]
}
skiplist = skiplist$nextnode[0]
if(!is.null(skiplist) && searchkey==skiplist$element){
# Return element from Skip node
return(skiplist$element)
} else
{
# Element not found return NULL
return(NULL)
}
}
}
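A hand-wired two-level skip list and two lookups using the findvalue() above (node heights and links are fixed by hand purely for illustration):
head_node <- skListNode(NA, height = 2)  # header node, no real element
n3  <- skListNode(3,  height = 2)
n7  <- skListNode(7,  height = 1)
n12 <- skListNode(12, height = 2)
head_node$nextnode[[1]] <- n3; head_node$nextnode[[2]] <- n3
n3$nextnode[[1]] <- n7;        n3$nextnode[[2]] <- n12
n7$nextnode[[1]] <- n12
findvalue(head_node, 7)   # 7
findvalue(head_node, 5)   # NULL, value not present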
Data structures are the building blocks of any algorithm. Heaps, queues, stacks, trees, and hash tables are forms of data structures used widely across programming languages. Some are used mainly for lookup, such as trees and hash tables, while others, such as heaps, queues, and stacks, are used for updates such as insertion and deletion.
rstackdeque provides a fully persistent stack data structure. In this package, stacks are implemented using linked lists in which each node consists of a data element and a reference to the next node. These stacks are S3 objects and are accessed through the head stack node.
From the Wikipedia definition of a fully persistent data structure: in computing, a persistent data structure (or non-ephemeral data structure) is a data structure that always preserves its previous versions when it is modified.
library(rstackdeque)
## Fully Persistent stacks
a <- as.rstack(c("p", "q", "r","s","t"))
b <- insert_top(a, c("o"))
c <- without_top(a)
d <- peek_top(a)
## Slowly Persistent Queues and Deques
a <- as.rdeque(c("p", "q", "r","s","t","u","v"))
b <- insert_front(a, c("o"))
c <- insert_back(a, c("w"))
d <- without_front(a)
e <- without_back(a)
## Fast Fully Persistent Queues ##
a <- as.rpqueue(c("p","q","r","s"))
b <- insert_back(a, "t")
c <- without_front(b)
d <- insert_back(c,"v")
e <- insert_back(d,"w")
f <- without_front(e)