Blocks

区块链是一连串的块 , 块(Block) 是存储数据的容器

  1. an identification number 身份标识号吗
  2. a timestamp of block creation 块的创建时间戳
  3. data 相关数据
  4. a reference to the previous block (parent block) in the chain 对前一个块的引用
block <- list(number = 3,
             timestamp = Sys.time() ,
             data = "Some Data in Block Chain",
             parent = 2)

block
## $number
## [1] 3
## 
## $timestamp
## [1] "2023-09-26 22:16:42 CST"
## 
## $data
## [1] "Some Data in Block Chain"
## 
## $parent
## [1] 2

Chains

块连接成一个链。该链的第一个区块称为起源区块,没有父区块。链的最后一个块是当前块,不是任何其他块的父代

# some blocks
block1 <- list(number = 1,
             timestamp = Sys.time(),
             data = "London",
             parent = NA)

block2 <- list(number = 2,
             timestamp = Sys.time(),
             data = "Paris",
             parent = 1)

block3 <- list(number = 3,
             timestamp = Sys.time(),
             data = "Rome",
             parent = 2)

# the blockchain
blockchain = list(block1, block2, block3)

# get the 2nd block
blockchain[[2]]
## $number
## [1] 2
## 
## $timestamp
## [1] "2023-09-26 22:16:42 CST"
## 
## $data
## [1] "Paris"
## 
## $parent
## [1] 1

我们还要为区块链编写一个验证函数。现在,我们只检查非起源块的父字段是否引用链中的前一个块。

validate = function(blockchain) {
  if (length(blockchain) >= 2) {
    for (i in 2:length(blockchain)) {
      if (blockchain[[i]]$parent != blockchain[[i-1]]$number) {
        return(FALSE)
      }
    }
  }
  return(TRUE)
}

validate(blockchain)
## [1] TRUE

Hash

链的链接结构比上面描述的更复杂: 区块链中的每个区块通过其内容的散列值和对父区块的散列的引用来识别,或者如果该区块是起源区块,则为0。

块的哈希是使用加密哈希函数创建的,哈希函数是将任意大小的数据映射到固定大小的位字符串(称为哈希或摘要)的数学算法。散列用于验证块的信息内容: 通过修改哪怕是一位的内容,散列就完全改变了。此外,请注意,当前块的摘要是根据前面的块摘要计算的。这意味着,如果您更改一个块,您不仅需要修改它的散列,而且需要修改以下所有块的散列,以使链有效。

这里使用的哈希算法(SHA-256)是 SHA-2(SHA-2)的一部分,这是美国国家安全局(NSA)设计的一组加密哈希函数。尤其是使用256位(或32个十六进制数字)的摘要。它是在 R 包摘要中实现的。

library(digest)

digest("A blockchain is a chain of blocks", "sha256")
## [1] "5c2005976411a1628fabcdde3ac04be563d18943e710b7446afa2a8a5fab9abc"

对块进行哈希编码

# hash blocks
block1 <- list(number = 1,
             timestamp = "2018-10-01 17:24:00 CEST",
             data = "London",
             parent_hash = "0")

block1$hash = digest(block1, "sha256")

block2 <- list(number = 2,
             timestamp = "2018-10-01 17:24:15 CEST",
             data = "Paris",
             parent_hash = block1$hash)

block2$hash = digest(block2, "sha256")

block3 <- list(number = 3,
             timestamp = "2018-10-01 17:24:30 CEST",
             data = "Rome",
             parent_hash = block2$hash)

block3$hash = digest(block3, "sha256")

# the blockchain
blockchain = list(block1, block2, block3)

让我们更新验证函数,并给出一个简单的例子:

validate = function(blockchain) {
  for (i in 1:length(blockchain)) {
    block = blockchain[[i]]
    hash = block$hash
    block$hash = NULL
    hash_expected = digest(block, "sha256")
    if (hash != hash_expected) {
      return(FALSE)
    }
  }
  if (length(blockchain) >= 2) {
    for (i in 2:length(blockchain)) {
      if (blockchain[[i]]$parent_hash != blockchain[[i-1]]$hash) {
        return(FALSE)
      }
    }
  }
  return(TRUE)
}

validate(blockchain)
## [1] TRUE

Proof of Work

单独使用哈希并不足以防止篡改,因为计算机可以快速计算哈希值。工作证明(PoW)算法控制创建新块的难度。对于像 BitCoin 或 Etherum 这样的区块链,区块是由所谓的矿工创建(开采)的。当需要创建一个新块时,会向网络发送一个硬计算问题。解决这个问题的矿工首先创建新的块,并获得加密货币奖励。

在比特币的情况下,PoW 问题涉及到寻找一个数字(称为 nonce)的问题.为了找到一个有效的 nonce,挖掘者需要执行的平均工作量在难度上是指数级的,而人们可以通过执行一个散列函数来验证块的有效性。

proof_of_work = function(block, difficulty) {
  block$nonce <- 0
  hash = digest(block, "sha256")
  zero <- paste(rep("0", difficulty), collapse="")
  while(substr(hash, 1, difficulty) != zero) {
      block$nonce = block$nonce + 1
      hash = digest(block, "sha256")  
  }
  return(list(hash = hash, nonce = block$nonce))
}

block <- list(number = 1,
             timestamp = "2018-10-01 17:24:00 CEST",
             data = "London",
             hash = "88e96d4537bea4d9c05d12549907b32561d3bf31f45aae734cdc119f13406cb6Parent",
             parent_hash = "d4e56740f876aef8c010b86a40d5f56745a118d0906a34e69aec8c0db1cb8fa3")


proof_of_work(block, 3)
## $hash
## [1] "0003b3433712535240d61d5c5c29c2975ec8a170e941323d489bcc5fedad55f6"
## 
## $nonce
## [1] 2937

查看Pow 的效率

n = 6
iterations = vector("integer", n)
for (i in 1:n) {
  iterations[i] = proof_of_work(block, i)$nonce
}

iterations
## [1]       6     165    2937   14644  605054 1926389
plot(1:n, log2(iterations), type = "b", xlab = "difficulty", ylab = "log(iterations)")
## Warning in check_font_path(italic, "italic"): 'italic' should be a length-one
## vector, using the first element

现在使用Pow 方法创建一个区块链

mine <- function(previous_block, difficulty = 3, genesis = FALSE){
  
  if (genesis) {
    # define genesis block
    new_block <-  list(number = 1,
                       timestamp = Sys.time(),
                       data = "I'm genesis block",
                       parent_hash = "0")  
  } else {
    # create new block
    new_block <- list(number = previous_block$number + 1,
                      timestamp = Sys.time(),
                      data = paste0("I'm block ", previous_block$number + 1),
                      parent_hash = previous_block$hash)
  }
  # add nonce with PoW
  new_block$nonce <- proof_of_work(new_block, difficulty)$nonce
  # add hash 
  new_block$hash <- digest(new_block, "sha256")
  return(new_block)
}

blockchained = function(difficulty = 3, nblocks = 3) {
  # mine genesis block
  block_genesis = mine(NULL, difficulty, TRUE)   
  # first block is the genesis block
  blockchain <- list(block_genesis)

  if (nblocks >= 2) {
    # add new blocks to the chain
    for (i in 2:nblocks){
      blockchain[[i]] <- mine(blockchain[[i-1]], difficulty) 
    }
    
  }
  
  return(blockchain)
}


blockchained(nblocks = 3)
## [[1]]
## [[1]]$number
## [1] 1
## 
## [[1]]$timestamp
## [1] "2023-09-26 22:18:14 CST"
## 
## [[1]]$data
## [1] "I'm genesis block"
## 
## [[1]]$parent_hash
## [1] "0"
## 
## [[1]]$nonce
## [1] 10102
## 
## [[1]]$hash
## [1] "0009e30900402cf2af6f7372a74dfb819974b96ec23a49cab38e9417bad5bb47"
## 
## 
## [[2]]
## [[2]]$number
## [1] 2
## 
## [[2]]$timestamp
## [1] "2023-09-26 22:18:15 CST"
## 
## [[2]]$data
## [1] "I'm block 2"
## 
## [[2]]$parent_hash
## [1] "0009e30900402cf2af6f7372a74dfb819974b96ec23a49cab38e9417bad5bb47"
## 
## [[2]]$nonce
## [1] 8460
## 
## [[2]]$hash
## [1] "000fca879c154c30ac35871dd5748f0d80aa54593fe13356bebb5eb3cf02c737"
## 
## 
## [[3]]
## [[3]]$number
## [1] 3
## 
## [[3]]$timestamp
## [1] "2023-09-26 22:18:15 CST"
## 
## [[3]]$data
## [1] "I'm block 3"
## 
## [[3]]$parent_hash
## [1] "000fca879c154c30ac35871dd5748f0d80aa54593fe13356bebb5eb3cf02c737"
## 
## [[3]]$nonce
## [1] 8558
## 
## [[3]]$hash
## [1] "0000a8528a22dd98ebfa90c700391361778119499483a3a1fd0a8f3b0c69d018"

Transactions

通常,块的数据字段包含一定数量的事务。每个事务都有一个发送方、一个接收方和一个从发送方传输到接收方的加密货币值。此外,它还包含交易费用。事务存储在一个挂起的事务池中,每个挖掘的块将包括一个(适当的)挂起的事务子集。块的挖掘者获得所有被阻塞事务的费用以及固定的挖掘奖励(这个事务包含在以下块中)。这就是在区块链经济中引入新硬币的方式。

让我们创建一个虚构的挂起事务池,我们使用 tibble 包将它们存储在一个数据框架结构中。

交易事务集合

  1. 发送方
  2. 接收方
  3. 加密货币的值
  4. 交易费用
library(tibble)
ntrx = 10
sender = sample(LETTERS, ntrx)
receiver = sample(LETTERS, ntrx)
value = round(runif(n = ntrx, min = 0, max = 100), 0)
fee = round(runif(n = ntrx, min = 0, max = 1), 2)
(transactions = tibble(sender, receiver, value, fee))
## # A tibble: 10 × 4
##    sender receiver value   fee
##    <chr>  <chr>    <dbl> <dbl>
##  1 U      F           10  0.2 
##  2 N      R           95  0.14
##  3 Q      I           74  0.99
##  4 R      P           24  0.58
##  5 X      Z           21  0.24
##  6 Y      G           74  0.91
##  7 F      E            8  0.21
##  8 S      H           45  0.83
##  9 D      J           92  0.41
## 10 O      D            3  0.66

让我们更新挖掘函数。每个区块将包括最有利可图的交易(最高的费用)在未决的。该区块的采矿者将获得该区块交易费用总额的奖励。奖励事务将插入到下一个块中。

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
mine <- function(previous_block, transactions, difficulty = 3, block_size = 3, miner = "Z", genesis = FALSE){
  # filter transactions to add
  trans_pending = arrange(transactions, -fee)
  if (nrow(trans_pending) < block_size) {
    trans_to_add = trans_pending
    trans_pending = tibble()
  } else {
    trans_to_add =  filter(trans_pending, row_number() <= block_size) 
    trans_pending = filter(trans_pending, row_number() > block_size) 
  }
  
  if (genesis) {
    # define genesis block
    new_block <-  list(number = 1,
                       timestamp = Sys.time(),
                       data = trans_to_add,
                       parent_hash = "0")  
  } else {
    # create new block
    new_block <- list(number = previous_block$number + 1,
                      timestamp = Sys.time(),
                      data = trans_to_add,
                      parent_hash = previous_block$hash)
  }
  
  # add nonce with PoW
  new_block$nonce <- proof_of_work(new_block, difficulty)$nonce
  # add hash 
  new_block$hash <- digest(new_block, "sha256")
  # add reward transaction
  trans_pending = rbind(trans_pending, data.frame(sender = NA, receiver = miner, value = sum(new_block$data$fee), fee = 0.01))
  return(list(block = new_block, transactions = trans_pending))
}


blockchained = function(transactions, difficulty = 3, block_size = 3, nblocks = 3) {
  # define genesis block
  mined = mine(NULL, transactions, difficulty, block_size, miner = "Z", genesis = TRUE)
  block_genesis <- mined$block
  pending = mined$transactions
  # first block is the genesis block
  blockchain <- list(block_genesis)

  if (nblocks >= 2) {
    # add blocks to the chain
    for (i in 2:nblocks){
      mined <- mine(blockchain[[i-1]], pending, difficulty, block_size, miner = "Z")
      blockchain[[i]] <- mined$block
      pending = mined$transactions
    }
  }
  
  return(blockchain)
}

blockchained(transactions, nblocks = 3, block_size = 5)
## [[1]]
## [[1]]$number
## [1] 1
## 
## [[1]]$timestamp
## [1] "2023-09-26 22:18:15 CST"
## 
## [[1]]$data
## # A tibble: 5 × 4
##   sender receiver value   fee
##   <chr>  <chr>    <dbl> <dbl>
## 1 Q      I           74  0.99
## 2 Y      G           74  0.91
## 3 S      H           45  0.83
## 4 O      D            3  0.66
## 5 R      P           24  0.58
## 
## [[1]]$parent_hash
## [1] "0"
## 
## [[1]]$nonce
## [1] 3682
## 
## [[1]]$hash
## [1] "00052d860bfd0968d5d8a56143f2ad5fe2c9a76bbdf80f5f9cdfafb4997f610e"
## 
## 
## [[2]]
## [[2]]$number
## [1] 2
## 
## [[2]]$timestamp
## [1] "2023-09-26 22:18:16 CST"
## 
## [[2]]$data
## # A tibble: 5 × 4
##   sender receiver value   fee
##   <chr>  <chr>    <dbl> <dbl>
## 1 D      J           92  0.41
## 2 X      Z           21  0.24
## 3 F      E            8  0.21
## 4 U      F           10  0.2 
## 5 N      R           95  0.14
## 
## [[2]]$parent_hash
## [1] "00052d860bfd0968d5d8a56143f2ad5fe2c9a76bbdf80f5f9cdfafb4997f610e"
## 
## [[2]]$nonce
## [1] 4574
## 
## [[2]]$hash
## [1] "000322fc5b07d5e278baf7497d7ba15da1778dbc70213fa6e77265ff4187bf17"
## 
## 
## [[3]]
## [[3]]$number
## [1] 3
## 
## [[3]]$timestamp
## [1] "2023-09-26 22:18:16 CST"
## 
## [[3]]$data
## # A tibble: 2 × 4
##   sender receiver value   fee
##   <chr>  <chr>    <dbl> <dbl>
## 1 <NA>   Z         3.97  0.01
## 2 <NA>   Z         1.2   0.01
## 
## [[3]]$parent_hash
## [1] "000322fc5b07d5e278baf7497d7ba15da1778dbc70213fa6e77265ff4187bf17"
## 
## [[3]]$nonce
## [1] 719
## 
## [[3]]$hash
## [1] "000e4fced0ff05d00fe6a0136303fde13bd67c6d1eac5dbea3e466462132c85e"

Digital signature

我们如何确保交易是真实的?区块链使用非对称加密技术实现事务的数字签名。每个事务都由发送方使用其私钥进行签名,任何人都可以使用发送方的公钥验证签名(和事务)的真实性。

公开密钥加密密码或非对称密码是任何使用成对密码匙的密码系统: 公开密码匙可广泛传播,而私人密码匙只有拥有人才知道。这实现了两个功能: 身份验证(公钥验证配对私钥的持有者发送了消息)和加密(只有配对私钥的持有者才能解密用公钥加密的消息)。公钥加密系统的强度依赖于从配对公钥中找到私钥所需的计算工作。

在公钥加密系统中,任何人都可以使用接收方的公钥对消息进行加密。加密信息只能用接收方的私钥解密。在公钥签名系统中,人们可以将消息与私钥结合起来,在消息上创建一个简短的数字签名。任何拥有相应公钥的人都可以将已签署的消息和已知的公钥组合起来,以验证消息上的签名是否有效,即由相应私钥的所有者签名。更改消息,甚至更换一个字母,都会导致验证失败。

里维斯特-沙米尔-阿德尔曼是最早的公钥密码体制之一,广泛用于安全数据传输。在 RSA 中,不对称性是基于两个大素数的乘积分解的实际困难。

加密和数字签名示例:

library(openssl)
## Linking to: OpenSSL 1.1.1m  14 Dec 2021
## 
## Attaching package: 'openssl'
## The following object is masked from 'package:digest':
## 
##     sha1
# encryption 加密
# generate a private key (key) and a public key (pubkey)
key <- rsa_keygen(512)
pubkey <- key$pubkey
# message
msg <- charToRaw("Blockchain is terrific!")
# cipher the message with public key
ciphermsg <- rsa_encrypt(msg, pubkey)
# decrypt the message with private key
rawToChar(rsa_decrypt(ciphermsg, key))
## [1] "Blockchain is terrific!"

签名

# signature
# generate a private key (key) and a public key (pubkey)
key <- rsa_keygen()
pubkey <- key$pubkey
# build a transaction
trans = list(sender = "A", receiver = "B", amount = "100")
# serialize data
data <- serialize(trans, NULL)
# sign (a hash of) the transaction with private key
sig <- signature_create(data, sha256, key = key)
# verify the message with public key
signature_verify(data, sig, sha256, pubkey = pubkey)
## [1] TRUE

签名和加密

# signature and encryption
# generate a private key (key) and a public key (pubkey)
key <- rsa_keygen()
pubkey <- key$pubkey
# build a transaction
trans = list(sender = "A", receiver = "B", value = "100")
# serialize data
data <- serialize(trans, NULL)
# sign (a hash of) the transaction with private key
sig <- signature_create(data, sha256, key = key)
# cipher the transaction with public key
ciphermsg <- rsa_encrypt(data, pubkey)
# verify the message with public key
signature_verify(data, sig, sha256, pubkey = pubkey)
## [1] TRUE
# decrypt and unserialize the transation with private key
unserialize(rsa_decrypt(ciphermsg, key))
## $sender
## [1] "A"
## 
## $receiver
## [1] "B"
## 
## $value
## [1] "100"

Network

最后,区块链分类帐是通过对等网络分布的。运行网络的步骤如下:

  1. 向所有的节点传播transaction
  2. 每一个块都包涵新交易
  3. 每一个节点都需要去计算该节点的Pow
  4. 当一个节点找到Pow的时候, 将块广播到所有节点
  5. 节点只有块中的所有交易都是有效并且尚未使用才接受该块
  6. 节点通过使用接受的块的哈希作为前一个哈希来创建链中的下一个块,从而表示它们接受该块。

节点始终认为最长的链是正确的,并将继续努力扩展它。奖励的激励可能有助于鼓励节点保持诚实。如果一个贪婪的攻击者能够聚集比所有诚实的节点更多的 CPU 能量,他将不得不选择使用它来欺骗人们偷回他的付款,或使用它来生成新的硬币。他应该会发现,遵守规则(生成新的硬币)比破坏体制和自身财富的有效性更有利可图。这些规则对他有利,使他获得的新硬币比其他所有人加起来都多。

https://users.dimi.uniud.it/~massimo.franceschet/HEX0x6C/blockchain/blockchainR.html