緣起

在臉書看到了這張圖，覺得不是很對勁。

紅藍綠三組人馬財產沒什麼成長，但黃組一飛沖天。這樣的比較方式是否合理？這張圖是否能說明貧富差距以不正常的方式變大了？

還是貧富差距沒擴大，本身選擇資料的方式，就會產生這樣的圖形？

alt text

測試各種功能

引入函數，別管我

library(ggplot2)
library(ineq)

生出兩千個人來，有錢人和窮人一樣多

people = c(1:2000)

hist(people,breaks = 100,xlim = c(0,3000))

qs= quantile(people,probs = 1-c(100,10,5,1,0.1,0.01)/100)
qs

##       0%      90%      95%      99%    99.9%   99.99% 
##    1.000 1800.100 1900.050 1980.010 1998.001 1999.800

找各區間的平均財富分配

tapply(people, findInterval(people, qs), mean)

##      1      2      3      4      5      6 
##  900.5 1850.5 1940.5 1989.5 1999.0 2000.0

定義一個不均度函數來比較是否貧富差距擴大。大家可以觀察到所有人財富成長6倍之後，貧富差距的數字指標大多是不會變的。一個好理由是，貧富指標會設計讓乘法變換不會影響指標數字，所以用台幣算出來的貧富指標還會和用美金算出來的指標數字會一樣，貧富不會因為乘了匯率就變得比較不均或均勻。

iq = function(x){
  df= data.frame(type = c("Gini", "RS", "Atkinson", "Theil", "Kolm", "var", "square.var", "entropy"))

  df[,"before"] = sapply(df[,"type"] , function(t){ineq(x, type=as.character(t))})
  df[,"6x growth"] = sapply(df[,"type"] , function(t){ineq(x*6, type=as.character(t))})
  return(df)
  }
iq(people)

##         type    before 6x growth
## 1       Gini 0.3331667 0.3331667
## 2         RS 0.2498751 0.2498751
## 3   Atkinson 0.1108950 0.1108950
## 4      Theil 0.1928976 0.1928976
## 5       Kolm       Inf       Inf
## 6        var 0.5770617 0.5770617
## 7 square.var 0.3330002 0.3330002
## 8    entropy 0.2283055 0.2283055

Uniform

我們假設每個人投資的功力差不多，看看36年間財富分配。我們以最頂層的人從2000賺到12000來計算財富成長。

people2 = c(1:2000)

iq(people2)

##         type    before 6x growth
## 1       Gini 0.3331667 0.3331667
## 2         RS 0.2498751 0.2498751
## 3   Atkinson 0.1108950 0.1108950
## 4      Theil 0.1928976 0.1928976
## 5       Kolm       Inf       Inf
## 6        var 0.5770617 0.5770617
## 7 square.var 0.3330002 0.3330002
## 8    entropy 0.2283055 0.2283055

compute <- function (people) {
  hist(people,breaks = 200)
  qs= quantile(people,probs = 1-c(100,10,5,1,0.1,0.01)/100,names = FALSE)
  df = data.frame()
  df= rbind(df,tapply(people, findInterval(people, qs), mean))
  colnames(df)<-c("90% below","10%~5%","5%~1%","1%~0.1%","0.1%~0.01%","0.01% above")
  
  for (i in 1:35) {
    people = people* 1.05103 ## this is 6^(1/36)
    qs= quantile(people,probs = 1-c(100,10,5,1,0.1,0.01)/100,names = FALSE)
    df = rbind(df, tapply(people, findInterval(people, qs), mean))
  }
  df2 = df[,c(1,2,3,5)]
  df2[,"time"] = c(1977:2012)

  return(df2)  
}

df = compute(people2)

p <- ggplot(df,aes(x=df[,"time"], y = value, color = variable)) + geom_line(aes(y = df[,"90% below"] ,col = "90% below"))+ geom_line(aes(y = df[,"10%~5%"] ,col = "10%~5%"))+ geom_line(aes(y = df[,"5%~1%"] ,col = "5%~1%"))+ geom_line(aes(y = df[,"0.1%~0.01%"] ,col = "0.1%~0.01%"))
    
print(p)

結論，從圖中看起來沒像臉書那麼極端

使用truncate normal

中產階級較多的分配

library(truncnorm)
people3 = rtruncnorm(2000, a=1, b=2000, mean = 1000, sd = 500)

iq(people3)

##         type     before  6x growth
## 1       Gini 0.25408464 0.25408464
## 2         RS 0.18312070 0.18312070
## 3   Atkinson 0.06096582 0.06096582
## 4      Theil 0.10984113 0.10984113
## 5       Kolm        Inf        Inf
## 6        var 0.44357038 0.44357038
## 7 square.var 0.19675468 0.19675468
## 8    entropy 0.12384896 0.12384896

df = compute(people3)

p <- ggplot(df,aes(x=df[,"time"], y = value, color = variable)) + geom_line(aes(y = df[,"90% below"] ,col = "90% below"))+ geom_line(aes(y = df[,"10%~5%"] ,col = "10%~5%"))+ geom_line(aes(y = df[,"5%~1%"] ,col = "5%~1%"))+ geom_line(aes(y = df[,"0.1%~0.01%"] ,col = "0.1%~0.01%"))
    
print(p)

結論，從圖中看起來沒像臉書那麼極端

Pareto

傳說中宣稱符合現實世界的分配

library(PtProcess)
people4 = rpareto(2000, 1.0001, 1)

iq(people4) # 是的！有個指標不一樣了！

##         type      before   6x growth
## 1       Gini   0.7840737   0.7840737
## 2         RS   0.6537640   0.6537640
## 3   Atkinson   0.5782494   0.5782494
## 4      Theil   2.4599656   2.4599656
## 5       Kolm   7.3264557  47.3341100
## 6        var  11.4612436  11.4612436
## 7 square.var 131.3601044 131.3601044
## 8    entropy   1.4023069   1.4023069

df = compute(people4)

p <- ggplot(df,aes(x=df[,"time"], y = value, color = variable)) + geom_line(aes(y = df[,"90% below"] ,col = "90% below"))+ geom_line(aes(y = df[,"10%~5%"] ,col = "10%~5%"))+ geom_line(aes(y = df[,"5%~1%"] ,col = "5%~1%"))+ geom_line(aes(y = df[,"0.1%~0.01%"] ,col = "0.1%~0.01%"))
    
print(p)

結論，好像真有那麼一點像，遺憾的是我沒辦法很精準的控制分配，讓最頂層的人們在1977年接近2000與2013年接近12000。

拿掉最上面0.1 %

後面三組就會看起來蠻接近的

p <- ggplot(df,aes(x=df[,"time"], y = value, color = variable)) + geom_line(aes(y = df[,"90% below"] ,col = "90% below"))+ geom_line(aes(y = df[,"10%~5%"] ,col = "10%~5%"))+ geom_line(aes(y = df[,"5%~1%"] ,col = "5%~1%"))
    
print(p)

結結結結論

如果在1977年，社會財富的分配呈現類似Pareto的分配，即使大家投資功力差不多情況下，過了若干年後，本金多的人累積財富速度快，在數字上當然追過其他人較多囉。

另一個問題：有錢人真的大風大浪沉下去後爬起來快嗎？

0.01啪的人好像大起大落，好像比較會使用資產？

我的看法是，沒錢人本金少，即使是一樣的投資波動，也會看起來沒那麼明顯。

為了驗證這點我們修改一下產生資料的方式

compute2 <- function (people) {
  # hist(people,breaks = 200) #不再重印
  qs= quantile(people,probs = 1-c(100,10,5,1,0.1,0.01)/100,names = FALSE)
  df = data.frame()
  df= rbind(df,tapply(people, findInterval(people, qs), mean))
  colnames(df)<-c("90% below","10%~5%","5%~1%","1%~0.1%","0.1%~0.01%","0.01% above")
  
  for (i in 1:35) {
    people = people* (1.05103 +runif(n = 1, min = -0.5, max = 0.5)) ## this is 6^(1/36)
    qs= quantile(people,probs = 1-c(100,10,5,1,0.1,0.01)/100,names = FALSE)
    df = rbind(df, tapply(people, findInterval(people, qs), mean))
  }
  df2 = df[,c(1,2,3,5)]
  df2[,"time"] = c(1977:2012)

  return(df2)  
}

一樣請Pareto人們出來表演

df = compute2(people4)

p <- ggplot(df,aes(x=df[,"time"], y = value, color = variable)) + geom_line(aes(y = df[,"90% below"] ,col = "90% below"))+ geom_line(aes(y = df[,"10%~5%"] ,col = "10%~5%"))+ geom_line(aes(y = df[,"5%~1%"] ,col = "5%~1%"))+ geom_line(aes(y = df[,"0.1%~0.01%"] ,col = "0.1%~0.01%"))
    
print(p)

後面三組

p <- ggplot(df,aes(x=df[,"time"], y = value, color = variable)) + geom_line(aes(y = df[,"90% below"] ,col = "90% below"))+ geom_line(aes(y = df[,"10%~5%"] ,col = "10%~5%"))+ geom_line(aes(y = df[,"5%~1%"] ,col = "5%~1%"))
    
print(p)

main

chihchengliang

2015年04月08日