The script below visualizes mobility between different forms of tenure in 2016. It should get you what you see above.
NOTICE TO MARINERS: Results are presented in Swedish, which is a secret language spoken only by a few sad bastards somewhere in the north. If you find this interesting but confusing, email me.
prescript
# If you, like me, run your code in RStudio, this could save you 1.5 seconds :-):
# it switches the working directory to the folder of the script currently open
# in the editor.
library(rstudioapi)
# Only touch the working directory when RStudio is actually running and the
# active document has a path on disk. Outside RStudio the API call errors, and
# for an unsaved document (or the console) the path is "", which setwd() rejects.
if (rstudioapi::isAvailable()) {
  local({
    script_path <- rstudioapi::getActiveDocumentContext()$path
    if (nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  })
}
script
# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here. That
# call silently deletes the user's objects without giving a truly clean session
# (attached packages and options survive); restart R for a clean slate instead.
df2016 <- read.csv("circle2016.csv")

# Summary statistics of `value` for each `from` group.
s1 <- by(df2016$value, df2016$from, summary)

# `by` returns one summary per group. Flatten them into a matrix with one row
# per group and turn that into a data frame.
dfSUM1 <- data.frame(matrix(unlist(s1), nrow = length(s1), byrow = TRUE))
colnames(dfSUM1) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")

# Take the row labels from the grouping itself instead of a hand-typed vector:
# the order of the groups depends on how the current locale sorts "ä" and "ö",
# so hard-coded labels can end up attached to the wrong rows.
dfSUM1$from <- as.factor(dimnames(s1)[[1]])

# Put the label column first.
dfSUM1 <- dfSUM1[, c("from", setdiff(names(dfSUM1), "from"))]
knitr::kable(dfSUM1)
from | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
---|---|---|---|---|---|---|
Från bostadsrätt | 9488 | 14106.00 | 79935 | 58358.40 | 83939 | 104324 |
Från hyresrätt | 17796 | 32487.00 | 97260 | 136130.40 | 134012 | 399097 |
Från utlandet | 19294 | 26194.75 | 31360 | 40750.75 | 45916 | 80989 |
Från äganderätt | 13472 | 21021.00 | 80729 | 79323.20 | 135286 | 146108 |
Från övrigt | 5136 | 10437.00 | 10591 | 16492.60 | 16438 | 39861 |
# Summary statistics of `value` for each `to` group.
s2 <- by(df2016$value, df2016$to, summary)

# `by` returns one summary per group. Flatten them into a matrix with one row
# per group and turn that into a data frame.
dfSUM2 <- data.frame(matrix(unlist(s2), nrow = length(s2), byrow = TRUE))
colnames(dfSUM2) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")

# Take the row labels from the grouping itself instead of a hand-typed vector:
# the order of the groups depends on how the current locale sorts "ä" and "ö",
# so hard-coded labels can end up attached to the wrong rows.
dfSUM2$to <- as.factor(dimnames(s2)[[1]])

# Put the label column first.
dfSUM2 <- dfSUM2[, c("to", setdiff(names(dfSUM2), "to"))]
knitr::kable(dfSUM2)
to | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
---|---|---|---|---|---|---|
Till bostadsrätt | 10437 | 19294 | 80729 | 62408.8 | 97260 | 104324 |
Till hyresrätt | 39861 | 79935 | 80989 | 149198.0 | 146108 | 399097 |
Till utlandet | 5136 | 8400 | 11480 | 11473.0 | 14553 | 17796 |
Till äganderätt | 16438 | 28495 | 83939 | 79634.0 | 134012 | 135286 |
Till övrigt | 10591 | 14106 | 21021 | 22486.0 | 32487 | 34225 |
# Or, with less code involved: let skimr compute the per-group summaries.
library(skimr)
library(dplyr)

# Switch off the (otherwise wonderful) inline histograms for integer columns;
# they do not render well in markdown.
skim_with(integer = list(hist = NULL))

# Summary of `value` for each `from` group.
df2016 %>%
  dplyr::group_by(from) %>%
  skim(value)
## Skim summary statistics
## n obs: 24
## n variables: 3
## group variables: from
##
## -- Variable type:integer -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## from variable missing complete n mean sd p0
## Från bostadsrätt value 0 5 5 58358.4 43529.71 9488
## Från hyresrätt value 0 5 5 136130.4 154444.01 17796
## Från utlandet value 0 4 4 40750.75 27521.49 19294
## Från äganderätt value 0 5 5 79323.2 61906.28 13472
## Från övrigt value 0 5 5 16492.6 13661.59 5136
## p25 p50 p75 p100
## 14106 79935 83939 1e+05
## 32487 97260 134012 4e+05
## 26194.75 31360 45916 80989
## 21021 80729 135286 146108
## 10437 10591 16438 39861
# Summary of `value` for each `to` group.
df2016 %>%
  dplyr::group_by(to) %>%
  skim(value)
## Skim summary statistics
## n obs: 24
## n variables: 3
## group variables: to
##
## -- Variable type:integer -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## to variable missing complete n mean sd p0
## Till bostadsrätt value 0 5 5 62408.8 44348.26 10437
## Till hyresrätt value 0 5 5 149198 144795.95 39861
## Till utlandet value 0 4 4 11473 5418.31 5136
## Till äganderätt value 0 5 5 79634 56306.25 16438
## Till övrigt value 0 5 5 22486 10626.45 10591
## p25 p50 p75 p100
## 19294 80729 97260 1e+05
## 79935 80989 146108 4e+05
## 8400 11480 14553 17796
## 28495 83939 134012 135286
## 14106 21021 32487 34225
# This package is awesome.
library(circlize)

# Order of the sectors around the circle: the five origin ("Från ...") sectors
# first, then the five destination ("Till ...") sectors in the same tenure order.
sector_order <- c(
  "Från bostadsrätt", "Från hyresrätt", "Från äganderätt", "Från övrigt",
  "Från utlandet",
  "Till bostadsrätt", "Till hyresrätt", "Till äganderätt", "Till övrigt",
  "Till utlandet"
)

# Colors: one per tenure form, shared by its "Från" and its "Till" sector.
# Naming the vector ties each color to its sector explicitly instead of
# relying on positional matching.
tenure_colors <- c("#378A5F", "#D1C843", "#BDBDBD", "#6B1919", "#5EC6E0")
grid.col <- setNames(rep(tenure_colors, times = 2), sector_order)

# Each link takes the color of the sector it starts from (first column of the
# data frame). Deriving it from the data keeps the vector at one color per row
# regardless of how many rows the CSV has or how they are ordered.
link.col <- unname(grid.col[as.character(df2016[[1]])])

circos.clear()

# Some stuff we set first with circos.par(...): start at the top, go
# counter-clockwise, and leave a wider gap (15) after the last origin sector
# and after the last destination sector than between the others (5).
circos.par(
  start.degree = 90, clock.wise = FALSE,
  gap.after = c(
    rep(5, length(unique(df2016[[1]])) - 1), 15,
    rep(5, length(unique(df2016[[2]])) - 1), 15
  )
)

# ...and some stuff we set directly in the chord function:
chordDiagram(
  df2016,
  directional = 1, direction.type = c("diffHeight", "arrows"),
  diffHeight = -uh(4, "mm"),
  link.arr.type = "big.arrow",
  order = sector_order,
  grid.col = grid.col, col = link.col
)

# Reset the circos parameters so later plots start from the defaults.
circos.clear()
GMY
## [1] "MYA"