Let’s start at the end

The script below visualizes mobility between different forms of tenure in 2016. It should reproduce the figure you see above.

NOTICE TO MARINERS: Results are presented in Swedish, which is a secret language spoken only by a few sad bastards somewhere in the north. If you find this interesting but confusing, email me.

prescript

# If you, like me, run your code in RStudio, this could save you 1.5 seconds
# :-) by making the folder of the currently open script the working directory.

library(rstudioapi)

# Only change directory when there is something to change to: outside RStudio
# there is no active document, and an unsaved document (or the console) has an
# empty path, for which setwd() would stop with an error.
# local() keeps the helper variable out of the global environment.
local({
  if (!rstudioapi::isAvailable()) {
    message("Not running in RStudio; working directory left unchanged.")
    return(invisible(NULL))
  }
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (!nzchar(script_path)) {
    message("Active document has no file path; working directory left unchanged.")
    return(invisible(NULL))
  }
  setwd(dirname(script_path))
})

script

Data

# Clear the workspace.
# NOTE: this removes objects from the global environment only; attached
# packages and options are left as they are.
rm(list = ls())

# Input data; the code below uses its columns `from`, `to` and `value`.
df2016 <- read.csv("circle2016.csv")

# Six-number summary (min, quartiles, median, mean, max) of `value` for each
# level of `from`.
s1 <- by(df2016$value, df2016$from, summary)

# by() returns a list with one summary per group. Stacking the summaries with
# rbind() keeps the group labels as row names and the statistic names
# ("Min.", "1st Qu.", ...) as column names, so neither has to be typed in by
# hand. Hand-written labels go silently wrong as soon as the groups in the
# data change or sort in a different order.
# check.names = FALSE keeps headers such as "1st Qu." exactly as they are.
dfSUM1 <- data.frame(do.call(rbind, s1), check.names = FALSE)

# Group label as the first column (a factor), followed by the statistics.
dfSUM1 <- cbind(from = factor(rownames(dfSUM1)), dfSUM1)
rownames(dfSUM1) <- NULL

knitr::kable(dfSUM1)
from Min. 1st Qu. Median Mean 3rd Qu. Max.
Från bostadsrätt 9488 14106.00 79935 58358.40 83939 104324
Från hyresrätt 17796 32487.00 97260 136130.40 134012 399097
Från utlandet 19294 26194.75 31360 40750.75 45916 80989
Från äganderätt 13472 21021.00 80729 79323.20 135286 146108
Från övrigt 5136 10437.00 10591 16492.60 16438 39861
# Six-number summary (min, quartiles, median, mean, max) of `value` for each
# level of `to`.
s2 <- by(df2016$value, df2016$to, summary)

# by() returns a list with one summary per group. Stacking the summaries with
# rbind() keeps the group labels as row names and the statistic names
# ("Min.", "1st Qu.", ...) as column names, so neither has to be typed in by
# hand. Hand-written labels go silently wrong as soon as the groups in the
# data change or sort in a different order.
# check.names = FALSE keeps headers such as "1st Qu." exactly as they are.
dfSUM2 <- data.frame(do.call(rbind, s2), check.names = FALSE)

# Group label as the first column (a factor), followed by the statistics.
dfSUM2 <- cbind(to = factor(rownames(dfSUM2)), dfSUM2)
rownames(dfSUM2) <- NULL

knitr::kable(dfSUM2)
to Min. 1st Qu. Median Mean 3rd Qu. Max.
Till bostadsrätt 10437 19294 80729 62408.8 97260 104324
Till hyresrätt 39861 79935 80989 149198.0 146108 399097
Till utlandet 5136 8400 11480 11473.0 14553 17796
Till äganderätt 16438 28495 83939 79634.0 134012 135286
Till övrigt 10591 14106 21021 22486.0 32487 34225
# Or, with less code involved: let skimr produce the grouped summary.
library(skimr)
library(dplyr)

# Switch off the `hist` statistic for integer columns: the otherwise
# wonderful histograms do not render well in markdown.
# NOTE(review): this looks like the skimr 1.x way of calling skim_with() --
# confirm against the installed skimr version.
skim_with(integer = list(hist = NULL))

# Summary statistics of `value` for each level of `from`.
df2016 %>%
  dplyr::group_by(from) %>%
  skim(value)
## Skim summary statistics
##  n obs: 24 
##  n variables: 3 
##  group variables: from 
## 
## -- Variable type:integer -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##              from variable missing complete n      mean        sd    p0
##  Från bostadsrätt    value       0        5 5  58358.4   43529.71  9488
##    Från hyresrätt    value       0        5 5 136130.4  154444.01 17796
##     Från utlandet    value       0        4 4  40750.75  27521.49 19294
##   Från äganderätt    value       0        5 5  79323.2   61906.28 13472
##       Från övrigt    value       0        5 5  16492.6   13661.59  5136
##       p25   p50    p75   p100
##  14106    79935  83939  1e+05
##  32487    97260 134012  4e+05
##  26194.75 31360  45916  80989
##  21021    80729 135286 146108
##  10437    10591  16438  39861
# Summary statistics of `value` for each level of `to`.
df2016 %>%
  dplyr::group_by(to) %>%
  skim(value)
## Skim summary statistics
##  n obs: 24 
##  n variables: 3 
##  group variables: to 
## 
## -- Variable type:integer -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##                to variable missing complete n     mean        sd    p0
##  Till bostadsrätt    value       0        5 5  62408.8  44348.26 10437
##    Till hyresrätt    value       0        5 5 149198   144795.95 39861
##     Till utlandet    value       0        4 4  11473     5418.31  5136
##   Till äganderätt    value       0        5 5  79634    56306.25 16438
##       Till övrigt    value       0        5 5  22486    10626.45 10591
##    p25   p50    p75   p100
##  19294 80729  97260  1e+05
##  79935 80989 146108  4e+05
##   8400 11480  14553  17796
##  28495 83939 134012 135286
##  14106 21021  32487  34225

Let’s circlize it

# circlize draws the chord diagram. This package is awesome.
library(circlize)

# Sector colors: five colors, repeated once so that the vector has one entry
# for each of the ten sectors (five "Från ..." and five "Till ...").
grid.col <- rep(c("#378A5F", "#D1C843", "#BDBDBD", "#6B1919", "#5EC6E0"),
                times = 2)

# Link colors: one entry per row of the data frame (24 in total) -- five rows
# for each of the first four colors and four rows for the last one.
# NOTE(review): this presumably relies on the rows of df2016 being grouped by
# origin in the same order as the sectors below -- confirm against the CSV.
link.col <- rep(grid.col[1:5], times = c(5, 5, 5, 5, 4))

# Reset any previous circlize state, then set the layout parameters that are
# passed through circos.par(): starting angle, drawing direction and the gaps
# between sectors. Gaps are 5 after every sector except the last sector of the
# first-column group and the last sector of the second-column group, which
# get 15.
circos.clear()
circos.par(
  start.degree = 90,
  clock.wise = FALSE,
  gap.after = c(
    rep(c(5, 15), times = c(length(unique(df2016[[1]])) - 1, 1)),
    rep(c(5, 15), times = c(length(unique(df2016[[2]])) - 1, 1))
  )
)

# Everything else is set directly in the chord function: sector order, colors
# and how link direction is drawn (height offset plus big arrows).
chordDiagram(
  df2016,
  order = c("Från bostadsrätt", "Från hyresrätt", "Från äganderätt", "Från övrigt", "Från utlandet",
            "Till bostadsrätt", "Till hyresrätt", "Till äganderätt", "Till övrigt", "Till utlandet"),
  grid.col = grid.col,
  col = link.col,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  diffHeight = -uh(4, "mm"),
  link.arr.type = "big.arrow"
)

# Leave circlize in a clean state for whatever is plotted next.
circos.clear()
GMY
## [1] "MYA"