library(dplyr)
library(plotly)
options(stringsAsFactors = FALSE)

 
 
 
Three years, 18 tournaments:

# Basho identifiers ("YYYY.MM") for every tournament held in 2014-2016.
# outer() pairs each year with each month and as.vector() flattens the result
# column by column, so the year varies fastest: 2014.01, 2015.01, 2016.01,
# 2014.03, ... (i.e. the vector is NOT in chronological order).
tournament <- as.vector(
    outer(
        2014:2016, # years
        sprintf("%02d", seq(1, 11, 2)), # six tournaments a year
        paste,
        sep = "."
    )
)

# Chronological grid: one row per year, one column per tournament.
m <- matrix(sort(tournament), ncol = 6, byrow = TRUE)

# Print the grid without headers. Blank labels are supplied per row and per
# column, so rowlab must be sized by nrow(m), not ncol(m).
prmatrix(
    m,
    rowlab = rep("", nrow(m)),
    collab = rep("", ncol(m)),
    quote = FALSE
)
                                                
 2014.01 2014.03 2014.05 2014.07 2014.09 2014.11
 2015.01 2015.03 2015.05 2015.07 2015.09 2015.11
 2016.01 2016.03 2016.05 2016.07 2016.09 2016.11

 
 
 
Fetch makuuchi (top division) results from the Cervus1983/sumodb repository:

# Build one results-CSV URL per tournament, read each file, and stack the
# per-tournament data frames into a single table of bouts.
bout <- tournament %>%
    paste0(
        "https://raw.githubusercontent.com/Cervus1983/sumodb/master/CSV/",
        .,
        ".results.csv"
    ) %>%
    lapply(read.csv) %>%
    do.call(rbind, .)

# Show the combined table.
bout

 
 
 
The most frequent kimarite (winning technique) is yorikiri — 31.3% of all bouts (including forfeits, known as fusen):

# Number of bouts decided by each kimarite, most frequent first.
bout %>%
    count(kimarite) %>%
    arrange(desc(n))

 
 
 
Banzuke for each tournament:

# Build one banzuke-CSV URL per tournament, read each file, and stack the
# per-tournament data frames into a single table.
banzuke <- tournament %>%
    paste0(
        "https://raw.githubusercontent.com/Cervus1983/sumodb/master/CSV/",
        .,
        ".banzuke.csv"
    ) %>%
    lapply(read.csv) %>%
    do.call(rbind, .)

# Show the combined table.
banzuke

 
 
 
Height & weight distribution:

# Histogram of rikishi heights. banzuke stacks one file per tournament, so a
# wrestler can occupy several rows; heights are averaged per rikishi so that
# each wrestler is counted once.
# na.rm = TRUE keeps a single missing measurement from turning a wrestler's
# mean, or the bin limits below, into NA.
banzuke %>%
    group_by(rikishi) %>%
    summarise(height = mean(height, na.rm = TRUE)) %>%
    plot_ly(width = 910) %>%
    add_histogram(
        x = ~height,
        # 1 cm wide bins covering the whole range of recorded heights
        xbins = list(
            start = floor(min(banzuke$height, na.rm = TRUE)),
            end = ceiling(max(banzuke$height, na.rm = TRUE)),
            size = 1
        )
    ) %>%
    layout(
        xaxis = list(title = "Height (cm)"),
        # hide the count axis decorations
        yaxis = list(
            showgrid = FALSE,
            showticklabels = FALSE
        )
    )

# Histogram of rikishi weights. banzuke stacks one file per tournament, so a
# wrestler can occupy several rows; weights are averaged per rikishi so that
# each wrestler is counted once.
# na.rm = TRUE keeps a single missing measurement from turning a wrestler's
# mean, or the bin limits below, into NA.
banzuke %>%
    group_by(rikishi) %>%
    summarise(weight = mean(weight, na.rm = TRUE)) %>%
    plot_ly(width = 910) %>%
    add_histogram(
        x = ~weight,
        # 5 kg wide bins covering the whole range of recorded weights
        xbins = list(
            start = floor(min(banzuke$weight, na.rm = TRUE)),
            end = ceiling(max(banzuke$weight, na.rm = TRUE)),
            size = 5
        )
    ) %>%
    layout(
        xaxis = list(title = "Weight (kg)"),
        # hide the count axis decorations
        yaxis = list(
            showgrid = FALSE,
            showticklabels = FALSE
        )
    )

 
 
 
Add height & weight data to the bout results, group the less frequent kimarite as "other", and plot the kimarite distribution:

# Attach the winner's, then the loser's, height and weight to every bout.
# `rikishi` serves as a lookup key: it is pointed at the wrestler of interest
# before each join, and left_join() then matches on whatever columns bout and
# banzuke have in common.
# NOTE(review): the join keys are implicit (presumably basho and rikishi) —
# confirm against the CSV headers.
# After the second pass `rikishi` is left holding the loser's name.
bout2 <- bout %>%
    # pass 1: winner
    mutate(rikishi = ifelse(win1 == 1, shikona1, shikona2)) %>%
    left_join(banzuke) %>%
    select(-rank) %>%
    rename(winner.height = height, winner.weight = weight) %>%
    # pass 2: loser
    mutate(rikishi = ifelse(win1 == 0, shikona1, shikona2)) %>%
    left_join(banzuke) %>%
    select(-rank) %>%
    rename(loser.height = height, loser.weight = weight)
  
# Label each bout with a grouped technique (kimarite2): the 7 most frequent
# kimarite in bout2 keep their own name, all others become "other". The
# frequency ranking is computed on bout2 as a whole, before any rows are
# dropped.
bout3 <- bout2 %>%
    count(kimarite) %>%
    arrange(desc(n)) %>%
    mutate(kimarite2 = ifelse(row_number() > 7, "other", kimarite)) %>%
    select(kimarite, kimarite2) %>%
    # bout2 stays on the left so that its columns come first
    inner_join(bout2, .) %>%
    filter(
        complete.cases(.), # drop rows with any missing value (e.g. height/weight)
        kimarite != "fusen" # drop forfeits
    )
  
# Scatter plot of winner's vs. loser's height: one marker per row of bout3,
# coloured by grouped kimarite. The hover label shows the basho, both shikona
# and the exact kimarite.
plot_ly(bout3, width = 910) %>%
    add_markers(
        x = ~winner.height,
        y = ~loser.height,
        color = ~kimarite2,
        hoverinfo = "text",
        text = ~paste(basho, "~", shikona1, "v", shikona2, "~", kimarite)
    ) %>%
    layout(
        xaxis = list(title = "Winner's height"),
        yaxis = list(title = "Loser's height")
    )

# Scatter plot of winner's vs. loser's weight: one marker per row of bout3,
# coloured by grouped kimarite. The hover label shows the basho, both shikona
# and the exact kimarite.
plot_ly(bout3, width = 910) %>%
    add_markers(
        x = ~winner.weight,
        y = ~loser.weight,
        color = ~kimarite2,
        hoverinfo = "text",
        text = ~paste(basho, "~", shikona1, "v", shikona2, "~", kimarite)
    ) %>%
    layout(
        xaxis = list(title = "Winner's weight"),
        yaxis = list(title = "Loser's weight")
    )

 

LS0tDQp0aXRsZTogIkRpc3RyaWJ1dGlvbiBvZiBraW1hcml0ZSAod2lubmluZyB0ZWNobmlxdWVzKSBieSB3ZWlnaHQgYW5kIGhlaWdodCBvZiByaWtpc2hpIChzdW1vIHdyZXN0bGVycykiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHBsb3RseSkNCm9wdGlvbnMoc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQ0KYGBgDQoNCiZuYnNwOyAgDQombmJzcDsgIA0KJm5ic3A7ICANClRocmVlIHllYXJzLCBgciAzICogNmAgdG91cm5hbWVudHM6DQpgYGB7cn0NCnRvdXJuYW1lbnQgPC0gYXBwbHkoDQoJZXhwYW5kLmdyaWQoDQoJCTIwMTQ6MjAxNiwgIyB5ZWFycw0KCQlzcHJpbnRmKCIlMDJkIiwgc2VxKDEsIDExLCAyKSkgIyBzaXggdG91cm5hbWVudHMgYSB5ZWFyDQoJKSwNCgkxLA0KCXBhc3RlLCBjb2xsYXBzZSA9ICIuIg0KKQ0KICANCm0gPC0gbWF0cml4KHRvdXJuYW1lbnRbb3JkZXIodG91cm5hbWVudCldLCBuY29sID0gNiwgYnlyb3cgPSBUUlVFKQ0KICANCnBybWF0cml4KA0KCW0sDQoJcm93bGFiID0gcmVwKCIiLCBuY29sKG0pKSwNCgljb2xsYWIgPSByZXAoIiIsIG5jb2wobSkpLA0KCXF1b3RlID0gRkFMU0UNCikNCg0KYGBgDQoNCiZuYnNwOyAgDQombmJzcDsgIA0KJm5ic3A7ICANCkZldGNoIFttYWt1dWNoaV0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvTWFrdXVjaGkpICh0b3AgZGl2aXNpb24pIHJlc3VsdHMgZnJvbSBbQ2VydnVzMTk4My9zdW1vZGJdKGh0dHBzOi8vZ2l0aHViLmNvbS9DZXJ2dXMxOTgzL3N1bW9kYikgcmVwb3NpdG9yeToNCmBgYHtyfQ0KYm91dCA8LSBkby5jYWxsKA0KCXJiaW5kLA0KCWxhcHBseSgNCgkJdG91cm5hbWVudCwNCgkJZnVuY3Rpb24oeCkgcmVhZC5jc3YoDQoJCQlwYXN0ZTAoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9DZXJ2dXMxOTgzL3N1bW9kYi9tYXN0ZXIvQ1NWLyIsIHgsICIucmVzdWx0cy5jc3YiKQ0KCQkpDQoJKQ0KKQ0KICANCmJvdXQNCmBgYA0KDQombmJzcDsgIA0KJm5ic3A7ICANCiZuYnNwOyAgDQpNb3N0IGZyZXF1ZW50IFtraW1hcml0ZV0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvS2ltYXJpdGUpIGlzIGByIHNwcmludGYoIiVzIiwgKGJvdXQgJT4lIGNvdW50KGtpbWFyaXRlKSAlPiUgYXJyYW5nZSgtbikpWzEsIDFdKWAgLS0tIGByIHNwcmludGYoIiUuMWYlJSIsIChib3V0ICU+JSBjb3VudChraW1hcml0ZSkgJT4lIGFycmFuZ2UoLW4pKVsxLCAyXSAvIG5yb3coYm91dCkgKiAxMDApYCBvZiBhbGwgYm91dHMgKGluY2x1ZGluZyBmb3JmZWl0cyAtLS0gW2Z1c2VuXShodHRwczovL2VuLndpa3Rpb25hcnkub3JnL3dpa2kvZnVzZW4pKToNCmBgYHtyfQ0KYm91dCAlPiUgY291bnQoa2ltYXJpdGUpICU+JSBhcnJhbmdlKC1uKQ0KYGBgDQoNCiZuYnNwOyAgDQombmJzcDsgIA0KJm5ic3A7ICANCltCYW56dWtl
XShodHRwczovL2VuLndpa2lwZWRpYS5vcmcvd2lraS9CYW56dWtlKSBmb3IgZWFjaCB0b3VybmFtZW50Og0KYGBge3J9DQpiYW56dWtlIDwtIGRvLmNhbGwoDQoJcmJpbmQsDQoJbGFwcGx5KA0KCQl0b3VybmFtZW50LA0KCQlmdW5jdGlvbih4KSByZWFkLmNzdigNCgkJCXBhc3RlMCgiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL0NlcnZ1czE5ODMvc3Vtb2RiL21hc3Rlci9DU1YvIiwgeCwgIi5iYW56dWtlLmNzdiIpDQoJCSkNCgkpDQopDQogIA0KYmFuenVrZQ0KYGBgDQoNCiZuYnNwOyAgDQombmJzcDsgIA0KJm5ic3A7ICANCkhlaWdodCAmIHdlaWdodCBkaXN0cmlidXRpb246DQpgYGB7cn0NCmJhbnp1a2UgJT4lDQoJZ3JvdXBfYnkocmlraXNoaSkgJT4lDQoJc3VtbWFyaXNlKGhlaWdodCA9IG1lYW4oaGVpZ2h0KSkgJT4lDQoJcGxvdF9seSh3aWR0aCA9IDkxMCkgJT4lDQoJYWRkX2hpc3RvZ3JhbSgNCgkJeCA9IH5oZWlnaHQsDQoJCXhiaW5zID0gbGlzdCgNCgkJCXN0YXJ0ID0gZmxvb3IobWluKGJhbnp1a2UkaGVpZ2h0KSksDQoJCQllbmQgPSBjZWlsaW5nKG1heChiYW56dWtlJGhlaWdodCkpLA0KCQkJc2l6ZSA9IDENCgkJKQ0KCSkgJT4lDQoJbGF5b3V0KA0KCQl4YXhpcyA9IGxpc3QodGl0bGUgPSAiSGVpZ2h0IChjbSkiKSwNCgkJeWF4aXMgPSBsaXN0KA0KCQkJc2hvd2dyaWQgPSBGQUxTRSwNCgkJCXNob3d0aWNrbGFiZWxzID0gRkFMU0UNCgkJKQ0KCSkNCg0KYmFuenVrZSAlPiUNCglncm91cF9ieShyaWtpc2hpKSAlPiUNCglzdW1tYXJpc2Uod2VpZ2h0ID0gbWVhbih3ZWlnaHQpKSAlPiUNCglwbG90X2x5KHdpZHRoID0gOTEwKSAlPiUNCglhZGRfaGlzdG9ncmFtKA0KCQl4ID0gfndlaWdodCwNCgkJeGJpbnMgPSBsaXN0KA0KCQkJc3RhcnQgPSBmbG9vcihtaW4oYmFuenVrZSR3ZWlnaHQpKSwNCgkJCWVuZCA9IGNlaWxpbmcobWF4KGJhbnp1a2Ukd2VpZ2h0KSksDQoJCQlzaXplID0gNQ0KCQkpDQoJKSAlPiUNCglsYXlvdXQoDQoJCXhheGlzID0gbGlzdCh0aXRsZSA9ICJXZWlnaHQgKGtnKSIpLA0KCQl5YXhpcyA9IGxpc3QoDQoJCQlzaG93Z3JpZCA9IEZBTFNFLA0KCQkJc2hvd3RpY2tsYWJlbHMgPSBGQUxTRQ0KCQkpDQoJKQ0KYGBgDQoNCiZuYnNwOyAgDQombmJzcDsgIA0KJm5ic3A7ICANCkFkZCBoZWlnaHQgJiB3ZWlnaHQgZGF0YSB0byBib3V0IHJlc3VsdHMsIGdyb3VwIGxlc3MgZnJlcXVlbnQga2ltYXJpdGUgYXMgKm90aGVyKiwgYW5kIHBsb3Qga2ltYXJpdGUgZGlzdHJpYnV0aW9uOg0KYGBge3IsIG1lc3NhZ2U9RkFMU0V9DQpib3V0MiA8LSBsZWZ0X2pvaW4oDQoJbGVmdF9qb2luKA0KCQlib3V0ICU+JSBtdXRhdGUocmlraXNoaSA9IGlmZWxzZSh3aW4xID09IDEsIHNoaWtvbmExLCBzaGlrb25hMikpLA0KCQliYW56dWtlDQoJKSAlPiUNCgkJc2VsZWN0KC1yYW5rKSAlPiUNCgkJcmVuYW1lKHdpbm5lci5oZWlnaHQgPSBoZWlnaHQsIHdpbm5lci53ZWlnaHQgPSB3ZWlnaHQpICU+JQ0K
CQltdXRhdGUocmlraXNoaSA9IGlmZWxzZSh3aW4xID09IDAsIHNoaWtvbmExLCBzaGlrb25hMikpLA0KCWJhbnp1a2UNCikgJT4lDQoJc2VsZWN0KC1yYW5rKSAlPiUNCglyZW5hbWUobG9zZXIuaGVpZ2h0ID0gaGVpZ2h0LCBsb3Nlci53ZWlnaHQgPSB3ZWlnaHQpDQogIA0KYm91dDMgPC0gaW5uZXJfam9pbigNCglib3V0MiwNCglib3V0MiAlPiUNCgkJY291bnQoa2ltYXJpdGUpICU+JQ0KCQlhcnJhbmdlKC1uKSAlPiUNCgkJbXV0YXRlKA0KCQkJZnJlcV9yYW5rID0gcm93X251bWJlcigpLA0KCQkJa2ltYXJpdGUyID0gaWZlbHNlKGZyZXFfcmFuayA+IDcsICJvdGhlciIsIGtpbWFyaXRlKSAjIHRvcCA3IGtpbWFyaXRlLCBldmVyeXRoaW5nIGVsc2UgYXMgIm90aGVyIg0KCQkpICU+JQ0KCQlzZWxlY3Qoa2ltYXJpdGUsIGtpbWFyaXRlMikNCikgJT4lIGZpbHRlcigNCgkJY29tcGxldGUuY2FzZXMoLiksICMgb21pdCByZWNvcmRzIHdpdGggbWlzc2luZyB3ZWlnaHQvaGVpZ2h0DQoJCWtpbWFyaXRlICE9ICJmdXNlbiIgIyBvbWl0IGZvcmZlaXRzDQoJKQ0KICANCmJvdXQzICU+JQ0KCXBsb3RfbHkod2lkdGggPSA5MTApICU+JQ0KCWFkZF9tYXJrZXJzKA0KCQl4ID0gfndpbm5lci5oZWlnaHQsDQoJCXkgPSB+bG9zZXIuaGVpZ2h0LA0KCQljb2xvciA9IH5raW1hcml0ZTIsDQoJCWhvdmVyaW5mbyA9ICJ0ZXh0IiwNCgkJdGV4dCA9IH5wYXN0ZShiYXNobywgIn4iLCBzaGlrb25hMSwgInYiLCBzaGlrb25hMiwgIn4iLCBraW1hcml0ZSkNCgkpICU+JQ0KCWxheW91dCgNCgkJeGF4aXMgPSBsaXN0KA0KCQkJdGl0bGUgPSAiV2lubmVyJ3MgaGVpZ2h0Ig0KCQkpLA0KCQl5YXhpcyA9IGxpc3QoDQoJCQl0aXRsZSA9ICJMb3NlcidzIGhlaWdodCINCgkJKQ0KCSkNCg0KYm91dDMgJT4lDQoJcGxvdF9seSh3aWR0aCA9IDkxMCkgJT4lDQoJYWRkX21hcmtlcnMoDQoJCXggPSB+d2lubmVyLndlaWdodCwNCgkJeSA9IH5sb3Nlci53ZWlnaHQsDQoJCWNvbG9yID0gfmtpbWFyaXRlMiwNCgkJaG92ZXJpbmZvID0gInRleHQiLA0KCQl0ZXh0ID0gfnBhc3RlKGJhc2hvLCAifiIsIHNoaWtvbmExLCAidiIsIHNoaWtvbmEyLCAifiIsIGtpbWFyaXRlKQ0KCSkgJT4lDQoJbGF5b3V0KA0KCQl4YXhpcyA9IGxpc3QoDQoJCQl0aXRsZSA9ICJXaW5uZXIncyB3ZWlnaHQiDQoJCSksDQoJCXlheGlzID0gbGlzdCgNCgkJCXRpdGxlID0gIkxvc2VyJ3Mgd2VpZ2h0Ig0KCQkpDQoJKQ0KYGBgDQoNCiZuYnNwOyAg