library(dplyr)
library(ggplot2)
library(readr)
# Read the banzuke records from the CSV file in the working directory.
banzuke <- read_csv(file = "banzuke.csv")
## Rows: 170406 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (6): rank, wrestler, training_qtrs, birthplace, birthdate, prev
## dbl (6): tournament, id, height_cm, weight_kg, prev_w, prev_l
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style data viewer only in an interactive session:
# View() needs a GUI and can fail when the script is run or knitted
# non-interactively.
if (interactive()) {
  View(banzuke)
}

The first thing I want to do is add two more columns: weight in pounds and height in inches. This will increase the number of columns from 12 to 14, as the output of `dim()` confirms.

# Add imperial-unit versions of the body measurements.
# 1 kg = 2.20462 lb (the rounder 2.2 understates every weight by about 0.2%);
# 1 in is exactly 2.54 cm, so dividing avoids a truncated conversion factor.
banzuke <- banzuke %>%
  mutate(
    weight_lbs = weight_kg * 2.20462,
    height_in = height_cm / 2.54
  )

# Confirm that the two new columns were appended.
dim(banzuke)
## [1] 170406     14

Now I’m going to make a graph of a single variable, weight. This will give us an idea of how heavy sumo wrestlers are. Here, weight is on the x axis and density is on the y axis. We can see that most wrestlers weigh around 300 pounds.

# Distribution of wrestler weight: a histogram scaled to density so that it
# shares its y axis with the kernel density curve drawn on top of it.
# after_stat(density) replaces the `..density..` notation, which ggplot2 has
# deprecated.
ggplot(banzuke, aes(x = weight_lbs)) +
  geom_histogram(aes(y = after_stat(density)), alpha = 0.5) +
  geom_density(alpha = 0.2, fill = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 5174 rows containing non-finite values (stat_bin).
## Warning: Removed 5174 rows containing non-finite values (stat_density).

I run the same code, but here I am using height. The tallest wrestlers are in the 70 inch region, which is about 5.8 ft (roughly 5 ft 10 in).

# Distribution of wrestler height: a histogram scaled to density so that it
# shares its y axis with the kernel density curve drawn on top of it.
# after_stat(density) replaces the `..density..` notation, which ggplot2 has
# deprecated.
ggplot(banzuke, aes(x = height_in)) +
  geom_histogram(aes(y = after_stat(density)), alpha = 0.5) +
  geom_density(alpha = 0.2, fill = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 5174 rows containing non-finite values (stat_bin).
## Warning: Removed 5174 rows containing non-finite values (stat_density).

Let’s make another graph, this time utilizing both height and weight as variables.

# Scatter plot of height (y) against weight (x), one point per row, with a
# smoothed trend line layered over the points.
ggplot(data = banzuke, mapping = aes(x = weight_lbs, y = height_in)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Height and weight of sumo wrestlers",
    x = "weight",
    y = "height"
  ) +
  theme_minimal()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5174 rows containing non-finite values (stat_smooth).
## Warning: Removed 5174 rows containing missing values (geom_point).