library(tidyverse)
library(leaflet)
library(mapview)
library(rgdal)
library(lubridate)
fightersLatLong <- read_csv("C:/Users/Trevor/Documents/School/_previous/RStudio/ufc/FightersLatLong.csv")
fights <- read_csv("C:/Users/Trevor/Documents/School/_previous/RStudio/ufc/Fights_Updated.csv")
a1 <- fightersLatLong %>%
filter(!is.na(Lat))
names(a1) <- c("x1", "name", "birthDate", "age", "birthPlace", "lat", "lon", "country", "height", "weight", "association", "weightClass", "fighterID", "url", "nickName", "feet", "inch", "photo" )
names(fights) <- c("x1", "eventName", "match", "fighter1", "fighter2", "method", "methodD", "Round", "time", "referee", "fighter1url", "fighter2url", "eventID", "date", "location")
separate(a1, col = birthPlace, sep = ",", into = c("city", "state"), remove = FALSE)
## # A tibble: 298 x 20
## x1 name birthDate age birthPlace city state lat lon country
## * <int> <chr> <chr> <int> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 253 John~ 9/12/1983 33 Ada, Okla~ Ada " Ok~ 34.8 -96.7 USA
## 2 1166 Jon ~ 7/7/1986 30 Adams, Ma~ Adams " Ma~ 42.6 -73.1 USA
## 3 1477 Keit~ 4/15/1958 58 Addison, ~ Addi~ " Il~ 41.9 -88.0 USA
## 4 1556 Sean~ 12/4/1975 41 Akron, Oh~ Akron " Oh~ 41.1 -81.5 USA
## 5 2 Dieg~ 12/31/19~ 35 Albuquerq~ Albu~ " Ne~ 35.1 -107. USA
## 6 51 John~ 9/26/1984 32 Albuquerq~ Albu~ " Ne~ 35.1 -107. USA
## 7 112 Keit~ 10/31/19~ 41 Albuquerq~ Albu~ " Ne~ 35.1 -107. USA
## 8 116 Heat~ 9/19/1980 36 Albuquerq~ Albu~ " Ne~ 35.1 -107. USA
## 9 125 Mich~ 1/6/1986 31 Albuquerq~ Albu~ " Ne~ 35.1 -107. USA
## 10 318 Carl~ 4/26/1984 32 Albuquerq~ Albu~ " Ne~ 35.1 -107. USA
## # ... with 288 more rows, and 10 more variables: height <int>,
## # weight <int>, association <chr>, weightClass <chr>, fighterID <int>,
## # url <chr>, nickName <chr>, feet <int>, inch <int>, photo <chr>
gloveIcon <- makeIcon(iconUrl = "http://www.mmastore-online.com/media/catalog/product/cache/1/image/600x600/9df78eab33525d08d6e5fb8d27136e95/u/f/ufc_official_fight_gloves_2.png",
iconWidth = 38,
iconHeight = 45,
iconAnchorX = 22,
iconAnchorY = 44)
states <- readOGR("C:/Users/Trevor/Documents/School/_previous/RStudio/cb_2017_us_state_20m.shp",
layer = "cb_2017_us_state_20m", GDAL1_integer64_policy = TRUE)
## OGR data source with driver: ESRI Shapefile
## Source: "C:/Users/Trevor/Documents/School/_previous/RStudio/cb_2017_us_state_20m.shp", layer: "cb_2017_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as doubles: ALAND AWATER
Then filtering the dataset so it only includes American born fighters, creating a ‘city’ and ‘state’ variable, the computing the number of fighters from that state and the percentage of all American born UFC fighters that come from that state.
fightersLatLong2 <- read_csv("C:/Users/Trevor/Documents/School/_previous/RStudio/ufc/FightersLatLong2.csv")
names(fightersLatLong2) <- c("x1", "name", "birthDate", "age", "birthPlace", "lat", "lon", "country", "height", "weight", "association", "weightClass", "fighterID", "url", "nickName", "feet", "inch", "photo" )
fighterStates <- filter(fightersLatLong2, country == "USA")
fighterStates2 <- separate(fighterStates, col = birthPlace, sep = ", ", into = c("city", "state"), remove = FALSE) %>%
filter(!is.na(state))%>%
group_by(state) %>%
summarize(N = n()) %>%
mutate(freq = N / sum(N),
pct = log10(freq*100),
pctNoLog = round((freq*100), 5))
There were differences in the list of states between the two data frames so I added rows to the fighterStates2 data frame to assist in the merge.
c2 <- add_row(fighterStates2, state = c("Delaware", "District of Columbia", "Montana", "Puerto Rico", "West Virginia", "Wyoming"), N = c(0,0,0,0,0,0), freq = c(0,0,0,0,0,0), pct = c(0.001,0.001,0.001,0.001,0.001,0.001))
b2 <- merge(states, c2, by.x = "NAME", by.y = "state", all = TRUE)
pal <- colorNumeric(palette = "BuPu", domain = b2$pct)
palB <- colorNumeric(palette = "BuPu", domain = b2$pctNoLog)
The states are graphed to represent the percentage of the UFC’s American born fighters hail from that state.
When the mouse hovers over a state, the State’s name is displayed. When you click on a cluster, the cluster will expand, showing more options. The icons are UFC gloves. When you click on a glove, a stock photo of the fighter will appear. The arrows in the top left corner zoom in and out. The easy button below the arrows zooms the map to level 3 The mini map in the bottom left corner shows where the map is currently located on a world map. The measure button in the top right corner allows the user to measure distance between two locations.
leaflet(states) %>%
addProviderTiles(provider = "CartoDB.PositronNoLabels") %>%
setView(lat = 39.514288, lng = -96.594997, zoom = 3) %>%
addPolygons(
fillColor = ~pal(b2$pct),
weight = 2,
opacity = .6,
color = "white",
dashArray = "3",
fillOpacity = 1,
label = b2$NAME,
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE)) %>%
addLegend(
pal = palB,
values = ~b2$pctNoLog,
opacity = 0.7,
title = "% of Fighters",
position = "bottomright") %>%
addMarkers(icon = gloveIcon,
lat = a1$lat,
lng = a1$lon,
popup = popupImage(a1$photo, src = "remote", width = 100, height = 150),
clusterOptions = markerClusterOptions(),
clusterId = "state") %>%
addMiniMap(position = "bottomleft") %>%
addEasyButton(easyButton(icon="fa-globe",
title="Zoom to Level 3",
onClick=JS("function(btn, map){ map.locate({setView: true}); }"))) %>%
addMeasure(
primaryLengthUnit = "feet",
secondaryLengthUnit = "miles",
primaryAreaUnit = "acres",
secondaryAreaUnit = "sqmiles",
activeColor = "#3D535D",
completedColor = "#7D4479")
This map only contains 300 markers for the fighter’s place of birth. I need to find a commercial service that can do a large batch geo-coord conversion to obtain all of the lat-long I would need to finish the map.
fights2 <- read_csv("C:/Users/Trevor/Documents/School/_previous/RStudio/ufc/Fights_Updated.csv")
fights2$Date <- as.Date(fights2$Date, format = "%m/%d/%Y")
fights3 <- fights2 %>%
mutate(Year = format(Date, "%Y"))
fights4 <- fights3 %>%
group_by(Year, Method) %>%
summarize(N = n()) %>%
mutate(freq = N / sum(N),
pct = round((freq*100), 0))
fights4$Year <- as.Date(fights4$Year, format = "%Y")
ggplot(fights4, aes(x = Year, y = pct, colour = Method)) +
geom_point() +
geom_smooth(se = FALSE, method = "lm") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
This graph helps to illustrate the fact that submission victories were extremely popular in the beginning of the UFC, but as time passes, the rate of submission victories falls sharply. This is likely due to the effect of the Gracie family on the UFC. The Gracie family were Jiu Jitsu experts who won many of their fights by submission because it was an unknown martial art to many of the early fighters in the UFC. As time progressed, fighters began learning Jiu Jitsu and learning defenses.
The graph also helps to show the increase in decision victories over time. The first UFC events did not have time limits or rounds which accounts for the delayed start of decision victories. The increase in decisions can possibly be attributed to the growth in skill of the fighters. As the popularity of the UFC grew, fighters began learning multiple disciplines of martial arts which helped even out many of the fights.
Loading fresh data. Fighters is the list of all UFC fighters Fights is data from all UFC fights
Fighters <- read_csv("C:/Users/Trevor/Documents/School/_previous/RStudio/ufc/Fighters_Updated.csv", col_types = cols(Birth_Date = col_date(format = "%m/%d/%Y")))[,-1]
Fights <- read_csv("C:/Users/Trevor/Documents/School/_previous/RStudio/ufc/Fights_Updated.csv", col_types = cols(Date = col_date(format = "%m/%d/%Y")))[,-1]
gsub to prepare to merge the data frames
Fights$Fighter1_id <- gsub('.*-','',Fights$Fighter1_url)
Fights$Fighter2_id <- gsub('.*-','',Fights$Fighter2_url)
Fights=merge(Fights,Fighters[,c('Birth_Date','Fighter_id')],by.x='Fighter1_id',by.y='Fighter_id')
Fights=merge(Fights,Fighters[,c('Birth_Date','Fighter_id')],by.x='Fighter2_id',by.y='Fighter_id')
creating a new column that inputs whether the winner of the fight was younger or older
oldwins <- mutate(Fights, older =
ifelse((Birth_Date.x < Birth_Date.y & endsWith(Fighter1, 'win')) | (Birth_Date.y < Birth_Date.x & endsWith(Fighter2, 'win')), "older", "younger"),
fightyear = make_date(year(Date)))
This graph shows us that the younger fighter wins the majority of the time and that has generally been the trend for the entierty of the UFC’s history. THe black line shows the date when the UFC implemented strict USADA drug testing. My thoughts were that the drug testing would have an effect on the effectiveness of the older fighters but this graph does no show much of a difference thus far.
The black vertical line represents when the UFC implemented USADA drug testing.
na.omit(oldwins) %>%
ggplot(aes(x = fightyear, fill = older)) +
geom_bar(position = "fill") +
geom_vline(aes(xintercept=as.numeric(as.Date("2015-07-01"))),size=2)
The Y axis is all of the different types of victories The X Axis is the dates of the fights. The black vertical line represents when the UFC implemented USADA drug testing
na.omit(oldwins) %>%
ggplot(aes(x = Date, y = Method, col = older)) +
geom_point(alpha = 0.5) +
geom_jitter(na.rm = TRUE) +
geom_vline(aes(xintercept=as.numeric(as.Date("2015-07-01"))),size=1)