Data Exploration Exercise

This is a short exercise to practice data visualization and modeling using the TidyTuesday Week 3 Art Collections data from the Tate Art Museum. The data visualizations created in this notebook look at the video/film medium, the digital medium and the ratio of artworks to artists. The modeling section involves training a regularised regression model with text features, following Julia Silge’s tidymodels tutorial on the Tate collection dataset.

# load libraries
library(tidyverse)
library(viridis)
library(tidytext)
library(tidymodels)
library(glmnet)
library(stopwords)
library(textrecipes)
library(vip)
library(wesanderson)
library(ggpubr)
# import data: artwork table from the TidyTuesday 2021-01-12 release
artwork <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-12/artwork.csv"
)

── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_character(),
  id = col_double(),
  artistId = col_double(),
  year = col_double(),
  acquisitionYear = col_double(),
  width = col_double(),
  height = col_double(),
  depth = col_double(),
  thumbnailCopyright = col_logical()
)
ℹ Use `spec()` for the full column specifications.
# import data: artist table from the Tate collection repository
artists <- readr::read_csv(
  "https://github.com/tategallery/collection/raw/master/artist_data.csv"
)

── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  id = col_double(),
  name = col_character(),
  gender = col_character(),
  dates = col_character(),
  yearOfBirth = col_double(),
  yearOfDeath = col_double(),
  placeOfBirth = col_character(),
  placeOfDeath = col_character(),
  url = col_character()
)
# number of rows and columns in the artwork table
dim(artwork)
[1] 69201    20
# number of rows and columns in the artists table
dim(artists)
[1] 3532    9

Visualization

# distribution: creation year (10-year bins; rows without a year are dropped)
p1 <- artwork %>%
  drop_na(year) %>%
  ggplot(aes(x = year)) +
  geom_histogram(binwidth = 10, fill = "#014f86", alpha = 0.8) +
  labs(
    title = "Distribution of Creation Year",
    x = "Year",
    caption = "Data from Tate Art Museum"
  ) +
  theme(title = element_text(size = 9))

# distribution: acquisition year (10-year bins; rows without a year are dropped)
p2 <- artwork %>%
  drop_na(acquisitionYear) %>%
  ggplot(aes(x = acquisitionYear)) +
  geom_histogram(binwidth = 10, fill = "#bc6c25", alpha = 0.8) +
  labs(
    title = "Distribution of Acquisition Year",
    x = "Year",
    caption = "Data from Tate Art Museum"
  ) +
  theme(title = element_text(size = 9))

# show both histograms side by side
ggarrange(p1, p2, nrow = 1, ncol = 2)

# number of distinct medium descriptions
n_distinct(artwork$medium)
[1] 3402
# number of distinct creation years
n_distinct(artwork$year)
[1] 361
# proportion of video/film medium by creation year
# Flag artworks whose medium mentions video or film. Matching is
# case-insensitive: the previous pattern spelled its lower-case alternative
# as "flim", so media written with a lower-case "film" were never flagged.
artwork$vf <- ifelse(
  grepl("video|film", artwork$medium, ignore.case = TRUE),
  "vf",
  "non-vf"
)

# Count artworks per (year, vf), convert the counts to within-year shares,
# and plot only the video/film share.
artwork %>%
  count(year, vf) %>%
  group_by(year) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  filter(vf == "vf") %>%
  ggplot(aes(x = year, y = proportion)) +
  geom_col(fill = "#0f4c5c", alpha = 0.7) +
  labs(
    x = "Year",
    y = "Proportion",
    title = "Proportion of Video/film artworks by creation year",
    caption = "Data from Tate Art Museum"
  )

# proportion of digital by creation year
# Label each artwork by whether its medium mentions "Digital"/"digital".
artwork <- artwork %>%
  mutate(
    digital = ifelse(grepl("Digital|digital", medium), "digital", "non-digital")
  )

# Within-year share of digital artworks, drawn as columns.
artwork %>%
  count(year, digital) %>%
  group_by(year) %>%
  mutate(proportion = n / sum(n)) %>%
  filter(digital != "non-digital") %>%
  ggplot(aes(x = year, y = proportion)) +
  geom_col(alpha = 0.7, fill = "#0f4c5c") +
  labs(
    title = "Proportion of digital artworks by creation year",
    x = "Year",
    y = "Proportion",
    caption = "Data from Tate Art Museum"
  )

# proportion of digital artworks by acquisition year
# Lollipop chart: a point at each year's digital share with a stem down to 0.
artwork %>%
  count(acquisitionYear, digital) %>%
  group_by(acquisitionYear) %>%
  mutate(proportion = n / sum(n)) %>%
  filter(digital != "non-digital") %>%
  ggplot(aes(x = acquisitionYear, y = proportion)) +
  geom_point(color = "#9a031e", size = 3) +
  geom_segment(
    aes(x = acquisitionYear, xend = acquisitionYear, y = 0, yend = proportion),
    color = "#05668d"
  ) +
  labs(
    title = "Proportion of digital artworks by acquisition year",
    x = "Acquisition Year",
    y = "Proportion",
    caption = "Data from Tate Art Museum"
  ) +
  theme(legend.position = "none")

# unique artist count by acquisitionYear
# Rows without an acquisition year are dropped; missing artist ids are not
# counted as an artist.
artwork %>%
  drop_na(acquisitionYear) %>%
  group_by(acquisitionYear) %>%
  summarise(artist_count = n_distinct(artistId, na.rm = TRUE)) %>%
  ggplot(aes(x = acquisitionYear, y = artist_count)) +
  geom_line(color = "#e36414") +
  labs(
    title = "Artist count and acquisition year",
    x = "Year",
    y = "Artist count",
    caption = "Data from Tate Art Museum"
  )

# ratio of distinct artworks to distinct artists by acquisitionYear
# Restricted to acquisitions from 1900 onwards; bar height and fill both
# encode the ratio.
artwork %>%
  filter(acquisitionYear >= 1900) %>%
  group_by(acquisitionYear) %>%
  summarise(
    artist_count = n_distinct(artistId, na.rm = TRUE),
    artwork_count = n_distinct(id),
    ratio = artwork_count / artist_count
  ) %>%
  ggplot(aes(x = acquisitionYear, y = ratio, fill = ratio)) +
  geom_col() +
  labs(
    title = "Ratio of artworks to artists, by acquisition year (1900 onwards)",
    subtitle = "Artworks owned or jointly owned by Tate",
    x = "Acquisition Year",
    y = "Ratio",
    fill = "Ratio",
    caption = "Data from Tate Art Museum"
  ) +
  theme_bw() +
  scale_fill_viridis(option = "cividis")
`summarise()` ungrouping output (override with `.groups` argument)

# 5 most frequent mediums: artworks created after 1900
# count(sort = TRUE) orders by descending frequency; slice_head() keeps the
# first five rows. `TRUE` is spelled out because `T` can be reassigned.
artwork %>%
  filter(!is.na(medium), year > 1900) %>%
  count(medium, sort = TRUE) %>%
  slice_head(n = 5)

# 5 most frequent mediums: artworks created in or before 1900
artwork %>%
  filter(!is.na(medium), year <= 1900) %>%
  count(medium, sort = TRUE) %>%
  slice_head(n = 5)
# most common words in describing medium
# Modeling data: creation year and medium text for artworks created after
# 1900, with incomplete rows removed and rows ordered by year.
later_df <- artwork %>%
  select(year, medium) %>%
  filter(year > 1900) %>%
  na.omit() %>%
  arrange(year)

# Split each medium description into one word per row and rank the words by
# how often they occur.
later_df %>%
  unnest_tokens(output = word, input = medium) %>%
  count(word, sort = TRUE)

Modeling

# model
# Train/test split stratified on creation year; seeded for reproducibility.
set.seed(123)
art_split <- later_df %>% initial_split(strata = year)
art_train <- art_split %>% training()
art_test <- art_split %>% testing()

# Cross-validation folds on the training set, also stratified on year.
set.seed(234)
art_folds <- art_train %>% vfold_cv(strata = year)
art_folds
#  10-fold cross-validation using stratification 
# preprocess
# Build the recipe one step at a time: year is the outcome and the medium
# text is the only predictor.
art_rec <- recipe(year ~ medium, data = art_train)
art_rec <- step_tokenize(art_rec, medium)                       # split into single words
art_rec <- step_stopwords(art_rec, medium)                      # drop stop words
art_rec <- step_tokenfilter(art_rec, medium, max_tokens = 500)  # keep the 500 most used tokens
art_rec <- step_tfidf(art_rec, medium)                          # tf-idf weight per kept token

art_rec
Data Recipe

Inputs:

Operations:

Tokenization for medium
Stop word removal for medium
Text filtering for medium
Term frequency-inverse document frequency with medium
# specify model
# Blueprint asking for the processed predictors as a sparse "dgCMatrix".
sparse_bp <- hardhat::default_recipe_blueprint(composition = "dgCMatrix")

# Lasso (mixture = 1) linear regression on the glmnet engine; the penalty is
# left as a tuning placeholder.
lasso_spec <- set_engine(
  linear_reg(penalty = tune(), mixture = 1),
  "glmnet"
)

# Bundle the recipe (with the sparse blueprint) and the model specification.
art_wf <- workflow() %>%
  add_recipe(recipe = art_rec, blueprint = sparse_bp) %>%
  add_model(spec = lasso_spec)

art_wf
══ Workflow ═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
Preprocessor: Recipe
Model: linear_reg()

── Preprocessor ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
4 Recipe Steps

● step_tokenize()
● step_stopwords()
● step_tokenfilter()
● step_tfidf()

── Model ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Linear Regression Model Specification (regression)

Main Arguments:
  penalty = tune()
  mixture = 1

Computational engine: glmnet 
# change range (regularization penalty)
# Regular grid of 20 candidate penalties over penalty(range = c(-3, 0)).
lambda_grid <- grid_regular(
  penalty(range = c(-3, 0)),
  levels = 20
)

# tune lasso model
# Register a parallel backend, then seed before evaluating every candidate
# penalty on the cross-validation folds.
doParallel::registerDoParallel()
set.seed(1234)

lasso_rs <- art_wf %>%
  tune_grid(resamples = art_folds, grid = lambda_grid)

lasso_rs
# Tuning results
# 10-fold cross-validation using stratification 
# model evaluation
# Plot the resampled performance metrics against the penalty values.
autoplot(lasso_rs)


# List the best candidates by RMSE. The metric is passed by name: the
# positional form is deprecated in current tune releases, while `metric =`
# is accepted by old and new versions alike.
show_best(lasso_rs, metric = "rmse")
# select best penalty
# Pick the penalty with the lowest cross-validated RMSE. The metric is named
# explicitly because positional use is deprecated in current tune releases.
best_rmse <- select_best(lasso_rs, metric = "rmse")

# Replace the tune() placeholder in the workflow with the selected penalty.
final_lasso <- finalize_workflow(art_wf, best_rmse)
final_lasso
══ Workflow ═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
Preprocessor: Recipe
Model: linear_reg()

── Preprocessor ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
4 Recipe Steps

● step_tokenize()
● step_stopwords()
● step_tokenfilter()
● step_tfidf()

── Model ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Linear Regression Model Specification (regression)

Main Arguments:
  penalty = 0.0784759970351461
  mixture = 1

Computational engine: glmnet 
# fit model on training data and evaluate on testing data
# last_fit() uses the split object to fit on the training portion and score
# the testing portion in a single call.
art_final <- final_lasso %>% last_fit(split = art_split)
art_final %>% collect_metrics()
# variable importance
# Extract the fitted glmnet model from the final workflow and compute
# coefficient-based importance. `lambda` is passed explicitly so the
# coefficients are read at the tuned penalty (`best_rmse`, selected above);
# without it vi() has no way of knowing which penalty on the path to use.
# NOTE(review): pull_workflow_fit() is deprecated in newer workflows releases
# in favour of extract_fit_parsnip(); kept here for compatibility with the
# package versions this notebook was written against.
art_vip <- pull_workflow_fit(art_final$.workflow[[1]]) %>%
  vi(lambda = best_rmse$penalty)

# Plot the 20 largest-magnitude terms for each coefficient sign, one facet
# per sign.
art_vip %>%
  group_by(Sign) %>%
  slice_max(abs(Importance), n = 20) %>%
  ungroup() %>%
  mutate(
    # strip the recipe-generated prefix so only the token is shown
    Variable = str_remove(Variable, "tfidf_medium_"),
    Importance = abs(Importance),
    # order the bars by importance
    Variable = fct_reorder(Variable, Importance),
    Sign = if_else(Sign == "POS", "More in later art", "More in earlier art")
  ) %>%
  ggplot(aes(Importance, Variable, fill = Sign)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~Sign, scales = "free") +
  labs(y = NULL) +
  theme_minimal() +
  scale_fill_manual(values = wes_palette("Moonrise2"))

# plot true and predicted values on testing data
# The dashed line is geom_abline()'s default identity line (perfect
# prediction); coord_fixed() keeps both axes on the same scale.
art_final %>%
  collect_predictions() %>%
  ggplot(aes(x = year, y = .pred)) +
  geom_abline(color = "gray50", lty = 2, size = 1.2) +
  geom_point(color = "#05668d", alpha = 0.3, size = 1.5) +
  coord_fixed()

# view misclassifications
# Attach the medium text from the test set to the test predictions and keep
# the rows whose predicted year is off by more than 50 years.
misclassified <- art_final %>%
  collect_predictions() %>%
  bind_cols(select(art_test, medium)) %>%
  filter(abs(year - .pred) > 50)

# show the large-error rows ordered by true creation year
arrange(misclassified, year)
# residuals of the test set
# Residuals against predictions, with a dashed zero reference line and a
# smoothed trend.
art_final %>%
  augment() %>%
  ggplot(aes(x = .pred, y = .resid)) +
  geom_hline(yintercept = 0, color = "gray50", lty = 2, size = 1.2) +
  geom_point(color = "#05668d", alpha = 0.3, size = 1.5) +
  geom_smooth(color = "black")

  • The plot above indicates heteroscedasticity, with lower variance for recent artwork and higher variance for older artwork; hence, the model is not a good fit.
LS0tCnRpdGxlOiAiQXJ0IENvbGxlY3Rpb25zIgpkYXRlOiAiMjAyMSBXZWVrIDAzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyBEYXRhIEV4cGxvcmF0aW9uIEV4ZXJjaXNlIAoKVGhpcyBpcyBhIHNob3J0IGV4ZXJjaXNlIHRvIHByYWN0aWNlIGRhdGEgdmlzdWFsaXphdGlvbiBhbmQgbW9kZWxpbmcgdXNpbmcgW1RpZHlUdWVzZGF5XShodHRwczovL2dpdGh1Yi5jb20vcmZvcmRhdGFzY2llbmNlL3RpZHl0dWVzZGF5KSBXZWVrIDMgW0FydCBDb2xsZWN0aW9uc10oaHR0cHM6Ly9naXRodWIuY29tL3Jmb3JkYXRhc2NpZW5jZS90aWR5dHVlc2RheS9ibG9iL21hc3Rlci9kYXRhLzIwMjEvMjAyMS0wMS0xMi9yZWFkbWUubWQpIGRhdGEgZnJvbSBbVGF0ZSBBcnQgTXVzZXVtXShodHRwczovL2dpdGh1Yi5jb20vdGF0ZWdhbGxlcnkvY29sbGVjdGlvbikuIFRoZSBkYXRhIHZpc3VhbGl6YXRpb25zIGNyZWF0ZWQgaW4gdGhpcyBub3RlYm9vayBsb29rZWQgYXQgdmlkZW8vZmxpbSBtZWRpdW0sIGRpZ2l0YWwgbWVkaXVtIGFuZCB0aGUgcmF0aW8gb2YgYXJ0d29ya3MgdG8gYXJ0aXN0cy4gVGhlIG1vZGVsaW5nIHNlY3Rpb24gaW52b2x2ZXMgdHJhaW5pbmcgYSByZWd1bGFyaXNlZCByZWdyZXNzaW9uIG1vZGVsIHdpdGggdGV4dCBmZWF0dXJlcyB1c2luZyBbSnVsaWEgU2lsZ2UncyB0aWR5bW9kZWxzIHR1dG9yaWFsXShodHRwczovL2p1bGlhc2lsZ2UuY29tL2Jsb2cvdGF0ZS1jb2xsZWN0aW9uLykgb24gdGhlIFRhdGUgY29sbGVjdGlvbiBkYXRhc2V0LiAKCgpgYGB7ciwgbWVzc3NhZ2U9RkFMU0V9CiMgbG9hZCBsaWJhcmllcwpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeSh2aXJpZGlzKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHRpZHltb2RlbHMpCmxpYnJhcnkoZ2xtbmV0KQpsaWJyYXJ5KHN0b3B3b3JkcykKbGlicmFyeSh0ZXh0cmVjaXBlcykKbGlicmFyeSh2aXApCmxpYnJhcnkod2VzYW5kZXJzb24pCmxpYnJhcnkoZ2dwdWJyKQpgYGAKCmBgYHtyfQojIGltcG9ydCBkYXRhCmFydHdvcmsgPC0gcmVhZHI6OnJlYWRfY3N2KCdodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vcmZvcmRhdGFzY2llbmNlL3RpZHl0dWVzZGF5L21hc3Rlci9kYXRhLzIwMjEvMjAyMS0wMS0xMi9hcnR3b3JrLmNzdicpCmFydGlzdHMgPC0gcmVhZHI6OnJlYWRfY3N2KCJodHRwczovL2dpdGh1Yi5jb20vdGF0ZWdhbGxlcnkvY29sbGVjdGlvbi9yYXcvbWFzdGVyL2FydGlzdF9kYXRhLmNzdiIpCmRpbShhcnR3b3JrKQpkaW0oYXJ0aXN0cykKYGBgCgojIyMgVmlzdWFsaXphdGlvbgoKYGBge3IsIHdhcm5pbmc9RkFMU0UsIGZpZy5oZWlnaHQ9MiwgZmlnLndpZHRoPTR9CiMgZGlzdHJpYnV0aW9uOiBjcmVhdGlvbiB5ZWFyCnAxID0gYXJ0d29yayAlPiUgZmlsdGVyKCFpcy5uYSh5ZWFyKSkgJT4lIGdncGxvdChhZXMoeD15ZWFyKSkgKyBnZW9tX2hpc3RvZ3JhbShhbHBoYT0wLjgsIGJpbndpZHRoID0gMTAsIGZpbGw9IiMwMTRmODYi
KSArIGxhYnModGl0bGU9IkRpc3RyaWJ1dGlvbiBvZiBDcmVhdGlvbiBZZWFyIiwgY2FwdGlvbj0iRGF0YSBmcm9tIFRhdGUgQXJ0IE11c2V1bSIsIHg9IlllYXIiKSArIHRoZW1lKHRpdGxlPWVsZW1lbnRfdGV4dChzaXplPTkpKQoKIyBkaXN0cmlidXRpb246IGFjcXVpc2l0aW9uIHllYXIKcDIgPSBhcnR3b3JrICU+JSBmaWx0ZXIoIWlzLm5hKGFjcXVpc2l0aW9uWWVhcikpICAlPiUgZ2dwbG90KGFlcyh4PWFjcXVpc2l0aW9uWWVhcikpICsgZ2VvbV9oaXN0b2dyYW0oYWxwaGE9MC44LCBiaW53aWR0aCA9IDEwLCBmaWxsPSIjYmM2YzI1IikgKyBsYWJzKHRpdGxlPSJEaXN0cmlidXRpb24gb2YgQWNxdWlzaXRpb24gWWVhciIsIHg9IlllYXIiLCBjYXB0aW9uPSJEYXRhIGZyb20gVGF0ZSBBcnQgTXVzZXVtIikgKyB0aGVtZSh0aXRsZT1lbGVtZW50X3RleHQoc2l6ZT05KSkKCmdnYXJyYW5nZShwMSwgcDIsIG5jb2w9MiwgbnJvdz0xKQpgYGAKCmBgYHtyfQpuX2Rpc3RpbmN0KGFydHdvcmskbWVkaXVtKQpuX2Rpc3RpbmN0KGFydHdvcmskeWVhcikKYGBgCgoKYGBge3J9CiMgcHJvcG9ydGlvbiBvZiB2aWRlby9mbGltIG1lZGl1bSBieSBjcmVhdGlvbiB5ZWFyCmFydHdvcmskdmYgPSBpZmVsc2UoZ3JlcGwoJ1ZpZGVvfHZpZGVvfEZpbG18ZmxpbScsIGFydHdvcmskbWVkaXVtKSwidmYiLCJub24tdmYiKQoKYXJ0d29yayAlPiUgZ3JvdXBfYnkoeWVhciwgdmYpICU+JSB0YWxseSgpICU+JSBtdXRhdGUocHJvcG9ydGlvbj0gbi9zdW0obikpICU+JSBmaWx0ZXIodmYhPSJub24tdmYiKSAlPiUgZ2dwbG90KGFlcyh4PXllYXIsIHk9cHJvcG9ydGlvbikpICsgZ2VvbV9jb2woZmlsbD0iIzBmNGM1YyIsIGFscGhhPTAuNykgKyBsYWJzKHg9IlllYXIiLCB5PSAiUHJvcG9ydGlvbiIsIHRpdGxlPSJQcm9wb3J0aW9uIG9mIFZpZGVvL2ZsaW0gYXJ0d29ya3MgYnkgY3JlYXRpb24geWVhciIsIGNhcHRpb249IkRhdGEgZnJvbSBUYXRlIEFydCBNdXNldW0iKSAKYGBgCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KIyBwcm9wb3J0aW9uIG9mIGRpZ2l0YWwgYnkgY3JlYXRpb24geWVhcgphcnR3b3JrJGRpZ2l0YWwgPSBpZmVsc2UoZ3JlcGwoJ0RpZ2l0YWx8ZGlnaXRhbCcsIGFydHdvcmskbWVkaXVtKSwiZGlnaXRhbCIsIm5vbi1kaWdpdGFsIikKCmFydHdvcmsgJT4lIGdyb3VwX2J5KHllYXIsIGRpZ2l0YWwpICU+JSB0YWxseSgpICU+JSBtdXRhdGUocHJvcG9ydGlvbj0gbi9zdW0obikpICU+JSBmaWx0ZXIoZGlnaXRhbCE9Im5vbi1kaWdpdGFsIikgJT4lIGdncGxvdChhZXMoeD15ZWFyLCB5PXByb3BvcnRpb24pKSArIGdlb21fY29sKGZpbGw9IiMwZjRjNWMiLGFscGhhPTAuNykgKyBsYWJzKHg9IlllYXIiLCB5PSAiUHJvcG9ydGlvbiIsIHRpdGxlPSJQcm9wb3J0aW9uIG9mIGRpZ2l0YWwgYXJ0d29ya3MgYnkgY3JlYXRpb24geWVhciIsIGNhcHRpb249IkRhdGEgZnJvbSBUYXRlIEFydCBNdXNldW0iKQpgYGAKCmBgYHtyLCB3YXJuaW5nPUZBTFNF
fQojIHByb3BvcnRpb24gb2YgZGlnaXRhbCBhcnR3b3JrcyBieSBhY3F1aXNpdGlvbiB5ZWFyIAphcnR3b3JrICU+JSBncm91cF9ieShhY3F1aXNpdGlvblllYXIsIGRpZ2l0YWwpICU+JSB0YWxseSgpICU+JSBtdXRhdGUocHJvcG9ydGlvbj0gbi9zdW0obikpICU+JSBmaWx0ZXIoZGlnaXRhbCE9Im5vbi1kaWdpdGFsIikgJT4lIGdncGxvdChhZXMoeD1hY3F1aXNpdGlvblllYXIsIHk9cHJvcG9ydGlvbikpICsgZ2VvbV9wb2ludChzaXplPTMsIGNvbG9yPSIjOWEwMzFlIikgKyBnZW9tX3NlZ21lbnQoYWVzKHg9YWNxdWlzaXRpb25ZZWFyLCB4ZW5kPWFjcXVpc2l0aW9uWWVhciwgeT0wLCB5ZW5kPXByb3BvcnRpb24pLCBjb2xvcj0iIzA1NjY4ZCIpICsgbGFicyh4PSJBY3F1aXNpdGlvbiBZZWFyIiwgeT0gIlByb3BvcnRpb24iLCB0aXRsZT0iUHJvcG9ydGlvbiBvZiBkaWdpdGFsIGFydHdvcmtzIGJ5IGFjcXVpc2l0aW9uIHllYXIiLCBjYXB0aW9uPSJEYXRhIGZyb20gVGF0ZSBBcnQgTXVzZXVtIikgKyB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKSAKYGBgCgpgYGB7ciwgbWVzc2FnZT1GQUxTRX0KI3VuaXF1ZSBhcnRpc3QgY291bnQgYnkgYWNxdWlzaXRpb25ZZWFyCmFydHdvcmsgJT4lIGZpbHRlcighaXMubmEoYWNxdWlzaXRpb25ZZWFyKSkgJT4lIGdyb3VwX2J5KGFjcXVpc2l0aW9uWWVhcikgJT4lIHN1bW1hcmlzZShhcnRpc3RfY291bnQ9bl9kaXN0aW5jdChhcnRpc3RJZCwgbmEucm09VFJVRSkpICU+JSBnZ3Bsb3QoYWVzKHg9YWNxdWlzaXRpb25ZZWFyLCB5PWFydGlzdF9jb3VudCkpICsgZ2VvbV9saW5lKGNvbG9yPSIjZTM2NDE0IikgKyBsYWJzKHg9IlllYXIiLCB5PSAiQXJ0aXN0IGNvdW50IiwgdGl0bGU9IkFydGlzdCBjb3VudCBhbmQgYWNxdWlzaXRpb24geWVhciIsIGNhcHRpb249IkRhdGEgZnJvbSBUYXRlIEFydCBNdXNldW0iKSAKYGBgCgpgYGB7cn0KIyB1bmlxdWUgYXJ0aXN0IGNvdW50IGJ5IGFjcXVpc2l0aW9uWWVhcgphcnR3b3JrICU+JSBmaWx0ZXIoYWNxdWlzaXRpb25ZZWFyPj0xOTAwKSAlPiUgZ3JvdXBfYnkoYWNxdWlzaXRpb25ZZWFyKSAlPiUgc3VtbWFyaXNlKGFydGlzdF9jb3VudD1uX2Rpc3RpbmN0KGFydGlzdElkLCBuYS5ybT1UUlVFKSwgYXJ0d29ya19jb3VudD1uX2Rpc3RpbmN0KGlkKSwgcmF0aW89IGFydHdvcmtfY291bnQvYXJ0aXN0X2NvdW50KSAlPiUgZ2dwbG90KGFlcyh4PWFjcXVpc2l0aW9uWWVhciwgeT1yYXRpbywgZmlsbD1yYXRpbykpICsgZ2VvbV9jb2woKSArIGxhYnMoeD0iQWNxdWlzaXRpb24gWWVhciIsIHk9ICJSYXRpbyIsIHRpdGxlPSJSYXRpbyBvZiBhcnR3b3JrcyB0byBhcnRpc3RzLCBieSBhY3F1aXNpdGlvbiB5ZWFyICgxOTAwIG9ud2FyZHMpIiwgc3VidGl0bGU9IkFydHdvcmtzIG93bmVkIG9yIGpvaW50bHkgb3duZWQgYnkgVGF0ZSIsIGNhcHRpb249IkRhdGEgZnJvbSBUYXRlIEFydCBNdXNldW0iLCBmaWxsPSJSYXRpbyIpICsgdGhlbWVfYncoKSArIHNj
YWxlX2ZpbGxfdmlyaWRpcyhvcHRpb249ImNpdmlkaXMiKQpgYGAKCmBgYHtyfQojIDUgbW9zdCBmcmVxdWVudCBtZWRpdW1zOiBhcnR3b3JrcyBjcmVhdGVkIGFmdGVyIDE5MDAKYXJ0d29yayAlPiUgZmlsdGVyKCFpcy5uYShtZWRpdW0pKSAlPiUgZmlsdGVyKHllYXIgPiAxOTAwKSAlPiUgZ3JvdXBfYnkobWVkaXVtKSAlPiUgdGFsbHkoc29ydD1UKSAlPiUgc2xpY2UoMTo1KQoKIyA1IG1vc3QgZnJlcXVlbnQgbWVkaXVtOiBhcnR3b3JrcyBjcmVhdGVkIGJlZm9yZSAxOTAwCmFydHdvcmsgJT4lIGZpbHRlcighaXMubmEobWVkaXVtKSkgJT4lIGZpbHRlcih5ZWFyIDw9IDE5MDApICU+JSBncm91cF9ieShtZWRpdW0pICU+JSB0YWxseShzb3J0PVQpICU+JSBzbGljZSgxOjUpCmBgYAoKYGBge3J9CiMgbW9zdCBjb21tb24gd29yZHMgaW4gZGVzY3JpYmluZyBtZWRpdW0gCmxhdGVyX2RmID0gYXJ0d29yayAlPiUgZmlsdGVyKHllYXIgPiAxOTAwKSAlPiUgc2VsZWN0KHllYXIsIG1lZGl1bSkgJT4lIG5hLm9taXQoKSAlPiUgYXJyYW5nZSh5ZWFyKQoKbGF0ZXJfZGYgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCBtZWRpdW0pICU+JQogIGNvdW50KHdvcmQsIHNvcnQgPSBUUlVFKQpgYGAKCiMjIyBNb2RlbGluZyAKKiByZWZlcmVuY2U6IFtFeHBsb3JlIGFydCBtZWRpYSBvdmVyIHRpbWUgaW4gdGhlICNUaWR5VHVlc2RheSBUYXRlIGNvbGxlY3Rpb24gZGF0YXNldCBieSBKdWxpYSBTaWxnZV0oaHR0cHM6Ly9qdWxpYXNpbGdlLmNvbS9ibG9nL3RhdGUtY29sbGVjdGlvbi8pLiAKCmBgYHtyfQojIG1vZGVsCnNldC5zZWVkKDEyMykKYXJ0X3NwbGl0IDwtIGluaXRpYWxfc3BsaXQobGF0ZXJfZGYsIHN0cmF0YSA9IHllYXIpCmFydF90cmFpbiA8LSB0cmFpbmluZyhhcnRfc3BsaXQpCmFydF90ZXN0IDwtIHRlc3RpbmcoYXJ0X3NwbGl0KQoKc2V0LnNlZWQoMjM0KQphcnRfZm9sZHMgPC0gdmZvbGRfY3YoYXJ0X3RyYWluLCBzdHJhdGEgPSB5ZWFyKQphcnRfZm9sZHMKYGBgCgpgYGB7cn0KIyBwcmVwcm9jZXNzIAphcnRfcmVjIDwtIHJlY2lwZSh5ZWFyIH4gbWVkaXVtLCBkYXRhID0gYXJ0X3RyYWluKSAlPiUKICBzdGVwX3Rva2VuaXplKG1lZGl1bSkgJT4lICNtYWtlIHNpbmdsZSB3b3JkcwogIHN0ZXBfc3RvcHdvcmRzKG1lZGl1bSkgJT4lICNyZW1vdmUgc3RvcCB3b3JkcwogIHN0ZXBfdG9rZW5maWx0ZXIobWVkaXVtLCBtYXhfdG9rZW5zID0gNTAwKSAlPiUgI3RvcCA1MDAgbW9zdCB1c2VkIHRva2VucwogIHN0ZXBfdGZpZGYobWVkaXVtKQoKYXJ0X3JlYwpgYGAKCmBgYHtyfQojIHNwZWNpZnkgbW9kZWwKc3BhcnNlX2JwIDwtIGhhcmRoYXQ6OmRlZmF1bHRfcmVjaXBlX2JsdWVwcmludChjb21wb3NpdGlvbiA9ICJkZ0NNYXRyaXgiKQoKbGFzc29fc3BlYyA8LSBsaW5lYXJfcmVnKHBlbmFsdHkgPSB0dW5lKCksIG1peHR1cmUgPSAxKSAlPiUKICBzZXRfZW5naW5lKCJnbG1uZXQiKQoKYXJ0X3dmIDwtIHdvcmtmbG93KCkgJT4lCiAgYWRk
X3JlY2lwZShhcnRfcmVjLCBibHVlcHJpbnQgPSBzcGFyc2VfYnApICU+JQogIGFkZF9tb2RlbChsYXNzb19zcGVjKQoKYXJ0X3dmCmBgYAoKYGBge3J9CiMgY2hhbmdlIHJhbmdlIChyZWd1bGFyaXphdGlvbiBwZW5hbHR5KQpsYW1iZGFfZ3JpZCA8LSBncmlkX3JlZ3VsYXIocGVuYWx0eShyYW5nZSA9IGMoLTMsIDApKSwgbGV2ZWxzID0gMjApCgojIHR1bmUgbGFzc28gbW9kZWwKZG9QYXJhbGxlbDo6cmVnaXN0ZXJEb1BhcmFsbGVsKCkKc2V0LnNlZWQoMTIzNCkKCmxhc3NvX3JzIDwtIHR1bmVfZ3JpZCgKICBhcnRfd2YsCiAgcmVzYW1wbGVzID0gYXJ0X2ZvbGRzLAogIGdyaWQgPSBsYW1iZGFfZ3JpZAopCgpsYXNzb19ycwpgYGAKCmBgYHtyfQojIG1vZGVsIGV2YWx1YXRpb24KYXV0b3Bsb3QobGFzc29fcnMpCgpzaG93X2Jlc3QobGFzc29fcnMsICJybXNlIikKYGBgCgoKYGBge3J9CiMgc2VsZWN0IGJlc3QgcGVuYWx0eSAKYmVzdF9ybXNlIDwtIHNlbGVjdF9iZXN0KGxhc3NvX3JzLCAicm1zZSIpCgpmaW5hbF9sYXNzbyA8LSBmaW5hbGl6ZV93b3JrZmxvdyhhcnRfd2YsIGJlc3Rfcm1zZSkKZmluYWxfbGFzc28KYGBgCgoKYGBge3J9CiMgZml0IG1vZGVsIG9uIHRyYWluaW5nIGRhdGEgYW5kIGV2YWx1YXRlIG9uIHRlc3RpbmcgZGF0YQphcnRfZmluYWwgPC0gbGFzdF9maXQoZmluYWxfbGFzc28sIGFydF9zcGxpdCkKY29sbGVjdF9tZXRyaWNzKGFydF9maW5hbCkKYGBgCgoKYGBge3J9CiMgdmFyaWFibGUgaW1wb3J0YW5jZSAKYXJ0X3ZpcCA8LSBwdWxsX3dvcmtmbG93X2ZpdChhcnRfZmluYWwkLndvcmtmbG93W1sxXV0pICU+JQogIHZpKCkKCmFydF92aXAgJT4lCiAgZ3JvdXBfYnkoU2lnbikgJT4lCiAgc2xpY2VfbWF4KGFicyhJbXBvcnRhbmNlKSwgbiA9IDIwKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgbXV0YXRlKAogICAgVmFyaWFibGUgPSBzdHJfcmVtb3ZlKFZhcmlhYmxlLCAidGZpZGZfbWVkaXVtXyIpLAogICAgSW1wb3J0YW5jZSA9IGFicyhJbXBvcnRhbmNlKSwKICAgIFZhcmlhYmxlID0gZmN0X3Jlb3JkZXIoVmFyaWFibGUsIEltcG9ydGFuY2UpLAogICAgU2lnbiA9IGlmX2Vsc2UoU2lnbiA9PSAiUE9TIiwgIk1vcmUgaW4gbGF0ZXIgYXJ0IiwgIk1vcmUgaW4gZWFybGllciBhcnQiKQogICkgJT4lCiAgZ2dwbG90KGFlcyhJbXBvcnRhbmNlLCBWYXJpYWJsZSwgZmlsbCA9IFNpZ24pKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAoflNpZ24sIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9IE5VTEwpICsgCiAgdGhlbWVfbWluaW1hbCgpICsgCiAgc2NhbGVfZmlsbF9tYW51YWwodmFsdWVzID0gd2VzX3BhbGV0dGUoIk1vb25yaXNlMiIpKQoKYGBgCgpgYGB7cn0KIyBwbG90IHRydWUgYW5kIHByZWRpY3RlZCB2YWx1ZXMgb24gdGVzdGluZyBkYXRhCmNvbGxlY3RfcHJlZGljdGlvbnMoYXJ0X2ZpbmFsKSAlPiUKICBnZ3Bsb3QoYWVzKHllYXIsIC5wcmVkKSkgKwogIGdl
b21fYWJsaW5lKGx0eSA9IDIsIGNvbG9yID0gImdyYXk1MCIsIHNpemUgPSAxLjIpICsKICBnZW9tX3BvaW50KHNpemUgPSAxLjUsIGFscGhhID0gMC4zLCBjb2xvciA9ICIjMDU2NjhkIikgKwogIGNvb3JkX2ZpeGVkKCkKYGBgCgpgYGB7cn0KIyB2aWV3IG1pc2NsYXNzaWZpY2F0aW9ucwptaXNjbGFzc2lmaWVkIDwtIGNvbGxlY3RfcHJlZGljdGlvbnMoYXJ0X2ZpbmFsKSAlPiUKICBiaW5kX2NvbHMoYXJ0X3Rlc3QgJT4lIHNlbGVjdChtZWRpdW0pKSAlPiUKICBmaWx0ZXIoYWJzKHllYXIgLSAucHJlZCkgPiA1MCkKCm1pc2NsYXNzaWZpZWQgJT4lCiAgYXJyYW5nZSh5ZWFyKQpgYGAKCmBgYHtyfQojIHJlc2lkdWFscyBvZiB0aGUgdGVzdCBzZXQKYXVnbWVudChhcnRfZmluYWwpICU+JQogIGdncGxvdChhZXMoLnByZWQsIC5yZXNpZCkpICsKICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwLCBsdHkgPSAyLCBjb2xvciA9ICJncmF5NTAiLCBzaXplID0gMS4yKSArCiAgZ2VvbV9wb2ludChzaXplID0gMS41LCBhbHBoYSA9IDAuMywgY29sb3IgPSAiIzA1NjY4ZCIpICsKICBnZW9tX3Ntb290aChjb2xvciA9ICJibGFjayIpCmBgYAoKKiBUaGUgcGxvdCBhYm92ZSBpbmRpY2F0ZXMgaGV0ZXJvc2NlZGFzdGljaXR5LCB3aXRoIGxvd2VyIHZhcmlhbmNlIGZvciByZWNlbnQgYXJ0d29yayBhbmQgaGlnaGVyIHZhcmlhbmNlIGZvciBvbGRlciBhcnR3b3JrIGhlbmNlLCBpdCBpcyBub3QgYSBnb29kIG1vZGVsLiAKCg==