# Packages this notebook depends on.
packages <- c(
  "dplyr", "ggplot2", "stringr", "dslabs", "readr", "tidyr", "purrr",
  "lubridate", "rvest"
)
# Names of the packages that are already installed.
existing <- unname(installed.packages()[, "Package"])
# Install whatever is still missing, one package per call.
for (pkg in setdiff(packages, existing)) {
  install.packages(pkg)
}
# Start from a clean workspace: remove every object in the current
# environment, including names that begin with a dot. The argument is
# spelled out in full (`all.names = TRUE`) instead of relying on partial
# matching of `all` and on the reassignable shortcut `T`.
rm(list = ls(all.names = TRUE))
# Switch every locale category to the portable "C" locale.
Sys.setlocale("LC_ALL", "C")
[1] "C"
# Print numbers with 4 significant digits and penalise scientific notation
# so that fixed notation is preferred.
options(digits=4, scipen=12)
# Attach the packages used throughout the notebook.
library(rvest)
library(readr)
library(dplyr)
library(ggplot2)
library(stringr)
library(lubridate)
library(tidyr)
library(dslabs)

Tidy Data

Web Scraping
library(rvest)
# Wikipedia page that holds the state-level murder statistics.
url <- "https://en.wikipedia.org/wiki/Murder_in_the_United_States_by_state"
h <- read_html(url)
# Convert the second <table> node of the page into a data frame.
tab <- html_table(html_nodes(h, "table")[[2]])
# Replace the scraped headers with short column names. The spelling is kept
# exactly as originally written because the printed output in this notebook
# shows these names.
tab <- setNames(tab, c(
  "state", "population", "total", "murders", "gun_murders",
  "gun_ownersjip", "total_rate", "murder_rate", "gun_murder_rate"
))

A. String Processing Overview


B. String Processing Part 1

B1. String Parsing

Q1: Which of the following is NOT an application of string parsing?

  • Formatting numbers and characters so they can easily be displayed in deliverables like papers and presentations.
B2. Defining Strings: Single and Double Quotes and How to Escape

Q1: Which of the following commands would not give you an error in R?

cat(" LeBron James is 6'8\" ")
 LeBron James is 6'8" 
B3. stringr Package

Q1: Which of the following are advantages of the stringr package over string processing functions in base R? Select all that apply.

  • Functions in stringr all start with “str_”, which makes them easy to look up using autocomplete.
  • Stringr functions work better with pipes.
  • The order of arguments is more consistent in stringr functions than in base R.
B4. Case Study 1: US Murders Data
sapply(tab, str_detect, ",") %>% colSums
          state      population           total         murders     gun_murders 
              0              51               3               2               1 
  gun_ownersjip      total_rate     murder_rate gun_murder_rate 
              0               0               0               0 
tab = tab %>% mutate_at(2:3, parse_number)
sapply(tab, str_detect, ",") %>% colSums
          state      population           total         murders     gun_murders 
              0               0               0               2               1 
  gun_ownersjip      total_rate     murder_rate gun_murder_rate 
              0               0               0               0 

Q1: You have a dataframe of monthly sales and profits in R

dat = read.table("data/sales.txt", header=T, sep="", stringsAsFactors=F)
dat
     Month    Sales  Profit
1  January $128,568 $16,234
2 February $109,523 $12,876
3    March $115,468 $17,920
4    April $122,274 $15,825
5      May $117,921 $15,437

Which of the following commands could convert the sales and profits columns to numeric? Select all that apply.

dat %>% mutate_at(2:3, parse_number)
     Month  Sales Profit
1  January 128568  16234
2 February 109523  12876
3    March 115468  17920
4    April 122274  15825
5      May 117921  15437
dat %>% mutate_at(2:3, funs(str_replace_all(., c("\\$|,"), "")))
     Month  Sales Profit
1  January 128568  16234
2 February 109523  12876
3    March 115468  17920
4    April 122274  15825
5      May 117921  15437
dat %>% mutate_all(2:3, parse_number)
dat$Profit <- str_replace_all(dat$Profit, c("\\$|,"), "") 
dat$Sales <- parse_number(dat$Sales) 
dat
     Month  Sales Profit
1  January 128568  16234
2 February 109523  12876
3    March 115468  17920
4    April 122274  15825
5      May 117921  15437

C. String Processing Part 2

C1. Case Study 2: Reported Heights
library(dslabs)
data(reported_heights)
reported_heights %>% head
           time_stamp    sex height
1 2014-09-02 13:40:36   Male     75
2 2014-09-02 13:46:59   Male     70
3 2014-09-02 13:59:20   Male     68
4 2014-09-02 14:51:53   Male     74
5 2014-09-02 15:16:15   Male     61
6 2014-09-02 15:16:16 Female     65
reported_heights %>%
  mutate(new_height = as.numeric(height)) %>% 
  filter(is.na(new_height)) %>% 
  getElement("height")
NAs introduced by coercion
 [1] "5' 4\""                 "165cm"                  "5'7"                   
 [4] ">9000"                  "5'7\""                  "5'3\""                 
 [7] "5 feet and 8.11 inches" "5'11"                   "5'9''"                 
[10] "5'10''"                 "5,3"                    "6'"                    
[13] "6,8"                    "5' 10"                  "Five foot eight inches"
[16] "5'5\""                  "5'2\""                  "5,4"                   
[19] "5'3"                    "5'10''"                 "5'3''"                 
[22] "5'7''"                  "5'12"                   "2'33"                  
[25] "5'11"                   "5'3\""                  "5,8"                   
[28] "5'6''"                  "5'4"                    "1,70"                  
[31] "5'7.5''"                "5'7.5''"                "5'2\""                 
[34] "5' 7.78\""              "yyy"                    "5'5"                   
[37] "5'8"                    "5'6"                    "5 feet 7inches"        
[40] "6*12"                   "5 .11"                  "5 11"                  
[43] "5'4"                    "5'8\""                  "5'5"                   
[46] "5'7"                    "5'6"                    "5'11\""                
[49] "5'7\""                  "5'7"                    "5'8"                   
[52] "5' 11\""                "6'1\""                  "69\""                  
[55] "5' 7\""                 "5'10''"                 "5'10"                  
[58] "5'10"                   "5ft 9 inches"           "5 ft 9 inches"         
[61] "5'2"                    "5'11"                   "5'11''"                
[64] "5'8\""                  "708,661"                "5 feet 6 inches"       
[67] "5'10''"                 "5'8"                    "6'3\""                 
[70] "649,606"                "728,346"                "6 04"                  
[73] "5'9"                    "5'5''"                  "5'7\""                 
[76] "6'4\""                  "5'4"                    "170 cm"                
[79] "7,283,465"              "5'6"                    "5'6"                   
# Flag height entries that are not plausible heights in inches.
#
# x        : vector of reported heights (character or numeric).
# smallest : lower bound of the accepted range (default 50).
# tallest  : upper bound of the accepted range (default 84).
#
# Returns a logical vector the same length as x; TRUE marks an entry that
# cannot be converted to a number or that lies outside [smallest, tallest].
not_inches <- function(x, smallest = 50, tallest = 84) {
  # Non-numeric strings become NA; the coercion warning is expected here.
  height_in <- suppressWarnings(as.numeric(x))
  plausible <- !is.na(height_in) & height_in >= smallest & height_in <= tallest
  !plausible
}
problems = reported_heights$height %>% .[not_inches(.)]
problems
  [1] "6"                      "5' 4\""                 "5.3"                   
  [4] "165cm"                  "511"                    "6"                     
  [7] "2"                      "5'7"                    ">9000"                 
 [10] "5'7\""                  "5'3\""                  "5 feet and 8.11 inches"
 [13] "5.25"                   "5'11"                   "5.5"                   
 [16] "11111"                  "5'9''"                  "6"                     
 [19] "6.5"                    "150"                    "5'10''"                
 [22] "103.2"                  "5.8"                    "19"                    
 [25] "5"                      "5.6"                    "175"                   
 [28] "177"                    "300"                    "5,3"                   
 [31] "6'"                     "6"                      "5.9"                   
 [34] "6,8"                    "5' 10"                  "5.5"                   
 [37] "178"                    "163"                    "6.2"                   
 [40] "175"                    "Five foot eight inches" "6.2"                   
 [43] "5.8"                    "5.1"                    "178"                   
 [46] "165"                    "5.11"                   "5'5\""                 
 [49] "165"                    "180"                    "5'2\""                 
 [52] "5.75"                   "169"                    "5,4"                   
 [55] "7"                      "5.4"                    "157"                   
 [58] "6.1"                    "169"                    "5'3"                   
 [61] "5.6"                    "214"                    "183"                   
 [64] "5.6"                    "6"                      "162"                   
 [67] "178"                    "180"                    "5'10''"                
 [70] "170"                    "5'3''"                  "178"                   
 [73] "0.7"                    "190"                    "5.4"                   
 [76] "184"                    "5'7''"                  "5.9"                   
 [79] "5'12"                   "5.6"                    "5.6"                   
 [82] "184"                    "6"                      "167"                   
 [85] "2'33"                   "5'11"                   "5'3\""                 
 [88] "5.5"                    "5.2"                    "180"                   
 [91] "5.5"                    "5.5"                    "6.5"                   
 [94] "5,8"                    "180"                    "183"                   
 [97] "170"                    "5'6''"                  "172"                   
[100] "612"                    "5.11"                   "168"                   
[103] "5'4"                    "1,70"                   "172"                   
[106] "87"                     "5.5"                    "176"                   
[109] "5'7.5''"                "5'7.5''"                "111"                   
[112] "5'2\""                  "173"                    "174"                   
[115] "176"                    "175"                    "5' 7.78\""             
[118] "6.7"                    "12"                     "6"                     
[121] "5.1"                    "5.6"                    "5.5"                   
[124] "yyy"                    "5.2"                    "5'5"                   
[127] "5'8"                    "5'6"                    "5 feet 7inches"        
[130] "89"                     "5.6"                    "5.7"                   
[133] "183"                    "172"                    "34"                    
[136] "25"                     "6"                      "5.9"                   
[139] "168"                    "6.5"                    "170"                   
[142] "175"                    "6"                      "22"                    
[145] "5.11"                   "684"                    "6"                     
[148] "1"                      "1"                      "6*12"                  
[151] "5 .11"                  "87"                     "162"                   
[154] "165"                    "184"                    "6"                     
[157] "173"                    "1.6"                    "172"                   
[160] "170"                    "5.7"                    "5.5"                   
[163] "174"                    "170"                    "160"                   
[166] "120"                    "120"                    "23"                    
[169] "192"                    "5 11"                   "167"                   
[172] "150"                    "1.7"                    "174"                   
[175] "5.8"                    "6"                      "5'4"                   
[178] "5'8\""                  "5'5"                    "5.8"                   
[181] "5.1"                    "5.11"                   "5.7"                   
[184] "5'7"                    "5'6"                    "5'11\""                
[187] "5'7\""                  "5'7"                    "172"                   
[190] "5'8"                    "180"                    "5' 11\""               
[193] "5"                      "180"                    "180"                   
[196] "6'1\""                  "5.9"                    "5.2"                   
[199] "5.5"                    "69\""                   "5' 7\""                
[202] "5'10''"                 "5.51"                   "5'10"                  
[205] "5'10"                   "5ft 9 inches"           "5 ft 9 inches"         
[208] "5'2"                    "5'11"                   "5.8"                   
[211] "5.7"                    "167"                    "168"                   
[214] "6"                      "6.1"                    "5'11''"                
[217] "5.69"                   "178"                    "182"                   
[220] "164"                    "5'8\""                  "185"                   
[223] "6"                      "86"                     "5.7"                   
[226] "708,661"                "5.25"                   "5.5"                   
[229] "5 feet 6 inches"        "5'10''"                 "172"                   
[232] "6"                      "5'8"                    "160"                   
[235] "6'3\""                  "649,606"                "10000"                 
[238] "5.1"                    "152"                    "1"                     
[241] "180"                    "728,346"                "175"                   
[244] "158"                    "173"                    "164"                   
[247] "6 04"                   "169"                    "0"                     
[250] "185"                    "168"                    "5'9"                   
[253] "169"                    "5'5''"                  "174"                   
[256] "6.3"                    "179"                    "5'7\""                 
[259] "5.5"                    "6"                      "6"                     
[262] "170"                    "6"                      "172"                   
[265] "158"                    "100"                    "159"                   
[268] "190"                    "5.7"                    "170"                   
[271] "158"                    "6'4\""                  "180"                   
[274] "5.57"                   "5'4"                    "210"                   
[277] "88"                     "6"                      "162"                   
[280] "170 cm"                 "5.7"                    "170"                   
[283] "157"                    "186"                    "170"                   
[286] "7,283,465"              "5"                      "5"                     
[289] "34"                     "161"                    "5'6"                   
[292] "5'6"                   
str_subset(problems, "cm|inches")
[1] "165cm"                  "5 feet and 8.11 inches" "Five foot eight inches"
[4] "5 feet 7inches"         "5ft 9 inches"           "5 ft 9 inches"         
[7] "5 feet 6 inches"        "170 cm"                
str_subset(problems, "cm|inches") %>% str_extract("cm|inches")
[1] "cm"     "inches" "inches" "inches" "inches" "inches" "inches" "cm"    

Q1: In the video, we use the function not_inches to identify heights that were incorrectly entered

# Flag entries of x that are not plausible heights in inches.
# Returns a logical vector with one element per entry of x.
not_inches <- function(x, smallest = 50, tallest = 84) {
  # Entries that cannot be parsed as numbers become NA (warning silenced).
  inches <- suppressWarnings(as.numeric(x))
  # TRUE when the entry is NA, below `smallest`, or above `tallest`.
  ind <- is.na(inches) | inches < smallest | inches > tallest 
  ind
}

In this function, what TWO types of values are identified as not being correctly formatted in inches?

  • Values that result in NA’s when converted to numeric
  • Values less than 50 inches or greater than 84 inches

Q2: Which of the following arguments, when passed to the function not_inches, would return the vector c(FALSE)?

c(70) %>% not_inches
[1] FALSE

Q3: Our function not_inches returns the object ind. Which answer correctly describes ind?

  • ind is a logical vector of TRUE and FALSE, equal in length to the vector x (in the arguments list). TRUE indicates that a height entry is incorrectly formatted.
C2. Regex

Q1: Given the following code

s = c("70" ,"5 ft", "4'11", "", ".", "Six feet"); s
[1] "70"       "5 ft"     "4'11"     ""         "."        "Six feet"

What pattern vector yields the following result?

pattern = "\\d|ft"
str_subset(s, pattern)
[1] "70"   "5 ft" "4'11"
C3. Character Classes, Anchors, and Quantifiers

Character Classes - []

yes = as.character(4:7)
no = as.character(1:3)
str_detect(c(yes,no), "[4-7]")
[1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE

Anchors - ^ and $

yes = c("1","5","9")
no = c("12","123"," 1","a4","b")
str_detect(c(yes,no), "^\\d$")
[1]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE

Quantifiers - {}

yes = c("1","5","9","12")
no = c("123","a4","b")
str_detect(c(yes,no), "^\\d{1,2}$")
[1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE

Pattern of Feet & Inches

pattern = "^[4-7]'\\d{1,2}\"$"
yes = c("5'7\"", "6'2\"", "5'12\"")
no = c("6,2\"", "6.2\"", "I am 5'11\"", "3'2\"", "64")
str_detect(c(yes,no), pattern)
[1]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE

Q1: You enter the following set of commands into your R console. What is your printed result?

animals <- c("cat", "puppy", "Moose", "MONKEY")
pattern <- "[a-z]"
str_detect(animals, pattern)
[1]  TRUE  TRUE  TRUE FALSE

Q2: You enter the following set of commands into your R console. What is your printed result?

animals <- c("cat", "puppy", "Moose", "MONKEY")
pattern <- "[A-Z]$"
str_detect(animals, pattern)
[1] FALSE FALSE FALSE  TRUE

Q3: You enter the following set of commands into your R console. What is your printed result?

animals <- c("cat", "puppy", "Moose", "MONKEY")
pattern <- "[a-z]{4,5}"
str_detect(animals, pattern)
[1] FALSE  TRUE  TRUE FALSE
C4. Search and Replace with Regex

Initial Pattern

# Whole string must be: one digit 4-7, an apostrophe, then one or two digits.
pattern = "^[4-7]'\\d{1,2}$"
str_subset(problems, pattern)   # 24
 [1] "5'7"  "5'11" "5'3"  "5'12" "5'11" "5'4"  "5'5"  "5'8"  "5'6"  "5'4"  "5'5" 
[12] "5'7"  "5'6"  "5'7"  "5'8"  "5'10" "5'10" "5'2"  "5'11" "5'8"  "5'9"  "5'4" 
[23] "5'6"  "5'6" 

Replace Feet and Inches

pattern = "^[4-7]'\\d{1,2}$"
problems %>% 
  str_replace("feet|ft|foot","'") %>% 
  str_replace("inches|in|''|\"","") %>%
  str_detect(pattern) %>% 
  sum                           # 48 
[1] 48

More Quantifiers

  • * : 0 or more
  • + : 1 or more
  • ? : 0 or 1
yes = c("AB","A1B","A11B","A111B","A1111B")
no = c("A2B","A21B")
str_detect(c(yes,no), "A1*B")
[1]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE

Space - \\s

pattern = "^[4-7]\\s*'\\s*\\d{1,2}$"
problems %>% 
  str_replace("feet|ft|foot","'") %>% 
  str_replace("inches|in|''|\"","") %>%
  str_detect(pattern) %>% 
  sum                           # 53 
[1] 53

Q1: Given the following code, which TWO pattern vectors would yield the following result?

animals <- c("moose", "monkey", "meerkat", "mountain lion")
pattern = c("mo*","mo?","mo+","moo*")
sapply(pattern, function(p) str_detect(animals, p)) %>% t
     [,1] [,2]  [,3] [,4]
mo*  TRUE TRUE  TRUE TRUE
mo?  TRUE TRUE  TRUE TRUE
mo+  TRUE TRUE FALSE TRUE
moo* TRUE TRUE FALSE TRUE

Q2: You are working on some data from different universities. You have the following vector

schools = c(
  "U. Kentucky","Univ New Hampshire","Univ. of Massachusetts",
  "University Georgia","U California","California State University"
  )

You want to clean this data to match the full names of each university. Which of the following commands could accomplish this?

schools %>% 
  str_replace("^Univ\\.?\\s|^U\\.?\\s", "University ") %>% 
  str_replace("^University of |^University ", "University of ")
[1] "University of Kentucky"      "University of New Hampshire"
[3] "University of Massachusetts" "University of Georgia"      
[5] "University of California"    "California State University"
C5. Groups with Regex

Define Groups ()

pattern_no_group = "^[4-7],\\d*$"
pattern_group = "^([4-7]),(\\d*)$"
yes = c("5,9","5,11","6,","6,1")
no = c("5'9",",","2,8","6.1.1")
s = c(yes, no)

Groups do not affect pattern detection

str_detect(s, pattern_no_group)
[1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
str_detect(s, pattern_group)
[1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE

The difference between

  • str_match()
  • str_extract()
  • str_subset()
  • str_detect()
str_match(s, pattern_group)
     [,1]   [,2] [,3]
[1,] "5,9"  "5"  "9" 
[2,] "5,11" "5"  "11"
[3,] "6,"   "6"  ""  
[4,] "6,1"  "6"  "1" 
[5,] NA     NA   NA  
[6,] NA     NA   NA  
[7,] NA     NA   NA  
[8,] NA     NA   NA  
str_extract(s, pattern_group)
[1] "5,9"  "5,11" "6,"   "6,1"  NA     NA     NA     NA    
str_subset(s, pattern_group)
[1] "5,9"  "5,11" "6,"   "6,1" 
str_detect(s, pattern_group)
[1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE

Replace with Group

pattern = "^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$"
str_subset(problems, pattern)
 [1] "5.3"   "5.25"  "5.5"   "6.5"   "5.8"   "5.6"   "5,3"   "5.9"   "6,8"   "5.5"  
[11] "6.2"   "6.2"   "5.8"   "5.1"   "5.11"  "5.75"  "5,4"   "5.4"   "6.1"   "5.6"  
[21] "5.6"   "5.4"   "5.9"   "5.6"   "5.6"   "5.5"   "5.2"   "5.5"   "5.5"   "6.5"  
[31] "5,8"   "5.11"  "5.5"   "6.7"   "5.1"   "5.6"   "5.5"   "5.2"   "5.6"   "5.7"  
[41] "5.9"   "6.5"   "5.11"  "5 .11" "5.7"   "5.5"   "5 11"  "5.8"   "5.8"   "5.1"  
[51] "5.11"  "5.7"   "5.9"   "5.2"   "5.5"   "5.51"  "5.8"   "5.7"   "6.1"   "5.69" 
[61] "5.7"   "5.25"  "5.5"   "5.1"   "6 04"  "6.3"   "5.5"   "5.7"   "5.57"  "5.7"  
str_subset(problems, pattern) %>% 
  str_replace(pattern, "\\1'\\2")
 [1] "5'3"  "5'25" "5'5"  "6'5"  "5'8"  "5'6"  "5'3"  "5'9"  "6'8"  "5'5"  "6'2" 
[12] "6'2"  "5'8"  "5'1"  "5'11" "5'75" "5'4"  "5'4"  "6'1"  "5'6"  "5'6"  "5'4" 
[23] "5'9"  "5'6"  "5'6"  "5'5"  "5'2"  "5'5"  "5'5"  "6'5"  "5'8"  "5'11" "5'5" 
[34] "6'7"  "5'1"  "5'6"  "5'5"  "5'2"  "5'6"  "5'7"  "5'9"  "6'5"  "5'11" "5'11"
[45] "5'7"  "5'5"  "5'11" "5'8"  "5'8"  "5'1"  "5'11" "5'7"  "5'9"  "5'2"  "5'5" 
[56] "5'51" "5'8"  "5'7"  "6'1"  "5'69" "5'7"  "5'25" "5'5"  "5'1"  "6'04" "6'3" 
[67] "5'5"  "5'7"  "5'57" "5'7" 

Q1: Rather than using the pattern_with_groups vector from the video, you accidentally write in the following code. What is your result?

pattern_w_groups = "^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$"
problems1 <- c("5.3", "5,5", "6 1", "5 .11", "5, 12")
pattern_with_groups <- "^([4-7])[,\\.](\\d*)$"
str_replace(problems1, pattern_with_groups, "\\1'\\2")
[1] "5'3"   "5'5"   "6 1"   "5 .11" "5, 12"

Q2: You notice your mistake and correct your pattern regex to the following. What is your result?

problems1 <- c("5.3", "5,5", "6 1", "5 .11", "5, 12")
pattern_with_groups <- "^([4-7])[,\\.\\s](\\d*)$"
str_replace(problems1, pattern_with_groups, "\\1'\\2")
[1] "5'3"   "5'5"   "6'1"   "5 .11" "5, 12"

I think what it intends to do is …

problems1 <- c("5.3", "5,5", "6 1", "5 .11", "5, 12")
pattern_with_groups <- "^([4-7])\\s*[,\\.\\s]\\s*(\\d*)$"
str_replace(problems1, pattern_with_groups, "\\1'\\2")
[1] "5'3"  "5'5"  "6'1"  "5'11" "5'12"
C6. Testing and Improving
# Normalise the problematic entries step by step:
# 1. replace the first feet word with an apostrophe,
converted <- str_replace(problems, "feet|foot|ft", "'")
# 2. drop the first inches marker (word, double apostrophe or double quote),
converted <- str_replace(converted, "inches|in|''|\"", "")
# 3. rewrite "<feet><separator><inches>" entries as <feet>'<inches>.
converted <- str_replace(
  converted,
  "^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$",
  "\\1'\\2"
)
# Target format: feet digit, apostrophe, one or two inch digits, with
# optional whitespace around the apostrophe.
pattern <- "^[4-7]\\s*'\\s*\\d{1,2}$"
# TRUE for every entry that now matches the target format.
index <- str_detect(converted, pattern)
mean(index)          # 0.42123
[1] 0.4212
converted[!index]    
  [1] "6"             "165cm"         "511"           "6"            
  [5] "2"             ">9000"         "5 ' and 8.11 " "11111"        
  [9] "6"             "150"           "103.2"         "19"           
 [13] "5"             "175"           "177"           "300"          
 [17] "6'"            "6"             "178"           "163"          
 [21] "175"           "Five ' eight " "178"           "165"          
 [25] "165"           "180"           "169"           "7"            
 [29] "157"           "169"           "214"           "183"          
 [33] "6"             "162"           "178"           "180"          
 [37] "170"           "178"           "0.7"           "190"          
 [41] "184"           "184"           "6"             "167"          
 [45] "2'33"          "180"           "180"           "183"          
 [49] "170"           "172"           "612"           "168"          
 [53] "1,70"          "172"           "87"            "176"          
 [57] "5'7.5"         "5'7.5"         "111"           "173"          
 [61] "174"           "176"           "175"           "5' 7.78"      
 [65] "12"            "6"             "yyy"           "89"           
 [69] "183"           "172"           "34"            "25"           
 [73] "6"             "168"           "170"           "175"          
 [77] "6"             "22"            "684"           "6"            
 [81] "1"             "1"             "6*12"          "87"           
 [85] "162"           "165"           "184"           "6"            
 [89] "173"           "1.6"           "172"           "170"          
 [93] "174"           "170"           "160"           "120"          
 [97] "120"           "23"            "192"           "167"          
[101] "150"           "1.7"           "174"           "6"            
[105] "172"           "180"           "5"             "180"          
[109] "180"           "69"            "5' 9 "         "5 ' 9 "       
[113] "167"           "168"           "6"             "178"          
[117] "182"           "164"           "185"           "6"            
[121] "86"            "708,661"       "5 ' 6 "        "172"          
[125] "6"             "160"           "649,606"       "10000"        
[129] "152"           "1"             "180"           "728,346"      
[133] "175"           "158"           "173"           "164"          
[137] "169"           "0"             "185"           "168"          
[141] "169"           "174"           "179"           "6"            
[145] "6"             "170"           "6"             "172"          
[149] "158"           "100"           "159"           "190"          
[153] "170"           "158"           "180"           "210"          
[157] "88"            "6"             "162"           "170 cm"       
[161] "170"           "157"           "186"           "170"          
[165] "7,283,465"     "5"             "5"             "34"           
[169] "161"          

Q1: In our example, we use the following code to detect height entries that do not match our pattern of x’y”.

problems1 <- c("5.3", "5,5", "6 1", "5 .11", "5, 12")
converted1 <- problems1 %>% 
  str_replace("feet|foot|ft", "'") %>% 
  str_replace("inches|in|''|\"", "") %>% 
  str_replace("^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$", "\\1'\\2")

pattern <- "^[4-7]\\s*'\\s*\\d{1,2}$"
index <- str_detect(converted1, pattern)
converted1[!index]

Which answer best describes the differences between the regex string we use as an argument in
str_replace("^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$", "\\1'\\2")
And the regex string in
pattern <- "^[4-7]\\s*'\\s*\\d{1,2}$"?

  • The regex used in str_replace looks for either a comma, period or space between the feet and inches digits, while the pattern regex just looks for an apostrophe; the regex in str_replace allows for none or more digits to be entered as inches, while the pattern regex only allows for one or two digits.

Q2: You notice a few entries that are not being properly converted using your str_replace and str_detect code

yes <- c("5 feet 7inches")
no <- c("5ft 9 inches", "5 ft 9 inches")
s <- c(yes, no)
converted <- s %>% 
  str_replace("feet|foot|ft", "'") %>% 
  str_replace("inches|in|''|\"", "") %>% 
  str_replace("^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$", "\\1'\\2")
converted
[1] "5 ' 7"  "5' 9 "  "5 ' 9 "
pattern <- "^[4-7]\\s*'\\s*\\d{1,2}$"
str_detect(converted, pattern)
[1]  TRUE FALSE FALSE

It seems like the problem may be due to spaces around the words feet|foot|ft and inches|in. What is another way you could fix this problem?

# Same clean-up as before, with optional whitespace added to the patterns.
# NOTE: alternation (`|`) has the lowest precedence, so each `\\s*` belongs
# only to the alternative it is written next to: the leading `\\s*` applies
# to "feet" / "inches" and the trailing `\\s*` to "ft" / the double quote.
# That is why some spaces survive in the result printed below.
converted <- s %>% 
  str_replace("\\s*feet|foot|ft\\s*", "'") %>% 
  str_replace("\\s*inches|in|''|\"\\s*", "") %>% 
  str_replace("^([4-7])\\s*[,\\.\\s+]\\s*(\\d*)$", "\\1'\\2")
converted
[1] "5' 7" "5'9"  "5 '9"
pattern <- "^[4-7]\\s*'\\s*\\d{1,2}$"
str_detect(converted, pattern)
[1] TRUE TRUE TRUE



D. String Processing Part 3

D2. Separate with Regex
s = c("5'10", "6'1")
tab = data.frame(x = s)
separate(tab, x, c("feet", "inches"), sep="'")
  feet inches
1    5     10
2    6      1
extract(tab, x, c("feet", "inches"), regex="(\\d)'(\\d{1,2})")
  feet inches
1    5     10
2    6      1
s = c("5'10", "6'1\"","5'8inches")
tab = data.frame(x = s)
separate(tab, x, c("feet", "inches"), sep="'")
  feet  inches
1    5      10
2    6      1"
3    5 8inches
extract(tab, x, c("feet", "inches"), regex="(\\d)'(\\d{1,2})")
  feet inches
1    5     10
2    6      1
3    5      8
D1. Using Groups and Quantifiers

Q1: If you use the extract code from our video, the decimal point is dropped. What modification of the code would allow you to put the decimals in a third column called “decimal”?

library(tidyr)
s <- c("5'10", "6'1\"", "5'8inches", "5'7.5")
tab <- data.frame(x = s)
# Candidate patterns from the quiz options:
#   rx[1]: the third group can capture only the literal dot, not the digits.
#   rx[2]: the third group is mandatory, so entries without a decimal part
#          do not match.
#   rx[3]: the decimal part is outside any group, leaving two capture groups.
#   rx[4]: an optional third group captures the dot plus the digits after it.
rx = c("(\\d)'(\\d{1,2})(\\.)?", 
       "(\\d)'(\\d{1,2})(\\.\\d+)",
       "(\\d)'(\\d{1,2})\\.\\d+?",
       "(\\d)'(\\d{1,2})(\\.\\d+)?")
# Use the fourth candidate to fill the feet, inches and decimal columns.
extract(tab, x, into=c("feet", "inches", "decimal"), regex=rx[4])
  feet inches decimal
1    5     10    <NA>
2    6      1    <NA>
3    5      8    <NA>
4    5      7      .5
D4. String Splitting
filename =  system.file("extdata/murders.csv", package="dslabs")
lines = readLines(filename)
head(lines)
[1] "state,abb,region,population,total" "Alabama,AL,South,4779736,135"     
[3] "Alaska,AK,West,710231,19"          "Arizona,AZ,West,6392017,232"      
[5] "Arkansas,AR,South,2915918,93"      "California,CA,West,37253956,1257" 
x = str_split(lines, ",", simplify=T)
head(x)
     [,1]         [,2]  [,3]     [,4]         [,5]   
[1,] "state"      "abb" "region" "population" "total"
[2,] "Alabama"    "AL"  "South"  "4779736"    "135"  
[3,] "Alaska"     "AK"  "West"   "710231"     "19"   
[4,] "Arizona"    "AZ"  "West"   "6392017"    "232"  
[5,] "Arkansas"   "AR"  "South"  "2915918"    "93"   
[6,] "California" "CA"  "West"   "37253956"   "1257" 
as.data.frame(x[-1,]) %>% 
  setNames(x[1,]) %>% 
  mutate_all(parse_guess) %>% 
  head(10)
                  state abb    region population total
1               Alabama  AL     South    4779736   135
2                Alaska  AK      West     710231    19
3               Arizona  AZ      West    6392017   232
4              Arkansas  AR     South    2915918    93
5            California  CA      West   37253956  1257
6              Colorado  CO      West    5029196    65
7           Connecticut  CT Northeast    3574097    97
8              Delaware  DE     South     897934    38
9  District of Columbia  DC     South     601723    99
10              Florida  FL     South   19687653   669

Q1: You have the following table

schedule = data.frame(
  day = c("Monday", "Tuesday"),
  staff = c("Mandy, Chris and Laura", "Steve, Ruth and Frank"))
schedule
      day                  staff
1  Monday Mandy, Chris and Laura
2 Tuesday  Steve, Ruth and Frank

Which two commands would properly split the text in the “Staff” column into each individual name? Check all that apply.

lapply(c(",|and", ", | and ", ",\\s|\\sand\\s", "\\s?(,|and)\\s?"),
       function(r) str_split(schedule$staff, r, simplify=T))
[[1]]
     [,1]    [,2]     [,3]      [,4]    
[1,] "M"     "y"      " Chris " " Laura"
[2,] "Steve" " Ruth " " Frank"  ""      

[[2]]
     [,1]    [,2]    [,3]   
[1,] "Mandy" "Chris" "Laura"
[2,] "Steve" "Ruth"  "Frank"

[[3]]
     [,1]    [,2]    [,3]   
[1,] "Mandy" "Chris" "Laura"
[2,] "Steve" "Ruth"  "Frank"

[[4]]
     [,1]    [,2]   [,3]    [,4]   
[1,] "M"     "y"    "Chris" "Laura"
[2,] "Steve" "Ruth" "Frank" ""     

Q2: What code would successfully turn your “Schedule” table into the following tidy table

# Split each staff string on ", " or " and " into a list-column of names,
# then expand that list-column so that every name gets its own row.
# The column to unnest is named explicitly; calling unnest() without a
# column is deprecated in current tidyr and emits a warning.
schedule %>%
  mutate(staff = str_split(staff, ", | and ")) %>%
  unnest(staff)
      day staff
1  Monday Mandy
2  Monday Chris
3  Monday Laura
4 Tuesday Steve
5 Tuesday  Ruth
6 Tuesday Frank
D6. Recoding
library(ggplot2)
data("gapminder")
gapminder %>% filter(region == "Caribbean") %>% 
  ggplot(aes(year, life_expectancy, color=country)) +
  geom_line()

gapminder %>% filter(region == "Caribbean") %>% 
  filter(str_length(country) >= 12) %>% 
  distinct(country)
                         country
1            Antigua and Barbuda
2             Dominican Republic
3 St. Vincent and the Grenadines
4            Trinidad and Tobago
# Same Caribbean plot, but the four country names of 12 or more characters are
# replaced by short labels before plotting; other names pass through unchanged.
gapminder %>%
  filter(region == "Caribbean") %>%
  mutate(
    country = recode(
      country,
      "Antigua and Barbuda" = "Barbuda",
      "Dominican Republic" = "DR",
      "St. Vincent and the Grenadines" = "St. Vincent",
      "Trinidad and Tobago" = "Trinidad"
    )
  ) %>%
  ggplot(mapping = aes(x = year, y = life_expectancy, colour = country)) +
  geom_line()

Q1: Using the gapminder data, you want to recode countries longer than 12 letters in the region Middle Africa to their abbreviations in a new column, country_short. Which code would accomplish this?

library(dslabs)
data("gapminder")

# Keep the Middle Africa countries whose names have 12 or more characters,
# reduce to one row per (region, country), and add the abbreviated name in a
# new column `country_short`.
gapminder %>%
  filter(
    region == "Middle Africa",
    nchar(as.character(country)) >= 12
  ) %>%
  distinct(region, country) %>%
  mutate(
    country_short = recode(
      country,
      `Central African Republic` = "CAR",
      `Congo, Dem. Rep.` = "DRC",
      `Equatorial Guinea` = "Eq. Guinea"
    )
  )
         region                  country country_short
1 Middle Africa Central African Republic           CAR
2 Middle Africa         Congo, Dem. Rep.           DRC
3 Middle Africa        Equatorial Guinea    Eq. Guinea



E. Date, Times and Text Mining

E1. Dates and Times

Q1: Which of the following is the standard ISO 8601 format for dates?

  • YYYY-MM-DD

Q2: Which of the following commands could convert this string into the correct date format?

library(lubridate)

# Three dash-separated dates with two-digit fields; the strings themselves do
# not say which field is the year, the month, or the day.
dates <- c(
  "09-01-02",
  "01-12-07",
  "02-03-04"
)

# Year-month-day reading: "09-01-02" is taken as 2 January 2009.
ymd(dates)
[1] "2009-01-02" "2001-12-07" "2002-03-04"
# Month-day-year reading: "09-01-02" is taken as 1 September 2002.
mdy(dates)
[1] "2002-09-01" "2007-01-12" "2004-02-03"
# Day-month-year reading: "09-01-02" is taken as 9 January 2002.
dmy(dates)
[1] "2002-01-09" "2007-12-01" "2004-03-02"
  • It is impossible to know which format is correct without additional information.






LS0tDQp0aXRsZTogIldyYW5nbGluZywgU3RyaW5nIFByb2Nlc3NpbmcgJiBEYXRlL1RpbWUiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQo8YnI+DQoNCmBgYHtyfQ0KcGFja2FnZXMgPSBjKA0KICAiZHBseXIiLCJnZ3Bsb3QyIiwic3RyaW5nciIsICJkc2xhYnMiLCAicmVhZHIiLCAidGlkeXIiLCAicHVycnIiLA0KICAibHVicmlkYXRlIiwgInJ2ZXN0Ig0KICApDQpleGlzdGluZyA9IGFzLmNoYXJhY3RlcihpbnN0YWxsZWQucGFja2FnZXMoKVssMV0pDQpmb3IocGtnIGluIHBhY2thZ2VzWyEocGFja2FnZXMgJWluJSBleGlzdGluZyldKSBpbnN0YWxsLnBhY2thZ2VzKHBrZykNCmBgYA0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0Kcm0obGlzdD1scyhhbGw9VCkpDQpTeXMuc2V0bG9jYWxlKCJMQ19BTEwiLCJDIikNCm9wdGlvbnMoZGlnaXRzPTQsIHNjaXBlbj0xMikNCmxpYnJhcnkocnZlc3QpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoc3RyaW5ncikNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeSh0aWR5cikNCmxpYnJhcnkoZHNsYWJzKQ0KYGBgDQoNCi0gLSAtDQoNCiMjIyBUaWR5IERhdGENCg0KIyMjIyMgV2ViIFNjcmFwaW5nDQpgYGB7cn0NCmxpYnJhcnkocnZlc3QpDQp1cmwgPSAiaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvTXVyZGVyX2luX3RoZV9Vbml0ZWRfU3RhdGVzX2J5X3N0YXRlIg0KaCA9IHJlYWRfaHRtbCh1cmwpDQoNCnRhYiA9IGh0bWxfbm9kZXMoaCwgInRhYmxlIilbWzJdXSAlPiUNCiAgaHRtbF90YWJsZSAlPiUgDQogIHNldE5hbWVzKGMoDQogICAgInN0YXRlIiwicG9wdWxhdGlvbiIsInRvdGFsIiwibXVyZGVycyIsImd1bl9tdXJkZXJzIiwNCiAgICAiZ3VuX293bmVyc2ppcCIsInRvdGFsX3JhdGUiLCJtdXJkZXJfcmF0ZSIsImd1bl9tdXJkZXJfcmF0ZSIpKQ0KYGBgDQoNCi0gLSAtDQoNCiMjIyBBLiBTdHJpbmcgUHJvY2Vzc2luZyBPdmVydmlldw0KDQotIC0gLQ0KDQojIyMgQi4gU3RyaW5nIFByb2Nlc3NpbmcgUGFydCAxIA0KDQojIyMjIyBCMS4gU3RyaW5nIFBhcnNpbmcNCg0KKipRMToqKiBfV2hpY2ggb2YgdGhlIGZvbGxvd2luZyBpcyBOT1QgYW4gYXBwbGljYXRpb24gb2Ygc3RyaW5nIHBhcnNpbmc/Xw0KDQorIEZvcm1hdHRpbmcgbnVtYmVycyBhbmQgY2hhcmFjdGVycyBzbyB0aGV5IGNhbiBlYXNpbHkgYmUgZGlzcGxheWVkIGluIGRlbGl2ZXJhYmxlcyBsaWtlIHBhcGVycyBhbmQgcHJlc2VudGF0aW9ucy4NCisNCg0KIyMjIyMgQjIuIERlZmluaW5nIFN0cmluZ3M6IFNpbmdsZSBhbmQgRG91YmxlIFF1b3RlcyBhbmQgSG93IHRvIEVzY2FwZQ0KDQoqKlExOioqIF9XaGljaCBvZiB0aGUgZm9sbG93aW5nIGNvbW1hbmRzIHdvdWxkIG5vdCBnaXZlIHlvdSBhbiBlcnJvciBpbiBSP18NCmBgYHtyfQ0KY2F0KCIgTGVCcm9uIEphbWVzIGlzIDYnOFwiICIpDQpgYGAN
Cg0KIyMjIyMgQjMuIGBzdHJpbmdyYCBQYWNrYWdlDQoNCioqUTE6KiogX1doaWNoIG9mIHRoZSBmb2xsb3dpbmcgYXJlIGFkdmFudGFnZXMgb2YgdGhlIHN0cmluZ3IgcGFja2FnZSBvdmVyIHN0cmluZyBwcm9jZXNzaW5nIGZ1bmN0aW9ucyBpbiBiYXNlIFI/IFNlbGVjdCBhbGwgdGhhdCBhcHBseS5fDQoNCisgRnVuY3Rpb25zIGluIHN0cmluZ3IgYWxsIHN0YXJ0IHdpdGgg4oCcc3RyX+KAnSwgd2hpY2ggbWFrZXMgdGhlbSBlYXN5IHRvIGxvb2sgdXAgdXNpbmcgYXV0b2NvbXBsZXRlLg0KKyBTdHJpbmdyIGZ1bmN0aW9ucyB3b3JrIGJldHRlciB3aXRoIHBpcGVzLg0KKyBoZSBvcmRlciBvZiBhcmd1bWVudHMgaXMgbW9yZSBjb25zaXN0ZW50IGluIHN0cmluZ3IgZnVuY3Rpb25zIHRoYW4gaW4gYmFzZSBSLg0KKw0KDQojIyMjIyBCNC4gQ2FzZSBTdHVkeSAxOiBVUyBNdXJkZXJzIERhdGENCg0KYGBge3J9DQpzYXBwbHkodGFiLCBzdHJfZGV0ZWN0LCAiLCIpICU+JSBjb2xTdW1zDQpgYGANCg0KYGBge3J9DQp0YWIgPSB0YWIgJT4lIG11dGF0ZV9hdCgyOjMsIHBhcnNlX251bWJlcikNCnNhcHBseSh0YWIsIHN0cl9kZXRlY3QsICIsIikgJT4lIGNvbFN1bXMNCmBgYA0KDQoNCioqUTE6KiogWW91IGhhdmUgYSBkYXRhZnJhbWUgb2YgbW9udGhseSBzYWxlcyBhbmQgcHJvZml0cyBpbiBSDQpgYGB7cn0NCmRhdCA9IHJlYWQudGFibGUoImRhdGEvc2FsZXMudHh0IiwgaGVhZGVyPVQsIHNlcD0iIiwgc3RyaW5nc0FzRmFjdG9ycz1GKQ0KZGF0DQpgYGANCg0KX1doaWNoIG9mIHRoZSBmb2xsb3dpbmcgY29tbWFuZHMgY291bGQgY29udmVydCB0aGUgc2FsZXMgYW5kIHByb2ZpdHMgY29sdW1ucyB0byBudW1lcmljPyBTZWxlY3QgYWxsIHRoYXQgYXBwbHkuXw0KYGBge3J9DQpkYXQgJT4lIG11dGF0ZV9hdCgyOjMsIHBhcnNlX251bWJlcikNCmBgYA0KDQpgYGB7cn0NCmRhdCAlPiUgbXV0YXRlX2F0KDI6MywgZnVucyhzdHJfcmVwbGFjZV9hbGwoLiwgYygiXFwkfCwiKSwgIiIpKSkNCmBgYA0KDQpgYGB7ciBldmFsPUZ9DQpkYXQgJT4lIG11dGF0ZV9hbGwoMjozLCBwYXJzZV9udW1iZXIpDQpgYGANCg0KYGBge3J9DQpkYXQkUHJvZml0IDwtIHN0cl9yZXBsYWNlX2FsbChkYXQkUHJvZml0LCBjKCJcXCR8LCIpLCAiIikgDQpkYXQkU2FsZXMgPC0gcGFyc2VfbnVtYmVyKGRhdCRTYWxlcykgDQpkYXQNCmBgYA0KDQotIC0gLQ0KDQojIyMgQy4gU3RyaW5nIFByb2Nlc3NpbmcgUGFydCAyDQoNCiMjIyMjIEMxLiBDYXNlIFN0dWR5MjogUmVwb3J0ZWQgSGVpZ2h0cw0KDQpgYGB7cn0NCmxpYnJhcnkoZHNsYWJzKQ0KZGF0YShyZXBvcnRlZF9oZWlnaHRzKQ0KcmVwb3J0ZWRfaGVpZ2h0cyAlPiUgaGVhZA0KYGBgDQoNCmBgYHtyfQ0KcmVwb3J0ZWRfaGVpZ2h0cyAlPiUNCiAgbXV0YXRlKG5ld19oZWlnaHQgPSBhcy5udW1lcmljKGhlaWdodCkpICU+JSANCiAgZmlsdGVyKGlzLm5hKG5ld19oZWlnaHQpKSAlPiUgDQogIGdldEVsZW1lbnQoImhlaWdo
dCIpDQpgYGANCg0KYGBge3J9DQpub3RfaW5jaGVzIDwtIGZ1bmN0aW9uKHgsIHNtYWxsZXN0ID0gNTAsIHRhbGxlc3QgPSA4NCkgew0KICBpbmNoZXMgPC0gc3VwcHJlc3NXYXJuaW5ncyhhcy5udW1lcmljKHgpKQ0KICBpbmQgPC0gaXMubmEoaW5jaGVzKSB8IGluY2hlcyA8IHNtYWxsZXN0IHwgaW5jaGVzID4gdGFsbGVzdCANCiAgaW5kfQ0KYGBgDQoNCmBgYHtyfQ0KcHJvYmxlbXMgPSByZXBvcnRlZF9oZWlnaHRzJGhlaWdodCAlPiUgLltub3RfaW5jaGVzKC4pXQ0KcHJvYmxlbXMNCmBgYA0KDQpgYGB7cn0NCnN0cl9zdWJzZXQocHJvYmxlbXMsICJjbXxpbmNoZXMiKQ0KYGBgDQoNCmBgYHtyfQ0Kc3RyX3N1YnNldChwcm9ibGVtcywgImNtfGluY2hlcyIpICU+JSBzdHJfZXh0cmFjdCgiY218aW5jaGVzIikNCmBgYA0KDQoNCioqUTE6KiogSW4gdGhlIHZpZGVvLCB3ZSB1c2UgdGhlIGZ1bmN0aW9uIGBub3RfaW5jaGVzYCB0byBpZGVudGlmeSBoZWlnaHRzIHRoYXQgd2VyZSBpbmNvcnJlY3RseSBlbnRlcmVkDQpgYGB7cn0NCm5vdF9pbmNoZXMgPC0gZnVuY3Rpb24oeCwgc21hbGxlc3QgPSA1MCwgdGFsbGVzdCA9IDg0KSB7DQogIGluY2hlcyA8LSBzdXBwcmVzc1dhcm5pbmdzKGFzLm51bWVyaWMoeCkpDQogIGluZCA8LSBpcy5uYShpbmNoZXMpIHwgaW5jaGVzIDwgc21hbGxlc3QgfCBpbmNoZXMgPiB0YWxsZXN0IA0KICBpbmQNCn0NCmBgYA0KSW4gdGhpcyBmdW5jdGlvbiwgX3doYXQgVFdPIHR5cGVzIG9mIHZhbHVlcyBhcmUgaWRlbnRpZmllZCBhcyBub3QgYmVpbmcgY29ycmVjdGx5IGZvcm1hdHRlZCBpbiBpbmNoZXM/Xw0KDQorIFZhbHVlcyB0aGF0IHJlc3VsdCBpbiBOQeKAmXMgd2hlbiBjb252ZXJ0ZWQgdG8gbnVtZXJpYw0KKyBWYWx1ZXMgbGVzcyB0aGFuIDUwIGluY2hlcyBvciBncmVhdGVyIHRoYW4gODQgaW5jaGVzDQorIA0KDQoqKlEyOioqIFdoaWNoIG9mIHRoZSBmb2xsb3dpbmcgYXJndW1lbnRzLCB3aGVuIHBhc3NlZCB0byB0aGUgZnVuY3Rpb24gbm90X2luY2hlcywgd291bGQgcmV0dXJuIHRoZSB2ZWN0b3IgYGMoRkFMU0UpYD8NCmBgYHtyfQ0KYyg3MCkgJT4lIG5vdF9pbmNoZXMNCmBgYA0KDQoqKlEzOioqIE91ciBmdW5jdGlvbiBgbm90X2luY2hlc2AgcmV0dXJucyB0aGUgb2JqZWN0IGBpbmRgLiBXaGljaCBhbnN3ZXIgY29ycmVjdGx5IGRlc2NyaWJlcyBgaW5kYD8NCg0KKyBgaW5kYCBpcyBhIGxvZ2ljYWwgdmVjdG9yIG9mIGBUUlVFYCBhbmQgYEZBTFNFYCwgZXF1YWwgaW4gbGVuZ3RoIHRvIHRoZSB2ZWN0b3IgYHhgIChpbiB0aGUgYXJndW1lbnRzIGxpc3QpLiBgVFJVRWAgaW5kaWNhdGVzIHRoYXQgYSBoZWlnaHQgZW50cnkgaXMgaW5jb3JyZWN0bHkgZm9ybWF0dGVkLg0KKw0KDQoNCiMjIyMjIEMyLiBSZWdleA0KDQoqKlExOioqIEdpdmVuIHRoZSBmb2xsb3dpbmcgY29kZQ0KYGBge3J9DQpzID0gYygiNzAiICwiNSBmdCIsICI0JzExIiwgIiIsICIuIiwgIlNpeCBmZWV0Iik7IHMNCmBgYA0KDQpf
V2hhdCBwYXR0ZXJuIHZlY3RvciB5aWVsZHMgdGhlIGZvbGxvd2luZyByZXN1bHQ/Xw0KYGBge3J9DQpwYXR0ZXJuID0gIlxcZHxmdCINCnN0cl9zdWJzZXQocywgcGF0dGVybikNCmBgYA0KDQojIyMjIyBDMy4gQ2hhcmFjdGVyIENsYXNzZXMsIEFuY2hvcnMsIGFuZCBRdWFsaWZpZXJzDQoNCkNoYXJhY3RlciBDbGFzc2VzIC0gYFtdYA0KYGBge3J9DQp5ZXMgPSBhcy5jaGFyYWN0ZXIoNDo3KQ0Kbm8gPSBhcy5jaGFyYWN0ZXIoMTozKQ0Kc3RyX2RldGVjdChjKHllcyxubyksICJbNC03XSIpDQpgYGANCg0KQW5jaG9ycyAtIGBeYCBhbmQgYCRgDQpgYGB7cn0NCnllcyA9IGMoIjEiLCI1IiwiOSIpDQpubyA9IGMoIjEyIiwiMTIzIiwiIDEiLCJhNCIsImIiKQ0Kc3RyX2RldGVjdChjKHllcyxubyksICJeXFxkJCIpDQpgYGANCg0KUXVhbGlmaWVycyAtIGB7fWANCmBgYHtyfQ0KeWVzID0gYygiMSIsIjUiLCI5IiwiMTIiKQ0Kbm8gPSBjKCIxMjMiLCJhNCIsImIiKQ0Kc3RyX2RldGVjdChjKHllcyxubyksICJeXFxkezEsMn0kIikNCmBgYA0KDQpQYXR0ZXJuIG9mIEZlZXRzICYgSW5jaGVzDQpgYGB7cn0NCnBhdHRlcm4gPSAiXls0LTddJ1xcZHsxLDJ9XCIkIg0KeWVzID0gYygiNSc3XCIiLCAiNicyXCIiLCAiNScxMlwiIikNCm5vID0gYygiNiwyXCIiLCAiNi4yXCIiLCAiSSBhbSA1JzExXCIiLCAiMycyXCIiLCAiNjQiKQ0Kc3RyX2RldGVjdChjKHllcyxubyksIHBhdHRlcm4pDQpgYGANCg0KKipRMToqKiBZb3UgZW50ZXIgdGhlIGZvbGxvd2luZyBzZXQgb2YgY29tbWFuZHMgaW50byB5b3VyIFIgY29uc29sZS4gX1doYXQgaXMgeW91ciBwcmludGVkIHJlc3VsdD9fDQpgYGB7cn0NCmFuaW1hbHMgPC0gYygiY2F0IiwgInB1cHB5IiwgIk1vb3NlIiwgIk1PTktFWSIpDQpwYXR0ZXJuIDwtICJbYS16XSINCnN0cl9kZXRlY3QoYW5pbWFscywgcGF0dGVybikNCmBgYA0KDQoqKlEyOioqIFlvdSBlbnRlciB0aGUgZm9sbG93aW5nIHNldCBvZiBjb21tYW5kcyBpbnRvIHlvdXIgUiBjb25zb2xlLiBfV2hhdCBpcyB5b3VyIHByaW50ZWQgcmVzdWx0P18gDQpgYGB7cn0NCmFuaW1hbHMgPC0gYygiY2F0IiwgInB1cHB5IiwgIk1vb3NlIiwgIk1PTktFWSIpDQpwYXR0ZXJuIDwtICJbQS1aXSQiDQpzdHJfZGV0ZWN0KGFuaW1hbHMsIHBhdHRlcm4pDQpgYGANCg0KKipRMzoqKiBZb3UgZW50ZXIgdGhlIGZvbGxvd2luZyBzZXQgb2YgY29tbWFuZHMgaW50byB5b3VyIFIgY29uc29sZS4gX1doYXQgaXMgeW91ciBwcmludGVkIHJlc3VsdD9fDQpgYGB7cn0NCmFuaW1hbHMgPC0gYygiY2F0IiwgInB1cHB5IiwgIk1vb3NlIiwgIk1PTktFWSIpDQpwYXR0ZXJuIDwtICJbYS16XXs0LDV9Ig0Kc3RyX2RldGVjdChhbmltYWxzLCBwYXR0ZXJuKQ0KYGBgDQoNCiMjIyMjIEM0LiBTZWFyY2ggYW5kIFJlcGxhY2Ugd2l0aCBSZWdleA0KDQpJbml0YWwgUGF0dGVybg0KYGBge3J9DQpwYXR0ZXJuID0gIl5bNC03XSdcXGR7MSwyfSQiDQpzdHJfc3Vic2V0KHByb2Js
ZW1zLCBwYXR0ZXJuKSAgICMgMjMNCmBgYA0KDQpSZXBsYWNlIEZlZXQgYW5kIEluY2hlcw0KYGBge3J9DQpwYXR0ZXJuID0gIl5bNC03XSdcXGR7MSwyfSQiDQpwcm9ibGVtcyAlPiUgDQogIHN0cl9yZXBsYWNlKCJmZWV0fGZ0fGZvb3QiLCInIikgJT4lIA0KICBzdHJfcmVwbGFjZSgiaW5jaGVzfGlufCcnfFwiIiwiIikgJT4lDQogIHN0cl9kZXRlY3QocGF0dGVybikgJT4lIA0KICBzdW0gICAgICAgICAgICAgICAgICAgICAgICAgICAjIDQ4IA0KYGBgDQoNClRoZSBNb3JlIFF1YWxpZmllcnMNCg0KKyBgKmAgOiAwIG9yIG1vcmUNCisgYCtgIDogMSBvciBtb3JlDQorIGA/YCA6IDAgb3IgMQ0KDQpgYGB7cn0NCnllcyA9IGMoIkFCIiwiQTFCIiwiQTExQiIsIkExMTFCIiwiQTExMTFCIikNCm5vID0gYygiQTJCIiwiQTIxQiIpDQpzdHJfZGV0ZWN0KGMoeWVzLG5vKSwgIkExKkIiKQ0KYGBgDQoNClNwYWNlIC0gYFxcc2ANCmBgYHtyfQ0KcGF0dGVybiA9ICJeWzQtN11cXHMqJ1xccypcXGR7MSwyfSQiDQpwcm9ibGVtcyAlPiUgDQogIHN0cl9yZXBsYWNlKCJmZWV0fGZ0fGZvb3QiLCInIikgJT4lIA0KICBzdHJfcmVwbGFjZSgiaW5jaGVzfGlufCcnfFwiIiwiIikgJT4lDQogIHN0cl9kZXRlY3QocGF0dGVybikgJT4lIA0KICBzdW0gICAgICAgICAgICAgICAgICAgICAgICAgICAjIDUzIA0KYGBgDQoNCioqUTE6KiogR2l2ZW4gdGhlIGZvbGxvd2luZyBjb2RlLCBfd2hpY2ggVFdPIGBwYXR0ZXJuYCB2ZWN0b3JzIHdvdWxkIHlpZWxkIHRoZSBmb2xsb3dpbmcgcmVzdWx0P18NCmBgYHtyfQ0KYW5pbWFscyA8LSBjKCJtb29zZSIsICJtb25rZXkiLCAibWVlcmthdCIsICJtb3VudGFpbiBsaW9uIikNCnBhdHRlcm4gPSBjKCJtbyoiLCJtbz8iLCJtbysiLCJtb28qIikNCnNhcHBseShwYXR0ZXJuLCBmdW5jdGlvbihwKSBzdHJfZGV0ZWN0KGFuaW1hbHMsIHApKSAlPiUgdA0KYGBgDQoNCg0KKipRMjoqKiBZb3UgYXJlIHdvcmtpbmcgb24gc29tZSBkYXRhIGZyb20gZGlmZmVyZW50IHVuaXZlcnNpdGllcy4gWW91IGhhdmUgdGhlIGZvbGxvd2luZyB2ZWN0b3INCmBgYHtyfQ0Kc2Nob29scyA9IGMoDQogICJVLiBLZW50dWNreSIsIlVuaXYgTmV3IEhhbXBzaGlyZSIsIlVuaXYuIG9mIE1hc3NhY2h1c2V0dHMiLA0KICAiVW5pdmVyc2l0eSBHZW9yZ2lhIiwiVSBDYWxpZm9ybmlhIiwiQ2FsaWZvcm5pYSBTdGF0ZSBVbml2ZXJzaXR5Ig0KICApDQpgYGANCg0KWW91IHdhbnQgdG8gY2xlYW4gdGhpcyBkYXRhIHRvIG1hdGNoIHRoZSBmdWxsIG5hbWVzIG9mIGVhY2ggdW5pdmVyc2l0eS4gX1doYXQgb2YgdGhlIGZvbGxvd2luZyBjb21tYW5kcyBjb3VsZCBhY2NvbXBsaXNoIHRoaXM/Xw0KYGBge3J9DQpzY2hvb2xzICU+JSANCiAgc3RyX3JlcGxhY2UoIl5Vbml2XFwuP1xcc3xeVVxcLj9cXHMiLCAiVW5pdmVyc2l0eSAiKSAlPiUgDQogIHN0cl9yZXBsYWNlKCJeVW5pdmVyc2l0eSBvZiB8XlVuaXZlcnNpdHkgIiwgIlVuaXZlcnNpdHkgb2YgIikN
CmBgYA0KDQojIyMjIyBDNS4gR3JvdXBzIHdpdGggUmVnZXgNCg0KRGVmaW5lIEdyb3VwcyBgKClgIA0KYGBge3J9DQpwYXR0ZXJuX25vX2dyb3VwID0gIl5bNC03XSxcXGQqJCINCnBhdHRlcm5fZ3JvdXAgPSAiXihbNC03XSksKFxcZCopJCINCnllcyA9IGMoIjUsOSIsIjUsMTEiLCI2LCIsIjYsMSIpDQpubyA9IGMoIjUnOSIsIiwiLCIyLDgiLCI2LjEuMSIpDQpzID0gYyh5ZXMsIG5vKQ0KYGBgDQoNCkdyb3VwcyBkbyBub3QgYWZmZWN0IHBhdHRlcm4gZGV0ZWN0aW9uDQpgYGB7cn0NCnN0cl9kZXRlY3QocywgcGF0dGVybl9ub19ncm91cCkNCnN0cl9kZXRlY3QocywgcGF0dGVybl9ncm91cCkNCmBgYA0KDQpUaGUgZGlmZmVyZW5jZSBiZXR3ZWVuIA0KDQorIGBzdHJfbWF0Y2goKWANCisgYHN0cl9leHRhcmN0KClgIA0KKyBgc3RyX3N1YnNldCgpYCANCisgYHN0cl9kZXRlY3QoKWAgDQorIA0KDQpgYGB7cn0NCnN0cl9tYXRjaChzLCBwYXR0ZXJuX2dyb3VwKQ0KYGBgDQoNCmBgYHtyfQ0Kc3RyX2V4dHJhY3QocywgcGF0dGVybl9ncm91cCkNCmBgYA0KDQpgYGB7cn0NCnN0cl9zdWJzZXQocywgcGF0dGVybl9ncm91cCkNCmBgYA0KDQpgYGB7cn0NCnN0cl9kZXRlY3QocywgcGF0dGVybl9ncm91cCkNCmBgYA0KDQpSZXBsYWNlIHdpdGggR3JvdXANCmBgYHtyfQ0KcGF0dGVybiA9ICJeKFs0LTddKVxccypbLFxcLlxccytdXFxzKihcXGQqKSQiDQpzdHJfc3Vic2V0KHByb2JsZW1zLCBwYXR0ZXJuKQ0Kc3RyX3N1YnNldChwcm9ibGVtcywgcGF0dGVybikgJT4lIA0KICBzdHJfcmVwbGFjZShwYXR0ZXJuLCAiXFwxJ1xcMiIpDQpgYGANCg0KKipRMToqKiBSYXRoZXIgdGhhbiB1c2luZyB0aGUgcGF0dGVybl93aXRoX2dyb3VwcyB2ZWN0b3IgZnJvbSB0aGUgdmlkZW8sIHlvdSBhY2NpZGVudGFsbHkgd3JpdGUgaW4gdGhlIGZvbGxvd2luZyBjb2RlLiBfV2hhdCBpcyB5b3VyIHJlc3VsdD9fDQpgYGB7cn0NCnBhdHRlcm5fd19ncm91cHMgPSAiXihbNC03XSlcXHMqWyxcXC5cXHMrXVxccyooXFxkKikkIg0KcHJvYmxlbXMxIDwtIGMoIjUuMyIsICI1LDUiLCAiNiAxIiwgIjUgLjExIiwgIjUsIDEyIikNCnBhdHRlcm5fd2l0aF9ncm91cHMgPC0gIl4oWzQtN10pWyxcXC5dKFxcZCopJCINCnN0cl9yZXBsYWNlKHByb2JsZW1zMSwgcGF0dGVybl93aXRoX2dyb3VwcywgIlxcMSdcXDIiKQ0KYGBgDQoNCioqUTI6KiogWW91IG5vdGljZSB5b3VyIG1pc3Rha2UgYW5kIGNvcnJlY3QgeW91ciBwYXR0ZXJuIHJlZ2V4IHRvIHRoZSBmb2xsb3dpbmcNCl9XaGF0IGlzIHlvdXIgcmVzdWx0P18NCmBgYHtyfQ0KcHJvYmxlbXMxIDwtIGMoIjUuMyIsICI1LDUiLCAiNiAxIiwgIjUgLjExIiwgIjUsIDEyIikNCnBhdHRlcm5fd2l0aF9ncm91cHMgPC0gIl4oWzQtN10pWyxcXC5cXHNdKFxcZCopJCINCnN0cl9yZXBsYWNlKHByb2JsZW1zMSwgcGF0dGVybl93aXRoX2dyb3VwcywgIlxcMSdcXDIiKQ0KYGBgDQoNCjxwIHN0eWxlPSJjb2xvcjpyZWQiPkkgdGhp
bmsgd2hhdCBpdCBpbnRlbmRzIHRvIGRvIGlzIC4uLjwvcD4NCmBgYHtyfQ0KcHJvYmxlbXMxIDwtIGMoIjUuMyIsICI1LDUiLCAiNiAxIiwgIjUgLjExIiwgIjUsIDEyIikNCnBhdHRlcm5fd2l0aF9ncm91cHMgPC0gIl4oWzQtN10pXFxzKlssXFwuXFxzXVxccyooXFxkKikkIg0Kc3RyX3JlcGxhY2UocHJvYmxlbXMxLCBwYXR0ZXJuX3dpdGhfZ3JvdXBzLCAiXFwxJ1xcMiIpDQpgYGANCg0KIyMjIyMgQzYuIFRlc3RpbmcgYW5kIEltcHJvdmluZw0KDQpgYGB7cn0NCmNvbnZlcnRlZCA8LSBwcm9ibGVtcyAlPiUgDQogIHN0cl9yZXBsYWNlKCJmZWV0fGZvb3R8ZnQiLCAiJyIpICU+JSANCiAgc3RyX3JlcGxhY2UoImluY2hlc3xpbnwnJ3xcIiIsICIiKSAlPiUgDQogIHN0cl9yZXBsYWNlKCJeKFs0LTddKVxccypbLFxcLlxccytdXFxzKihcXGQqKSQiLCAiXFwxJ1xcMiIpDQpgYGANCg0KYGBge3J9DQpwYXR0ZXJuIDwtICJeWzQtN11cXHMqJ1xccypcXGR7MSwyfSQiDQppbmRleCA8LSBzdHJfZGV0ZWN0KGNvbnZlcnRlZCwgcGF0dGVybikNCm1lYW4oaW5kZXgpICAgICAgICAgICMgMC40MjEyMw0KYGBgDQoNCmBgYHtyfQ0KY29udmVydGVkWyFpbmRleF0gICAgDQpgYGANCg0KKipRMToqKiBJbiBvdXIgZXhhbXBsZSwgd2UgdXNlIHRoZSBmb2xsb3dpbmcgY29kZSB0byBkZXRlY3QgaGVpZ2h0IGVudHJpZXMgdGhhdCBkbyBub3QgbWF0Y2ggb3VyIHBhdHRlcm4gb2YgeOKAmXnigJ0uDQpgYGB7ciBldmFsPUZ9DQpwcm9ibGVtczEgPC0gYygiNS4zIiwgIjUsNSIsICI2IDEiLCAiNSAuMTEiLCAiNSwgMTIiKQ0KY29udmVydGVkMSA8LSBwcm9ibGVtczEgJT4lIA0KICBzdHJfcmVwbGFjZSgiZmVldHxmb290fGZ0IiwgIiciKSAlPiUgDQogIHN0cl9yZXBsYWNlKCJpbmNoZXN8aW58Jyd8XCIiLCAiIikgJT4lIA0KICBzdHJfcmVwbGFjZSgiXihbNC03XSlcXHMqWyxcXC5cXHMrXVxccyooXFxkKikkIiwgIlxcMSdcXDIiKQ0KDQpwYXR0ZXJuIDwtICJeWzQtN11cXHMqJ1xccypcXGR7MSwyfSQiDQppbmRleCA8LSBzdHJfZGV0ZWN0KGNvbnZlcnRlZDEsIHBhdHRlcm4pDQpjb252ZXJ0ZWQxWyFpbmRleF0NCmBgYA0KDQpfV2hpY2ggYW5zd2VyIGJlc3QgZGVzY3JpYmVzIHRoZSBkaWZmZXJlbmNlc18gYmV0d2VlbiB0aGUgcmVnZXggc3RyaW5nIHdlIHVzZSBhcyBhbiBhcmd1bWVudCBpbiA8YnI+DQpgc3RyX3JlcGxhY2UoIl4oWzQtN10pXFxzKlssXFwuXFxzK11cXHMqKFxcZCopJCIsICJcXDEnXFwyIilgIDxicj4NCkFuZCB0aGUgcmVnZXggc3RyaW5nIGluIDxicj4NCmBwYXR0ZXJuIDwtICJeWzQtN11cXHMqJ1xccypcXGR7MSwyfSQiP2ANCg0KKyBUaGUgcmVnZXggdXNlZCBpbiBzdHJfcmVwbGFjZSBsb29rcyBmb3IgZWl0aGVyIGEgY29tbWEsIHBlcmlvZCBvciBzcGFjZSBiZXR3ZWVuIHRoZSBmZWV0IGFuZCBpbmNoZXMgZGlnaXRzLCB3aGlsZSB0aGUgcGF0dGVybiByZWdleCBqdXN0IGxvb2tzIGZvciBhbiBhcG9zdHJvcGhlOyB0aGUg
cmVnZXggaW4gc3RyX3JlcGxhY2UgYWxsb3dzIGZvciBub25lIG9yIG1vcmUgZGlnaXRzIHRvIGJlIGVudGVyZWQgYXMgaW5jaGVzLCB3aGlsZSB0aGUgcGF0dGVybiByZWdleCBvbmx5IGFsbG93cyBmb3Igb25lIG9yIHR3byBkaWdpdHMuDQorDQoNCioqUTI6KiogWW91IG5vdGljZSBhIGZldyBlbnRyaWVzIHRoYXQgYXJlIG5vdCBiZWluZyBwcm9wZXJseSBjb252ZXJ0ZWQgdXNpbmcgeW91ciBzdHJfcmVwbGFjZSBhbmQgc3RyX2RldGVjdCBjb2RlDQpgYGB7cn0NCnllcyA8LSBjKCI1IGZlZXQgN2luY2hlcyIpDQpubyA8LSBjKCI1ZnQgOSBpbmNoZXMiLCAiNSBmdCA5IGluY2hlcyIpDQpzIDwtIGMoeWVzLCBubykNCg0KY29udmVydGVkIDwtIHMgJT4lIA0KICBzdHJfcmVwbGFjZSgiZmVldHxmb290fGZ0IiwgIiciKSAlPiUgDQogIHN0cl9yZXBsYWNlKCJpbmNoZXN8aW58Jyd8XCIiLCAiIikgJT4lIA0KICBzdHJfcmVwbGFjZSgiXihbNC03XSlcXHMqWyxcXC5cXHMrXVxccyooXFxkKikkIiwgIlxcMSdcXDIiKQ0KY29udmVydGVkDQoNCnBhdHRlcm4gPC0gIl5bNC03XVxccyonXFxzKlxcZHsxLDJ9JCINCnN0cl9kZXRlY3QoY29udmVydGVkLCBwYXR0ZXJuKQ0KYGBgDQoNCkl0IHNlZW1zIGxpa2UgdGhlIHByb2JsZW0gbWF5IGJlIGR1ZSB0byBzcGFjZXMgYXJvdW5kIHRoZSB3b3JkcyBmZWV0fGZvb3R8ZnQgYW5kIGluY2hlc3xpbi4gX1doYXQgaXMgYW5vdGhlciB3YXkgeW91IGNvdWxkIGZpeCB0aGlzIHByb2JsZW0/Xw0KYGBge3J9DQpjb252ZXJ0ZWQgPC0gcyAlPiUgDQogIHN0cl9yZXBsYWNlKCJcXHMqZmVldHxmb290fGZ0XFxzKiIsICInIikgJT4lIA0KICBzdHJfcmVwbGFjZSgiXFxzKmluY2hlc3xpbnwnJ3xcIlxccyoiLCAiIikgJT4lIA0KICBzdHJfcmVwbGFjZSgiXihbNC03XSlcXHMqWyxcXC5cXHMrXVxccyooXFxkKikkIiwgIlxcMSdcXDIiKQ0KDQpjb252ZXJ0ZWQNCnBhdHRlcm4gPC0gIl5bNC03XVxccyonXFxzKlxcZHsxLDJ9JCINCnN0cl9kZXRlY3QoY29udmVydGVkLCBwYXR0ZXJuKQ0KYGBgDQoNCjxicj4NCg0KLSAtIC0NCg0KIyMjIEQuIFN0cmluZyBQcm9jZXNzaW5nIFBhcnQgMw0KDQojIyMjIyBEMi4gU2VwYXJhdGUgd2l0aCBSZWdleA0KYGBge3J9DQpzID0gYygiNScxMCIsICI2JzEiKQ0KdGFiID0gZGF0YS5mcmFtZSh4ID0gcykNCnNlcGFyYXRlKHRhYiwgeCwgYygiZmVldCIsICJpbmNoZXMiKSwgc2VwPSInIikNCmV4dHJhY3QodGFiLCB4LCBjKCJmZWV0IiwgImluY2hlcyIpLCByZWdleD0iKFxcZCknKFxcZHsxLDJ9KSIpDQpgYGANCg0KYGBge3J9DQpzID0gYygiNScxMCIsICI2JzFcIiIsIjUnOGluY2hlcyIpDQp0YWIgPSBkYXRhLmZyYW1lKHggPSBzKQ0Kc2VwYXJhdGUodGFiLCB4LCBjKCJmZWV0IiwgImluY2hlcyIpLCBzZXA9IiciKQ0KZXh0cmFjdCh0YWIsIHgsIGMoImZlZXQiLCAiaW5jaGVzIiksIHJlZ2V4PSIoXFxkKScoXFxkezEsMn0pIikNCmBgYA0KDQojIyMjIyBEMS4gVXNpbmcg
R3JvdXBzIGFuZCBRdWFudGlmaWVycw0KDQoqKiBRMToqKiBJZiB5b3UgdXNlIHRoZSBleHRyYWN0IGNvZGUgZnJvbSBvdXIgdmlkZW8sIHRoZSBkZWNpbWFsIHBvaW50IGlzIGRyb3BwZWQuIFdoYXQgbW9kaWZpY2F0aW9uIG9mIHRoZSBjb2RlIHdvdWxkIGFsbG93IHlvdSB0byBwdXQgdGhlIGRlY2ltYWxzIGluIGEgdGhpcmQgY29sdW1uIGNhbGxlZCDigJxkZWNpbWFs4oCdPw0KYGBge3J9DQpsaWJyYXJ5KHRpZHlyKQ0KcyA8LSBjKCI1JzEwIiwgIjYnMVwiIiwgIjUnOGluY2hlcyIsICI1JzcuNSIpDQp0YWIgPC0gZGF0YS5mcmFtZSh4ID0gcykNCnJ4ID0gYygiKFxcZCknKFxcZHsxLDJ9KShcXC4pPyIsIA0KICAgICAgICIoXFxkKScoXFxkezEsMn0pKFxcLlxcZCspIiwNCiAgICAgICAiKFxcZCknKFxcZHsxLDJ9KVxcLlxcZCs/IiwNCiAgICAgICAiKFxcZCknKFxcZHsxLDJ9KShcXC5cXGQrKT8iKQ0KZXh0cmFjdCh0YWIsIHgsIGludG89YygiZmVldCIsICJpbmNoZXMiLCAiZGVjaW1hbCIpLCByZWdleD1yeFs0XSkNCmBgYA0KDQojIyMjIyBENC4gU3RyaW5nIFNwbGl0dGluZw0KDQpgYGB7cn0NCmZpbGVuYW1lID0gIHN5c3RlbS5maWxlKCJleHRkYXRhL211cmRlcnMuY3N2IiwgcGFja2FnZT0iZHNsYWJzIikNCmxpbmVzID0gcmVhZExpbmVzKGZpbGVuYW1lKQ0KaGVhZChsaW5lcykNCmBgYA0KDQpgYGB7cn0NCnggPSBzdHJfc3BsaXQobGluZXMsICIsIiwgc2ltcGxpZnk9VCkNCmhlYWQoeCkNCmBgYA0KDQpgYGB7cn0NCmFzLmRhdGEuZnJhbWUoeFstMSxdKSAlPiUgDQogIHNldE5hbWVzKHhbMSxdKSAlPiUgDQogIG11dGF0ZV9hbGwocGFyc2VfZ3Vlc3MpICU+JSANCiAgaGVhZCgxMCkNCmBgYA0KDQoqKlExOioqIFlvdSBoYXZlIHRoZSBmb2xsb3dpbmcgdGFibGUNCmBgYHtyfQ0Kc2NoZWR1bGUgPSBkYXRhLmZyYW1lKA0KICBkYXkgPSBjKCJNb25kYXkiLCAiVHVlc2RheSIpLA0KICBzdGFmZiA9IGMoIk1hbmR5LCBDaHJpcyBhbmQgTGF1cmEiLCAiU3RldmUsIFJ1dGggYW5kIEZyYW5rIikpDQoNCnNjaGVkdWxlDQpgYGANCg0KX1doaWNoIHR3byBjb21tYW5kcyB3b3VsZCBwcm9wZXJseSBzcGxpdCB0aGUgdGV4dCBpbiB0aGUg4oCcU3RhZmbigJ0gY29sdW1uIGludG8gZWFjaCBpbmRpdmlkdWFsIG5hbWU/IENoZWNrIGFsbCB0aGF0IGFwcGx5Ll8NCmBgYHtyfQ0KbGFwcGx5KGMoIix8YW5kIiwgIiwgfCBhbmQgIiwgIixcXHN8XFxzYW5kXFxzIiwgIlxccz8oLHxhbmQpXFxzPyIpLA0KICAgICAgIGZ1bmN0aW9uKHIpIHN0cl9zcGxpdChzY2hlZHVsZSRzdGFmZiwgciwgc2ltcGxpZnk9VCkpDQpgYGANCg0KKipRMjoqKiBfV2hhdCBjb2RlIHdvdWxkIHN1Y2Nlc3NmdWxseSB0dXJuIHlvdXIg4oCcU2NoZWR1bGXigJ0gdGFibGUgaW50byB0aGUgZm9sbG93aW5nIHRpZHkgdGFibGVfDQpgYGB7cn0NCnNjaGVkdWxlICU+JSANCiAgbXV0YXRlKHN0YWZmID0gc3RyX3NwbGl0KHN0YWZmLCAiLCB8IGFuZCAiKSkgJT4lIA0KICB1
bm5lc3QoKQ0KYGBgDQoNCiMjIyMjIEQuNiBSZWNvZGluZw0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmRhdGEoImdhcG1pbmRlciIpDQpnYXBtaW5kZXIgJT4lIGZpbHRlcihyZWdpb24gPT0gIkNhcmliYmVhbiIpICU+JSANCiAgZ2dwbG90KGFlcyh5ZWFyLCBsaWZlX2V4cGVjdGFuY3ksIGNvbG9yPWNvdW50cnkpKSArDQogIGdlb21fbGluZSgpDQpgYGANCg0KYGBge3J9DQpnYXBtaW5kZXIgJT4lIGZpbHRlcihyZWdpb24gPT0gIkNhcmliYmVhbiIpICU+JSANCiAgZmlsdGVyKHN0cl9sZW5ndGgoY291bnRyeSkgPj0gMTIpICU+JSANCiAgZGlzdGluY3QoY291bnRyeSkNCmBgYA0KDQpgYGB7cn0NCmdhcG1pbmRlciAlPiUgZmlsdGVyKHJlZ2lvbiA9PSAiQ2FyaWJiZWFuIikgJT4lIA0KICBtdXRhdGUoY291bnRyeSA9IHJlY29kZSgNCiAgICBjb3VudHJ5LCANCiAgICBgQW50aWd1YSBhbmQgQmFyYnVkYWAgPSAiQmFyYnVkYSIsDQogICAgYERvbWluaWNhbiBSZXB1YmxpY2AgPSAiRFIiLA0KICAgIGBTdC4gVmluY2VudCBhbmQgdGhlIEdyZW5hZGluZXNgID0gIlN0LiBWaW5jZW50IiwNCiAgICBgVHJpbmlkYWQgYW5kIFRvYmFnb2AgPSAiVHJpbmlkYWQiDQogICkpICU+JSANCiAgZ2dwbG90KGFlcyh5ZWFyLCBsaWZlX2V4cGVjdGFuY3ksIGNvbG9yPWNvdW50cnkpKSArDQogIGdlb21fbGluZSgpDQpgYGANCg0KDQoqKlExOioqIA0KVXNpbmcgdGhlIGdhcG1pbmRlciBkYXRhLCB5b3Ugd2FudCB0byByZWNvZGUgY291bnRyaWVzIGxvbmdlciB0aGFuIDEyIGxldHRlcnMgaW4gdGhlIHJlZ2lvbiBgTWlkZGxlIEFmcmljYWAgdG8gdGhlaXIgYWJicmV2aWF0aW9ucyBpbiBhIG5ldyBjb2x1bW4sIGBjb3VudHJ5X3Nob3J0YC4gX1doaWNoIGNvZGUgd291bGQgYWNjb21wbGlzaCB0aGlzP18NCmBgYHtyfQ0KbGlicmFyeShkc2xhYnMpDQpkYXRhKGdhcG1pbmRlcikNCg0KZ2FwbWluZGVyICU+JSBmaWx0ZXIocmVnaW9uID09ICJNaWRkbGUgQWZyaWNhIikgJT4lIA0KICBmaWx0ZXIobmNoYXIoYXMuY2hhcmFjdGVyKGNvdW50cnkpKSA+PSAxMikgJT4lIA0KICBzZWxlY3QocmVnaW9uLCBjb3VudHJ5KSAlPiUgZGlzdGluY3QoKSAlPiUgDQogIG11dGF0ZShjb3VudHJ5X3Nob3J0ID0gcmVjb2RlKGNvdW50cnksIA0KICAgICJDZW50cmFsIEFmcmljYW4gUmVwdWJsaWMiID0gIkNBUiIsIA0KICAgICJDb25nbywgRGVtLiBSZXAuIiA9ICJEUkMiLA0KICAgICJFcXVhdG9yaWFsIEd1aW5lYSIgPSAiRXEuIEd1aW5lYSINCiAgICApICkNCmBgYA0KDQo8YnI+DQoNCi0gLSAtDQoNCiMjIyBFLiBEYXRlLCBUaW1lcyBhbmQgVGV4dCBNaW5pbmcNCg0KIyMjIyMgRTEuIERhdGVzIGFuZCBUaW1lcw0KDQoqKlExOioqIF9XaGljaCBvZiB0aGUgZm9sbG93aW5nIGlzIHRoZSBzdGFuZGFyZCBJU08gODYwMSBmb3JtYXQgZm9yIGRhdGVzP18NCg0KKyBZWVlZLU1NLUREDQorDQoNCioqUTI6KiogX1doaWNoIG9mIHRoZSBmb2xsb3dpbmcgY29tbWFu
ZHMgY291bGQgY29udmVydCB0aGlzIHN0cmluZyBpbnRvIHRoZSBjb3JyZWN0IGRhdGUgZm9ybWF0P18NCmBgYHtyfQ0KbGlicmFyeShsdWJyaWRhdGUpDQpkYXRlcyA8LSBjKCIwOS0wMS0wMiIsICIwMS0xMi0wNyIsICIwMi0wMy0wNCIpDQp5bWQoZGF0ZXMpDQptZHkoZGF0ZXMpDQpkbXkoZGF0ZXMpDQpgYGANCg0KKyBJdCBpcyBpbXBvc3NpYmxlIHRvIGtub3cgd2hpY2ggZm9ybWF0IGlzIGNvcnJlY3Qgd2l0aG91dCBhZGRpdGlvbmFsIGluZm9ybWF0aW9uLg0KKw0KDQoNCi0gLSAtDQoNCjxicj48YnI+PGJyPjxicj48YnI+DQoNCjxzdHlsZT4NCi5jYXB0aW9uIHsNCiAgY29sb3I6ICM3Nzc7DQogIG1hcmdpbi10b3A6IDEwcHg7DQp9DQpwIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnByZSB7DQogIHdvcmQtYnJlYWs6IG5vcm1hbDsNCiAgd29yZC13cmFwOiBub3JtYWw7DQogIGxpbmUtaGVpZ2h0OiAxOw0KfQ0KcHJlIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnAsbGkgew0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KLnJ7DQogIGxpbmUtaGVpZ2h0OiAxLjI7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDg4MDA7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoM3sNCiAgY29sb3I6ICNiMzZiMDA7DQogIGJhY2tncm91bmQ6ICNmZmUwYjM7DQogIGxpbmUtaGVpZ2h0OiAyOw0KICBmb250LXdlaWdodDogYm9sZDsNCn0NCg0KaDV7DQogIGNvbG9yOiAjMDA2MDAwOw0KICBiYWNrZ3JvdW5kOiAjZmZmZmUwOw0KICBsaW5lLWhlaWdodDogMjsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCmVtew0KICBjb2xvcjogIzAwMDBjMDsNCiAgYmFja2dyb3VuZDogI2YwZjBmMDsNCiAgfQ0KPC9zdHlsZT4NCg==