EDA Exercise

This notebook uses US Real Estate sample dataset from ZenRow, by way of r/datasets. The dataset contains records of 10,000 houses on sale in U.S. cities from ttps://www.zillow.com/, scrapped by ZenRow.

# Load libaries 
library(tidyverse)
library(scales)
library(skimr)
library(janitor)
library(psych)
library(gghalves)
library(ggstatsplot)
library(dendextend)
library(factoextra)
library(wesanderson)
library(ggsci)
library(colorspace)
library(geofacet)
library(gt)

# set theme
theme_set(theme_minimal(base_size = 10))
theme_update(panel.grid.minor=element_blank(),
             plot.title.position="plot",
             axis.title=element_text(face="bold",size=9))
# import data 
real_estate = read_csv("us-cities-real-estate-sample.csv", na="NULL") %>% 
  clean_names() 
Missing column names filled in: 'X47' [47]
── Column specification ───────────────────────────────────────────────────────────────────────
cols(
  .default = col_character(),
  zpid = col_double(),
  id = col_double(),
  hasImage = col_logical(),
  unformattedPrice = col_double(),
  addressZipcode = col_double(),
  isUndisclosedAddress = col_logical(),
  beds = col_double(),
  baths = col_double(),
  area = col_double(),
  latitude = col_double(),
  longitude = col_double(),
  isZillowOwned = col_logical(),
  has3DModel = col_logical(),
  hasVideo = col_logical(),
  hasAdditionalAttributions = col_logical(),
  isFeaturedListing = col_logical(),
  list = col_logical(),
  relaxed = col_logical(),
  openHouseStartDate = col_datetime(format = ""),
  X47 = col_logical()
)
ℹ Use `spec()` for the full column specifications.
glimpse(real_estate)
Rows: 10,000
Columns: 47
$ zpid                           <dbl> 115423997, 2100098805, 14543206, 72977167, 17599151, 1…
$ id                             <dbl> 115423997, 2100098805, 14543206, 72977167, 17599151, 1…
$ provider_listing_id            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ img_src                        <chr> "https://photos.zillowstatic.com/fp/688dbfb9d9af6a37bb…
$ has_image                      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, …
$ detail_url                     <chr> "https://www.zillow.com/homedetails/1053-Lutheran-Chur…
$ status_type                    <chr> "FOR_SALE", "FOR_SALE", "FOR_SALE", "FOR_SALE", "FOR_S…
$ status_text                    <chr> "House for sale", "Active", "Townhouse for sale", "Hou…
$ country_currency               <chr> "$", "$", "$", "$", "$", "$", "$", "$", "$", "$", "$",…
$ price                          <chr> "$330,000", "$99,900", "$390,000", "$254,900", "$648,7…
$ unformatted_price              <dbl> 330000, 99900, 390000, 254900, 648700, 339900, 149000,…
$ address                        <chr> "1053 Lutheran Church Rd, Bardstown, KY 40004", "0 Old…
$ address_street                 <chr> "1053 Lutheran Church Rd", "0 Old Swanzey Rd", "1718 W…
$ address_city                   <chr> "Bardstown", "Chesterfield", "Atlanta", "Wilmington", …
$ address_state                  <chr> "KY", "NH", "GA", "DE", "CA", "VA", "MS", "OK", "NV", …
$ address_zipcode                <dbl> 40004, 3443, 30329, 19809, 91730, 24431, 39531, 73090,…
$ is_undisclosed_address         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ beds                           <dbl> 3, NA, 3, 3, 4, 3, NA, 2, 3, NA, NA, 2, 3, 3, 2, 3, 2,…
$ baths                          <dbl> 3, NA, 3, 1, 2, 3, NA, 1, 3, NA, NA, 2, 2, 3, 2, 2, 1,…
$ area                           <dbl> 2054, NA, 2154, 1025, 1322, 2026, 3600, 925, 1828, NA,…
$ latitude                       <dbl> 37.85539, 42.88115, 33.83270, 39.75484, 34.11825, 38.1…
$ longitude                      <dbl> -85.53178, -72.39908, -84.32765, -75.50329, -117.58391…
$ is_zillow_owned                <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,…
$ variable_data_type             <chr> "DAYS_ON", "DAYS_ON", "PRICE_REDUCTION", "PRICE_REDUCT…
$ variable_data_text             <chr> "1 day on Zillow", "36 days on Zillow", "$5,000 (Jun 1…
$ variable_data_is_fresh         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ badge_info                     <chr> "ForSale", "ForSale", NA, "ForSale", NA, NA, NA, "ForS…
$ pgapt                          <chr> "For Sale (Broker)", "For Sale (Broker)", "ForSale", "…
$ sgapt                          <chr> "291700", NA, "For Sale (Broker)", "255100", "For Sale…
$ zestimate                      <chr> "false", "false", "397900", "false", "681500", "342400…
$ should_show_zestimate_as_price <chr> "false", "false", "false", "false", "false", "false", …
$ has3d_model                    <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,…
$ has_video                      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ is_home_rec                    <chr> NA, "Listing provided by NEREN", "false", NA, "false",…
$ info2string                    <chr> "https://photos.zillowstatic.com/fp/44097c1919ccfd9c21…
$ info3string                    <chr> "Demaree & Hubbard", NA, "https://photos.zillowstatic.…
$ broker_name                    <chr> "true", "false", "Opendoor Brokerage, LLC", "true", NA…
$ has_additional_attributions    <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ is_featured_listing            <lgl> TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TR…
$ list                           <lgl> FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FA…
$ relaxed                        <lgl> NA, NA, FALSE, NA, FALSE, FALSE, FALSE, NA, NA, FALSE,…
$ has_open_house                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "t…
$ open_house_start_date          <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2…
$ open_house_end_date            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "2…
$ open_house_description         <chr> "", "", NA, "", NA, NA, NA, "", "", NA, NA, "", NA, "O…
$ info6string                    <chr> "", "", NA, "", NA, NA, "Richard Reynolds", "", "", "P…
$ x47                            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# skim 
skim(real_estate)
── Data Summary ────────────────────────
                           Values     
Name                       real_estate
Number of rows             10000      
Number of columns          47         
_______________________               
Column type frequency:                
  character                27         
  logical                  10         
  numeric                  9          
  POSIXct                  1          
________________________              
Group variables            None       

── Variable type: character ───────────────────────────────────────────────────────────────────
   skim_variable                  n_missing complete_rate   min   max empty n_unique whitespace
 1 provider_listing_id                 8663        0.134      0    14    25     1293          0
 2 img_src                                0        1         75   231     0     9940          0
 3 detail_url                             0        1         57   121     0    10000          0
 4 status_type                            0        1          8     8     0        1          0
 5 status_text                            0        1          3    26     0       24          0
 6 country_currency                       0        1          1     1     0        1          0
 7 price                                  0        1          0    12     8     2019          0
 8 address                                0        1         17    72     0    10000          0
 9 address_street                         0        1          0    64     1     9980          0
10 address_city                           0        1          0    26     1     4096          0
11 address_state                          0        1          2     2     0       51          0
12 variable_data_type                  1001        0.900      7    17     0        8          0
13 variable_data_text                  1001        0.900      7    26     0     1518          0
14 variable_data_is_fresh              9820        0.018      5    11     0        8          0
15 badge_info                          6458        0.354      7    10     0        3          0
16 pgapt                                  5        1.00       7    17     0        9          0
17 sgapt                               1688        0.831      5    17     0     1503          0
18 zestimate                           3229        0.677      5    16     0     2412          0
19 should_show_zestimate_as_price         3        1.00       4     6     0        4          0
20 is_home_rec                         2731        0.727      5    43     0       36          0
21 info2string                         6887        0.311      5    88     0      140          0
22 info3string                         4004        0.600      6    88     0     1831          0
23 broker_name                         1776        0.822      3    73     0     3194          0
24 has_open_house                      9391        0.0609     4    19     0       31          0
25 open_house_end_date                 9396        0.0604    19    31     0       72          0
26 open_house_description              6057        0.394      0    31  3542       63          0
27 info6string                         5646        0.435      0    41  3547      762          0

── Variable type: logical ─────────────────────────────────────────────────────────────────────
   skim_variable               n_missing complete_rate     mean count                 
 1 has_image                         100         0.99    1      "TRU: 9900"           
 2 is_undisclosed_address              0         1       0.0015 "FAL: 9985, TRU: 15"  
 3 is_zillow_owned                     0         1       0.0067 "FAL: 9933, TRU: 67"  
 4 has3d_model                         0         1       0.0449 "FAL: 9551, TRU: 449" 
 5 has_video                           0         1       0.009  "FAL: 9910, TRU: 90"  
 6 has_additional_attributions         5         1.00    0.491  "FAL: 5089, TRU: 4906"
 7 is_featured_listing                 0         1       0.355  "FAL: 6453, TRU: 3547"
 8 list                                0         1       0.645  "TRU: 6453, FAL: 3547"
 9 relaxed                          3339         0.666   0.0312 "FAL: 6453, TRU: 208" 
10 x47                             10000         0     NaN      ": "                  

── Variable type: numeric ─────────────────────────────────────────────────────────────────────
  skim_variable     n_missing complete_rate         mean           sd     p0        p25
1 zpid                      0         1     865572492.   968581736.     82   63347023. 
2 id                        0         1     865572492.   968581736.     82   63347023. 
3 unformatted_price         8         0.999    421134.     4917697.      1     110000  
4 address_zipcode           2         1.00      49847.       30054.   1001      24739. 
5 beds                   2696         0.730         3.11         3.26    0          2  
6 baths                  2933         0.707         2.42         1.26    1          2  
7 area                   3054         0.695      2085.        7719.      1       1200  
8 latitude                393         0.961        39.0          5.67   19.1       35.2
9 longitude               393         0.961       -92.1         17.5  -160.      -104. 
          p50          p75         p100 hist 
1 174180452   2071098630.  2146980069   ▇▁▁▁▅
2 174180452   2071098630.  2146980069   ▇▁▁▁▅
3    225000       388250    474900000   ▇▁▁▁▁
4     48959        77488.       99921   ▇▇▆▅▇
5         3            4          234   ▇▁▁▁▁
6         2            3           24   ▇▁▁▁▁
7      1612         2208.      435600   ▇▁▁▁▁
8        39.6         42.3         65.1 ▁▅▇▁▁
9       -87.8        -78.6        -67.0 ▁▁▃▆▇

── Variable type: POSIXct ─────────────────────────────────────────────────────────────────────
  skim_variable         n_missing complete_rate min                 max                
1 open_house_start_date      9396        0.0604 2021-06-26 16:00:00 2021-07-11 15:00:00
  median              n_unique
1 2021-06-27 13:00:00       43
# check for duplicates: rows 
n_distinct(real_estate$id)
[1] 10000
real_estate %>% get_dupes() %>% flatten() # no duplicates found
No variable names specified - using all columns.

No duplicate combinations found of: zpid, id, provider_listing_id, img_src, has_image, detail_url, status_type, status_text, country_currency, ... and 38 other variables
list()

State

# listing count by state
tab_state = real_estate %>% count(address_state)

# function reference: https://medium.com/@NickDoesData/visualizing-geographic-data-in-r-fb2e0f5b59c5
create_gradient_state_tile_map <- function(state, value, title, subtitle, caption, legend_title, state_grid='us_state_grid2') {
  
  df <- as.tibble(data.frame(state, value))
  
  fig <- df %>% 
    mutate(x = 1) %>% 
    mutate(label_y = .5) %>%  
    mutate(label_x = 1) %>% 
    ggplot()+
    geom_bar(mapping=aes(x=x, fill=value))  +
    facet_geo(~ state, grid=state_grid) +
    ggtitle(title) +
    geom_text(aes(x=label_x, y=label_y, label=state), color='#ffffff', size=3.5) 
  
  return(fig)
}

create_gradient_state_tile_map(tab_state$address_state, tab_state$n, title='Sale listing count by state\n', legend_title = "", caption="") + 
  scale_fill_continuous_sequential(palette="Heat", limits=c(1,323), breaks=c(1,100,200,323)) +
  theme_void(base_size=10) + 
  theme(strip.text.x = element_blank(),
        plot.margin = unit(c(1,1.5,1,1.5), "cm"),
        plot.title=element_text(size=11),
        legend.title=element_text(size=9)) + 
  guides(fill = guide_colorbar(title="Count",
                              title.position = "top", 
                              barwidth = unit(.5, "lines"), 
                              barheight = unit(10, "lines"))) 

# median price by state
re_st = real_estate %>% drop_na(unformatted_price) %>%
  group_by(address_state) %>%
  filter(address_state!= "DC") %>%
  summarise(median_price=median(unformatted_price)) 

create_gradient_state_tile_map(re_st$address_state, re_st$median_price, title='Median sale price by state\n', legend_title = "", caption="") + 
  scale_fill_continuous_sequential(palette="Batlow", 
                                   labels = unit_format(unit = "K", scale = 1e-3)) +
  theme_void(base_size=10) + 
  theme(strip.text.x = element_blank(),
        plot.margin = unit(c(1,1.5,1,1.5), "cm"),
        plot.title=element_text(size=11),
        legend.title=element_text(size=9)) + 
  guides(fill = guide_colorbar(title="Median price",
                              title.position = "top", 
                              barwidth = unit(.5, "lines"), 
                              barheight = unit(10, "lines"))) 

Address city

# median price by address_city (cities with more than 40 listings)
real_estate %>% group_by(address_city) %>% 
  summarise(listing_count=n(), med = median(unformatted_price)) %>% 
  arrange(desc(listing_count)) %>%
  filter(listing_count>=40) %>%
  arrange(desc(med)) %>%
  mutate(lab = paste0(address_city," ","(","n=",listing_count,")")) %>%
  mutate(lab= fct_inorder(lab)) %>%
  ggplot(aes(y= fct_rev(lab), x= med, fill=I(ifelse(med==max(med), "#f6bd60","#adb5bd")))) + 
  geom_col(width=0.75) + 
  geom_text(aes(label=scales::dollar(med)),size=3, color="black", hjust=1.2) + 
  scale_x_continuous(labels = unit_format(unit = "K", scale = 1e-3), expand=c(0,0), 
                     limits=c(0,700000)) + 
  labs(x= "Median sale price (in USD)", y= "City (listing count)",
       subtitle="Median sale price by city (cities with >=40 sale listings)")

Status text

# status text and price
real_estate %>% count(status_text) %>% arrange(desc(n))

real_estate %>% drop_na(unformatted_price) %>%
  mutate(status_grp = fct_lump(factor(status_text), 6,other_level = "Other status text")) %>%
  mutate(status_grp = str_remove_all(status_grp,"for sale")) %>% 
  mutate(status_grp = if_else(status_grp=="Home ","Other status text",status_grp)) %>%
  group_by(status_grp) %>%
  summarise(median=median(unformatted_price),
            min=min(unformatted_price), max=max(unformatted_price), range=max-min,
            mean = mean(unformatted_price)) %>%
  rename(status_text= status_grp) %>%
  arrange(desc(median)) %>%
  gt() %>%
  fmt_number(
    columns = c("median","min","max","range","mean"),
    decimals=0
  ) %>%
  data_color(
    columns =c("median","min","max","range","mean"),
    colors = scales::col_numeric(
      palette = c("#ffffff", "#f2fbd2", "#c9ecb4", "#93d3ab", "#35b0ab"),
      domain = NULL
    )
  ) %>%
   tab_style(
    style = list(
      cell_borders(
        sides = "bottom",
        color = "black",
        weight = px(3)
      )
    ),
    locations = list(
      cells_column_labels(
        columns = gt::everything()
      )
    )
  ) %>%
  tab_options(table.font.size=14) %>%
  tab_header(title=md("**Sale price by status text**"))
Sale price by status text
status_text median min max range mean
Townhouse 315,000 700 25,995,000 25,994,300 551,101
Multi-family home 299,999 3,750 9,950,000 9,946,250 481,909
House 290,000 5,500 474,900,000 474,894,500 639,913
Condo 265,000 29,495 65,750,000 65,720,505 523,100
Other status text 230,000 1 20,150,000 20,149,999 351,428
Lot / Land 72,000 350 9,200,000 9,199,650 134,977

Price, beds, baths and area

# boxplot (price, beds, baths, area)
real_estate %>% select (zpid, unformatted_price, beds, baths, area) %>% 
  pivot_longer(!zpid) %>%
  ggplot(aes(x=value, color=name)) + 
  geom_boxplot(outlier.alpha = 0.8, outlier.shape = 21, show.legend = F) + 
  facet_wrap(~name, scales = "free", ncol=1, strip.position = "left") + 
  theme_light(base_size=10) +
  theme(strip.placement = "outside",
        axis.text.y = element_blank(),
        axis.ticks.y=element_blank()
        ) + 
  scale_x_continuous(labels=scales::comma) + 
  scale_y_continuous(breaks=c(0)) +
  scale_color_npg()

Area

# area and price
real_estate %>% 
  select(unformatted_price, area) %>%
  drop_na() %>%
  mutate(price_zs = (unformatted_price- mean(unformatted_price))/ sd(unformatted_price),
         area_zs=(area- mean(area))/ sd(area)) %>%
  filter(between(price_zs,-3,3)) %>%
  filter(between(area_zs,-3,3)) %>%
  ggplot(aes(x=area, y=unformatted_price)) + 
  geom_point(shape=21, alpha=0.5) + 
  geom_smooth(method='lm', se=FALSE, color="#f6bd60") + 
  stat_cor(method="pearson",label.x=15000, size=3.5) + 
  scale_y_continuous(labels=scales::label_number_si()) + 
  scale_x_continuous(labels = unit_format(unit = "K", scale = 1e-3, accuracy = 1)) +
  labs(x="Area", y="Sale price (in USD)", subtitle="Area and sale price")

Baths

# drop price outliers
re_price = real_estate %>% 
  drop_na(unformatted_price) %>%
  mutate(price_zs = (unformatted_price- mean(unformatted_price))/ sd(unformatted_price)) %>%
  filter(between(price_zs,-3,3)) 
summary(re_price$unformatted_price)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
       1   109900   225000   341653   386400 15000000 
# listing count by number of baths 
re_price %>% mutate(baths2 = as.integer(floor(baths))) %>% 
  count(baths2) %>%
  ggplot(aes(y=fct_rev(factor(baths2)), x=n)) + 
  geom_segment(aes(x=0, xend=n, y=fct_rev(factor(baths2)),yend=fct_rev(factor(baths2)))) +
  geom_point() + 
  geom_text(aes(label=n),size=3, hjust=-0.7) + 
  scale_x_continuous(limits=c(0,3500)) +
  labs(x="Sale listing count", y="Number of baths", subtitle="Sale listing count by number of baths")


# baths and price  
re_price %>% mutate(baths2 = as.integer(floor(baths))) %>%
  mutate(baths2_grp = fct_lump(factor(baths2), 6,other_level = "7 - 24")) %>% 
  ggplot(aes(y=fct_rev(baths2_grp), x=unformatted_price, color=fct_rev(baths2_grp))) + 
  geom_boxplot(outlier.shape = 21, outlier.alpha = 0.5, show.legend = F) + 
  scale_x_continuous(labels=scales::label_number_si()) + 
  labs(x="Sale price (in USD)", y="Number of baths", subtitle="Number of baths and sale price") + 
  scale_color_aaas(na.value="black")

Beds

# listing count by number of beds
re_price %>% mutate(beds_grp = fct_lump(factor(beds), 7,other_level = ">7")) %>%
  count(beds_grp) %>% 
  ggplot(aes(y=fct_rev(beds_grp), x=n, fill=n)) + 
  geom_col(width=0.75, show.legend=F) + 
  geom_text(aes(label=n),size=3, hjust=1.2, color="white")  +
  scale_fill_continuous_sequential(palette="Red-Blue") +
  labs(x="Sale listing count", y="Number of beds", subtitle="Sale listing count by number of beds")


# beds and price
re_price %>% mutate(beds_grp = fct_lump(factor(beds), 7,other_level = ">7")) %>%
  ggplot(aes(y=fct_rev(beds_grp), x=unformatted_price, color=fct_rev(beds_grp))) + 
  geom_boxplot(outlier.shape = 21, outlier.alpha = 0.5, show.legend=F) + 
  scale_x_continuous(labels=scales::label_number_si()) + 
  labs(x="Sale price (in USD)", y="Number of beds", subtitle="Number of beds and sale price") + 
  scale_color_aaas(na.value="black")

Correlation

# correlation
re1 = real_estate %>% 
  select(unformatted_price, baths, beds, area) %>% 
  drop_na()
dim(re1)
[1] 6571    4
set.seed(123)
ggcorrmat(
  data=re1,
  cor.vars=c(unformatted_price:area),
  title="Correlation",
)

# drop outliers 
re2 = re1 %>% mutate(price_zs = (unformatted_price- mean(unformatted_price))/ sd(unformatted_price),
         area_zs=(area- mean(area))/ sd(area),
         beds_zs=(beds- mean(beds))/ sd(beds),
         baths_zs=(baths- mean(baths))/ sd(baths)) %>%
  filter(between(price_zs,-3,3)) %>%
  filter(between(area_zs,-3,3)) %>%
  filter(between(beds_zs,-3,3)) %>%
  filter(between(baths_zs,-3,3)) %>%
  select(unformatted_price, baths, beds, area)
dim(re2)
[1] 6430    4
# scale 
re2_scaled = scale(re2)
# correlation after dropping outliers
set.seed(123)
ggcorrmat(
  data=re2,
  cor.vars=c(unformatted_price:area),
  title="Correlation",
  subtitle="After dropping outliers"
)

Clustering

  • price, baths, beds, area
# check optimal clusters: elbow method 
set.seed(123)
fviz_nbclust(re2_scaled,kmeans,method="wss")

# k means clustering
# 4 clusters 
set.seed(123)
km4= kmeans(re2_scaled,centers=4,nstart=50)
km4
K-means clustering with 4 clusters of sizes 1019, 18, 3310, 2083

Cluster means:
  unformatted_price       baths       beds        area
1        0.45324379  1.37863176  1.2998296  1.58383154
2       15.03809329  2.92301511  1.0855574  3.02158826
3       -0.09365508  0.09967272  0.1995130 -0.01750613
4       -0.20285299 -0.85806853 -0.9622921 -0.77310016

Clustering vector:
   [1] 3 3 4 3 3 4 3 4 3 3 4 3 4 4 3 4 3 4 1 3 3 4 1 3 1 3 3 3 4 3 3 4 4 3 3 4 3 3 4 3 4 3 4 4
  [45] 4 3 1 4 4 3 1 4 4 3 1 4 1 4 3 4 4 1 4 3 3 4 4 3 3 3 1 4 1 4 3 1 3 3 3 4 4 1 4 3 4 4 4 4
  [89] 1 1 4 4 4 4 4 4 3 3 4 3 3 4 3 1 3 3 4 3 4 4 3 4 3 4 4 4 3 3 3 3 1 3 4 4 3 3 4 1 3 1 3 3
 [133] 3 3 3 3 4 1 3 4 3 3 3 4 1 3 3 3 3 3 4 3 4 3 4 1 4 3 4 4 3 1 4 3 3 3 3 3 1 3 4 4 1 4 3 3
 [177] 4 4 3 1 4 3 4 3 3 3 4 3 4 3 1 3 1 3 4 4 3 3 3 3 4 4 3 3 3 3 4 3 3 3 4 3 4 3 4 1 4 3 4 3
 [221] 4 3 4 3 1 4 4 1 3 3 3 1 3 3 3 4 3 4 3 1 3 4 3 3 3 3 4 3 3 3 4 3 3 3 3 1 1 3 3 1 4 1 4 4
 [265] 3 4 3 1 4 3 3 4 3 1 3 1 4 4 1 3 3 3 4 4 4 3 3 4 1 1 3 1 4 4 1 4 4 4 4 3 3 1 3 3 4 3 3 3
 [309] 3 1 3 3 4 4 1 3 1 4 3 4 1 3 4 3 4 4 3 4 4 1 3 4 3 3 3 4 4 3 3 3 3 3 3 1 3 3 3 4 3 3 3 1
 [353] 1 4 4 4 3 4 4 4 3 1 4 4 4 4 3 3 4 1 3 4 4 3 3 1 4 3 3 3 1 3 4 3 3 3 3 3 4 3 1 3 3 3 4 3
 [397] 3 3 3 3 4 3 3 4 3 3 3 3 3 3 4 3 4 3 3 3 1 3 1 4 3 3 3 4 4 3 1 4 4 3 3 4 4 4 3 4 3 3 3 3
 [441] 3 3 1 4 3 4 4 3 3 1 1 4 3 4 3 4 3 4 3 4 3 4 4 3 3 4 4 3 4 3 1 4 3 3 1 3 3 1 4 3 4 3 4 4
 [485] 3 1 1 4 4 3 3 4 3 4 3 1 3 3 3 4 1 4 4 1 4 4 3 4 1 3 4 1 3 1 3 3 4 4 3 3 1 3 4 1 3 4 4 4
 [529] 3 3 3 1 4 3 3 3 3 3 3 4 3 4 4 4 3 4 1 1 4 4 3 1 3 4 3 1 4 4 4 1 3 2 3 1 4 4 4 4 1 4 3 4
 [573] 1 3 3 3 4 3 3 4 3 3 3 1 1 3 4 1 3 3 3 3 4 4 3 4 4 4 4 1 4 3 3 3 3 3 1 3 4 3 4 3 4 4 1 4
 [617] 1 3 4 1 3 3 1 4 3 1 3 4 4 4 3 3 3 3 3 3 3 3 4 3 3 3 3 3 3 3 3 3 4 3 3 3 3 4 1 3 3 4 4 1
 [661] 3 3 3 4 3 1 3 3 4 3 4 4 4 4 3 4 3 3 1 1 3 3 3 4 3 3 3 4 4 3 3 3 1 3 3 3 4 3 1 4 3 3 3 1
 [705] 3 1 3 3 4 3 4 1 3 3 3 4 3 4 1 3 3 1 4 1 4 3 3 4 3 3 3 4 3 1 3 4 3 3 3 4 4 1 4 3 3 3 1 3
 [749] 4 3 3 1 1 3 3 1 3 3 1 3 4 4 4 3 3 3 3 4 3 4 3 3 4 3 1 1 3 3 1 4 1 4 1 4 4 3 3 4 3 3 3 4
 [793] 1 3 4 3 1 1 1 3 3 1 3 4 1 3 3 4 4 4 4 3 3 3 1 3 4 4 4 4 3 3 3 3 3 1 4 3 4 3 4 4 3 3 1 3
 [837] 3 1 4 1 4 1 3 4 3 3 4 4 4 3 3 4 3 3 4 3 3 3 1 3 3 3 3 3 3 3 3 4 4 4 4 3 3 3 4 1 4 4 1 3
 [881] 4 3 1 3 3 4 3 4 1 3 4 3 3 4 3 3 3 3 4 1 4 1 3 1 1 3 4 4 4 1 3 3 1 3 4 3 3 3 3 1 4 4 1 3
 [925] 4 4 3 3 3 1 1 1 4 1 3 3 3 1 3 4 1 4 4 4 4 4 3 4 3 3 3 3 3 4 4 1 3 4 4 3 3 4 4 3 3 4 4 3
 [969] 4 3 4 4 3 3 1 4 1 3 1 3 1 3 3 4 3 3 4 4 4 3 3 1 3 4 3 3 3 4 3 4
 [ reached getOption("max.print") -- omitted 5430 entries ]

Within cluster sum of squares by cluster:
[1] 4164.1092  488.3428 3584.7652 1657.0068
 (between_SS / total_SS =  61.5 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"   
[7] "size"         "iter"         "ifault"      
fviz_cluster(km4, data=re2_scaled, labelsize=0) #cluster plot

with(re2,pairs(re2_scaled,col=(1:4)[km4$cluster])) #pair plot

  • cluster size (largest to smallest): C3 > C4 > C1 > C2
  • C1: n=1019, highest mean beds, 2nd highest mean price/area/baths
  • C2: n=18, highest mean price/area/baths, 2nd highest mean beds
  • C3: n=3310, 2nd lowest mean price/baths/beds/area
  • C4: n=2083, lowest mean price/baths/beds/area
#summary by k4 cluster ID
re2_cid = re2 %>% mutate(cluster_id = as.factor(km4$cluster))
by(re2_cid, re2_cid$cluster_id, summary)
re2_cid$cluster_id: 1
 unformatted_price     baths            beds            area       cluster_id
 Min.   :   9000   Min.   :1.000   Min.   :0.000   Min.   : 1060   1:1019    
 1st Qu.: 350000   1st Qu.:3.000   1st Qu.:4.000   1st Qu.: 2538   2:   0    
 Median : 499900   Median :4.000   Median :4.000   Median : 2939   3:   0    
 Mean   : 689508   Mean   :3.584   Mean   :4.406   Mean   : 3155   4:   0    
 3rd Qu.: 729000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.: 3476             
 Max.   :5000000   Max.   :6.000   Max.   :7.000   Max.   :11326             
----------------------------------------------------------------------- 
re2_cid$cluster_id: 2
 unformatted_price      baths           beds            area      cluster_id
 Min.   : 6500000   Min.   :3.00   Min.   :3.000   Min.   :1782   1: 0      
 1st Qu.: 7845000   1st Qu.:5.00   1st Qu.:4.000   1st Qu.:3540   2:18      
 Median : 9444000   Median :5.00   Median :4.000   Median :4237   3: 0      
 Mean   :10268778   Mean   :5.00   Mean   :4.167   Mean   :4419   4: 0      
 3rd Qu.:11998750   3rd Qu.:5.75   3rd Qu.:4.750   3rd Qu.:5080             
 Max.   :17750000   Max.   :6.00   Max.   :6.000   Max.   :8500             
----------------------------------------------------------------------- 
re2_cid$cluster_id: 3
 unformatted_price     baths            beds            area      cluster_id
 Min.   :   3750   Min.   :1.000   Min.   :1.000   Min.   :   1   1:   0    
 1st Qu.: 184924   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:1438   2:   0    
 Median : 278000   Median :2.000   Median :3.000   Median :1702   3:3310    
 Mean   : 330307   Mean   :2.412   Mean   :3.176   Mean   :1746   4:   0    
 3rd Qu.: 399999   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2020             
 Max.   :3899990   Max.   :4.000   Max.   :6.000   Max.   :3463             
----------------------------------------------------------------------- 
re2_cid$cluster_id: 4
 unformatted_price     baths            beds            area      cluster_id
 Min.   :    700   Min.   :1.000   Min.   :0.000   Min.   : 192   1:   0    
 1st Qu.: 135000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.: 864   2:   0    
 Median : 215000   Median :2.000   Median :2.000   Median :1067   3:   0    
 Mean   : 258586   Mean   :1.534   Mean   :1.877   Mean   :1082   4:2083    
 3rd Qu.: 325000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:1290             
 Max.   :2500000   Max.   :3.000   Max.   :3.000   Max.   :3120             

Further data cleaning

# clean df
clean_df = real_estate[!duplicated(as.list(real_estate))] %>% # drop id col
  drop_na(unformatted_price) %>% # drop listings without price
  mutate_at(vars(baths, beds), ~replace_na(., 0)) #replace NA with 0 
dim(clean_df)
[1] 9992   46
# count of missing in area col 
sum(is.na(clean_df$area))
[1] 3048
# df without na area col 
cdf2a = clean_df %>% drop_na(area)
dim(cdf2a)
[1] 6944   46
# df without na area col and outliers
cdf2b = cdf2a %>% #select(unformatted_price, area, beds, baths) %>%
  # get zscore
  mutate(price_zs = (unformatted_price- mean(unformatted_price))/ sd(unformatted_price),
         area_zs=(area- mean(area))/ sd(area),
         beds_zs=(beds- mean(beds))/ sd(beds),
         baths_zs=(baths- mean(baths))/ sd(baths)) %>%
  # drop outliers
  filter(between(price_zs,-3,3)) %>%
  filter(between(area_zs,-3,3)) %>%
  filter(between(beds_zs,-3,3)) %>%
  filter(between(baths_zs,-3,3)) 
dim(cdf2b)
[1] 6849   50
nrow(cdf2a) - nrow(cdf2b)
[1] 95
# df with missing area imputation and without outliers 

cdf3 = clean_df %>%
  # impute missing vals using median of bed and bath
  group_by(beds, baths) %>%
  mutate(area2=ifelse(is.na(area),median(area,na.rm=TRUE),area)) %>% 
  drop_na(area2) %>% 
  ungroup() %>%
  # get zscore
  mutate(price_zs = (unformatted_price- mean(unformatted_price))/ sd(unformatted_price),
         area_zs=(area2- mean(area2))/ sd(area2),
         beds_zs=(beds- mean(beds))/ sd(beds),
         baths_zs=(baths- mean(baths))/ sd(baths)) %>%
  # drop outliers
  filter(between(price_zs,-3,3)) %>%
  filter(between(area_zs,-3,3)) %>%
  filter(between(beds_zs,-3,3)) %>%
  filter(between(baths_zs,-3,3)) 
dim(cdf3)
[1] 9880   51
LS0tCnRpdGxlOiAiVVMgUmVhbCBFc3RhdGUiCmRhdGU6ICIyMDIxLzA3LzA1IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyBFREEgRXhlcmNpc2UKClRoaXMgbm90ZWJvb2sgdXNlcyBbVVMgUmVhbCBFc3RhdGVdKGh0dHBzOi8vd3d3LnplbnJvd3MuY29tL2RhdGFzZXRzL3VzLXJlYWwtZXN0YXRlP3V0bV9zb3VyY2U9cmVkZGl0JnV0bV9tZWRpdW09c29jaWFsJnV0bV9jYW1wYWlnbj1kYXRhc2V0LXVzLXJlYWwtZXN0YXRlKSBzYW1wbGUgZGF0YXNldCBmcm9tIFplblJvdywgYnkgd2F5IG9mIFtyL2RhdGFzZXRzXShodHRwczovL3d3dy5yZWRkaXQuY29tL3IvZGF0YXNldHMvY29tbWVudHMvb2JqOHhqL29jX3VzX3JlYWxfZXN0YXRlX2ZyZWVfc2FtcGxlX2RhdGFzZXRfMTBrX3JlY29yZHMvKS4gVGhlIGRhdGFzZXQgY29udGFpbnMgcmVjb3JkcyBvZiAxMCwwMDAgaG91c2VzIG9uIHNhbGUgaW4gVS5TLiBjaXRpZXMgZnJvbSB0dHBzOi8vd3d3LnppbGxvdy5jb20vLCBzY3JhcHBlZCBieSBbWmVuUm93XShodHRwczovL3d3dy56ZW5yb3dzLmNvbS8pLiAKCmBgYHtyfQojIExvYWQgbGliYXJpZXMgCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHNjYWxlcykKbGlicmFyeShza2ltcikKbGlicmFyeShqYW5pdG9yKQpsaWJyYXJ5KHBzeWNoKQpsaWJyYXJ5KGdnaGFsdmVzKQpsaWJyYXJ5KGdnc3RhdHNwbG90KQpsaWJyYXJ5KGRlbmRleHRlbmQpCmxpYnJhcnkoZmFjdG9leHRyYSkKbGlicmFyeSh3ZXNhbmRlcnNvbikKbGlicmFyeShnZ3NjaSkKbGlicmFyeShjb2xvcnNwYWNlKQpsaWJyYXJ5KGdlb2ZhY2V0KQpsaWJyYXJ5KGd0KQoKIyBzZXQgdGhlbWUKdGhlbWVfc2V0KHRoZW1lX21pbmltYWwoYmFzZV9zaXplID0gMTApKQp0aGVtZV91cGRhdGUocGFuZWwuZ3JpZC5taW5vcj1lbGVtZW50X2JsYW5rKCksCiAgICAgICAgICAgICBwbG90LnRpdGxlLnBvc2l0aW9uPSJwbG90IiwKICAgICAgICAgICAgIGF4aXMudGl0bGU9ZWxlbWVudF90ZXh0KGZhY2U9ImJvbGQiLHNpemU9OSkpCmBgYAoKCmBgYHtyfQojIGltcG9ydCBkYXRhIApyZWFsX2VzdGF0ZSA9IHJlYWRfY3N2KCJ1cy1jaXRpZXMtcmVhbC1lc3RhdGUtc2FtcGxlLmNzdiIsIG5hPSJOVUxMIikgJT4lIAogIGNsZWFuX25hbWVzKCkgCmdsaW1wc2UocmVhbF9lc3RhdGUpCmBgYAoKYGBge3J9CiMgc2tpbSAKc2tpbShyZWFsX2VzdGF0ZSkKYGBgCgpgYGB7cn0KIyBjaGVjayBmb3IgZHVwbGljYXRlczogcm93cyAKbl9kaXN0aW5jdChyZWFsX2VzdGF0ZSRpZCkKcmVhbF9lc3RhdGUgJT4lIGdldF9kdXBlcygpICU+JSBmbGF0dGVuKCkgIyBubyBkdXBsaWNhdGVzIGZvdW5kCmBgYAoKCiMjIyBTdGF0ZQoKYGBge3J9CiMgbGlzdGluZyBjb3VudCBieSBzdGF0ZQp0YWJfc3RhdGUgPSByZWFsX2VzdGF0ZSAlPiUgY291bnQoYWRkcmVzc19zdGF0ZSkKCiMgZnVuY3Rpb24gcmVmZXJlbmNlOiBodHRwczovL21lZGl1bS5jb20vQE5pY2tEb2VzRGF0YS92aXN1YWxpemluZy1nZW9ncmFwaGljLWRhdGEtaW4tci1mYjJlMGY1YjU5YzUKY3JlYXRlX2dyYWRpZW50X3N0YXRlX3RpbGVfbWFwIDwtIGZ1bmN0aW9uKHN0YXRlLCB2YWx1ZSwgdGl0bGUsIHN1YnRpdGxlLCBjYXB0aW9uLCBsZWdlbmRfdGl0bGUsIHN0YXRlX2dyaWQ9J3VzX3N0YXRlX2dyaWQyJykgewogIAogIGRmIDwtIGFzLnRpYmJsZShkYXRhLmZyYW1lKHN0YXRlLCB2YWx1ZSkpCiAgCiAgZmlnIDwtIGRmICU+JSAKICAgIG11dGF0ZSh4ID0gMSkgJT4lIAogICAgbXV0YXRlKGxhYmVsX3kgPSAuNSkgJT4lICAKICAgIG11dGF0ZShsYWJlbF94ID0gMSkgJT4lIAogICAgZ2dwbG90KCkrCiAgICBnZW9tX2JhcihtYXBwaW5nPWFlcyh4PXgsIGZpbGw9dmFsdWUpKSAgKwogICAgZmFjZXRfZ2VvKH4gc3RhdGUsIGdyaWQ9c3RhdGVfZ3JpZCkgKwogICAgZ2d0aXRsZSh0aXRsZSkgKwogICAgZ2VvbV90ZXh0KGFlcyh4PWxhYmVsX3gsIHk9bGFiZWxfeSwgbGFiZWw9c3RhdGUpLCBjb2xvcj0nI2ZmZmZmZicsIHNpemU9My41KSAKICAKICByZXR1cm4oZmlnKQp9CgpjcmVhdGVfZ3JhZGllbnRfc3RhdGVfdGlsZV9tYXAodGFiX3N0YXRlJGFkZHJlc3Nfc3RhdGUsIHRhYl9zdGF0ZSRuLCB0aXRsZT0nU2FsZSBsaXN0aW5nIGNvdW50IGJ5IHN0YXRlXG4nLCBsZWdlbmRfdGl0bGUgPSAiIiwgY2FwdGlvbj0iIikgKyAKICBzY2FsZV9maWxsX2NvbnRpbnVvdXNfc2VxdWVudGlhbChwYWxldHRlPSJIZWF0IiwgbGltaXRzPWMoMSwzMjMpLCBicmVha3M9YygxLDEwMCwyMDAsMzIzKSkgKwogIHRoZW1lX3ZvaWQoYmFzZV9zaXplPTEwKSArIAogIHRoZW1lKHN0cmlwLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBwbG90Lm1hcmdpbiA9IHVuaXQoYygxLDEuNSwxLDEuNSksICJjbSIpLAogICAgICAgIHBsb3QudGl0bGU9ZWxlbWVudF90ZXh0KHNpemU9MTEpLAogICAgICAgIGxlZ2VuZC50aXRsZT1lbGVtZW50X3RleHQoc2l6ZT05KSkgKyAKICBndWlkZXMoZmlsbCA9IGd1aWRlX2NvbG9yYmFyKHRpdGxlPSJDb3VudCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRpdGxlLnBvc2l0aW9uID0gInRvcCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBiYXJ3aWR0aCA9IHVuaXQoLjUsICJsaW5lcyIpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYmFyaGVpZ2h0ID0gdW5pdCgxMCwgImxpbmVzIikpKSAKYGBgCgoKCmBgYHtyfQojIG1lZGlhbiBwcmljZSBieSBzdGF0ZQpyZV9zdCA9IHJlYWxfZXN0YXRlICU+JSBkcm9wX25hKHVuZm9ybWF0dGVkX3ByaWNlKSAlPiUKICBncm91cF9ieShhZGRyZXNzX3N0YXRlKSAlPiUKICBmaWx0ZXIoYWRkcmVzc19zdGF0ZSE9ICJEQyIpICU+JQogIHN1bW1hcmlzZShtZWRpYW5fcHJpY2U9bWVkaWFuKHVuZm9ybWF0dGVkX3ByaWNlKSkgCgpjcmVhdGVfZ3JhZGllbnRfc3RhdGVfdGlsZV9tYXAocmVfc3QkYWRkcmVzc19zdGF0ZSwgcmVfc3QkbWVkaWFuX3ByaWNlLCB0aXRsZT0nTWVkaWFuIHNhbGUgcHJpY2UgYnkgc3RhdGVcbicsIGxlZ2VuZF90aXRsZSA9ICIiLCBjYXB0aW9uPSIiKSArIAogIHNjYWxlX2ZpbGxfY29udGludW91c19zZXF1ZW50aWFsKHBhbGV0dGU9IkJhdGxvdyIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxhYmVscyA9IHVuaXRfZm9ybWF0KHVuaXQgPSAiSyIsIHNjYWxlID0gMWUtMykpICsKICB0aGVtZV92b2lkKGJhc2Vfc2l6ZT0xMCkgKyAKICB0aGVtZShzdHJpcC50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgcGxvdC5tYXJnaW4gPSB1bml0KGMoMSwxLjUsMSwxLjUpLCAiY20iKSwKICAgICAgICBwbG90LnRpdGxlPWVsZW1lbnRfdGV4dChzaXplPTExKSwKICAgICAgICBsZWdlbmQudGl0bGU9ZWxlbWVudF90ZXh0KHNpemU9OSkpICsgCiAgZ3VpZGVzKGZpbGwgPSBndWlkZV9jb2xvcmJhcih0aXRsZT0iTWVkaWFuIHByaWNlIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGl0bGUucG9zaXRpb24gPSAidG9wIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJhcndpZHRoID0gdW5pdCguNSwgImxpbmVzIiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBiYXJoZWlnaHQgPSB1bml0KDEwLCAibGluZXMiKSkpIApgYGAKCiMjIyBBZGRyZXNzIGNpdHkKCmBgYHtyfQojIG1lZGlhbiBwcmljZSBieSBhZGRyZXNzX2NpdHkgKGNpdGllcyB3aXRoIG1vcmUgdGhhbiA0MCBsaXN0aW5ncykKcmVhbF9lc3RhdGUgJT4lIGdyb3VwX2J5KGFkZHJlc3NfY2l0eSkgJT4lIAogIHN1bW1hcmlzZShsaXN0aW5nX2NvdW50PW4oKSwgbWVkID0gbWVkaWFuKHVuZm9ybWF0dGVkX3ByaWNlKSkgJT4lIAogIGFycmFuZ2UoZGVzYyhsaXN0aW5nX2NvdW50KSkgJT4lCiAgZmlsdGVyKGxpc3RpbmdfY291bnQ+PTQwKSAlPiUKICBhcnJhbmdlKGRlc2MobWVkKSkgJT4lCiAgbXV0YXRlKGxhYiA9IHBhc3RlMChhZGRyZXNzX2NpdHksIiAiLCIoIiwibj0iLGxpc3RpbmdfY291bnQsIikiKSkgJT4lCiAgbXV0YXRlKGxhYj0gZmN0X2lub3JkZXIobGFiKSkgJT4lCiAgZ2dwbG90KGFlcyh5PSBmY3RfcmV2KGxhYiksIHg9IG1lZCwgZmlsbD1JKGlmZWxzZShtZWQ9PW1heChtZWQpLCAiI2Y2YmQ2MCIsIiNhZGI1YmQiKSkpKSArIAogIGdlb21fY29sKHdpZHRoPTAuNzUpICsgCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbD1zY2FsZXM6OmRvbGxhcihtZWQpKSxzaXplPTMsIGNvbG9yPSJibGFjayIsIGhqdXN0PTEuMikgKyAKICBzY2FsZV94X2NvbnRpbnVvdXMobGFiZWxzID0gdW5pdF9mb3JtYXQodW5pdCA9ICJLIiwgc2NhbGUgPSAxZS0zKSwgZXhwYW5kPWMoMCwwKSwgCiAgICAgICAgICAgICAgICAgICAgIGxpbWl0cz1jKDAsNzAwMDAwKSkgKyAKICBsYWJzKHg9ICJNZWRpYW4gc2FsZSBwcmljZSAoaW4gVVNEKSIsIHk9ICJDaXR5IChsaXN0aW5nIGNvdW50KSIsCiAgICAgICBzdWJ0aXRsZT0iTWVkaWFuIHNhbGUgcHJpY2UgYnkgY2l0eSAoY2l0aWVzIHdpdGggPj00MCBzYWxlIGxpc3RpbmdzKSIpCmBgYAoKIyMjIEZlYXR1cmVkIGxpc3RpbmcgCmBgYHtyfQojIGlzX2ZlYXR1cmVkX2xpc3RpbmcgYW5kIHByaWNlCnBzeWNoOjpkZXNjcmliZUJ5KHJlYWxfZXN0YXRlJHVuZm9ybWF0dGVkX3ByaWNlLCByZWFsX2VzdGF0ZSRpc19mZWF0dXJlZF9saXN0aW5nLCBtYXQ9VCkKYGBgCgojIyMgU3RhdHVzIHRleHQKYGBge3J9CiMgc3RhdHVzIHRleHQgYW5kIHByaWNlCnJlYWxfZXN0YXRlICU+JSBjb3VudChzdGF0dXNfdGV4dCkgJT4lIGFycmFuZ2UoZGVzYyhuKSkKCnJlYWxfZXN0YXRlICU+JSBkcm9wX25hKHVuZm9ybWF0dGVkX3ByaWNlKSAlPiUKICBtdXRhdGUoc3RhdHVzX2dycCA9IGZjdF9sdW1wKGZhY3RvcihzdGF0dXNfdGV4dCksIDYsb3RoZXJfbGV2ZWwgPSAiT3RoZXIgc3RhdHVzIHRleHQiKSkgJT4lCiAgbXV0YXRlKHN0YXR1c19ncnAgPSBzdHJfcmVtb3ZlX2FsbChzdGF0dXNfZ3JwLCJmb3Igc2FsZSIpKSAlPiUgCiAgbXV0YXRlKHN0YXR1c19ncnAgPSBpZl9lbHNlKHN0YXR1c19ncnA9PSJIb21lICIsIk90aGVyIHN0YXR1cyB0ZXh0IixzdGF0dXNfZ3JwKSkgJT4lCiAgZ3JvdXBfYnkoc3RhdHVzX2dycCkgJT4lCiAgc3VtbWFyaXNlKG1lZGlhbj1tZWRpYW4odW5mb3JtYXR0ZWRfcHJpY2UpLAogICAgICAgICAgICBtaW49bWluKHVuZm9ybWF0dGVkX3ByaWNlKSwgbWF4PW1heCh1bmZvcm1hdHRlZF9wcmljZSksIHJhbmdlPW1heC1taW4sCiAgICAgICAgICAgIG1lYW4gPSBtZWFuKHVuZm9ybWF0dGVkX3ByaWNlKSkgJT4lCiAgcmVuYW1lKHN0YXR1c190ZXh0PSBzdGF0dXNfZ3JwKSAlPiUKICBhcnJhbmdlKGRlc2MobWVkaWFuKSkgJT4lCiAgZ3QoKSAlPiUKICBmbXRfbnVtYmVyKAogICAgY29sdW1ucyA9IGMoIm1lZGlhbiIsIm1pbiIsIm1heCIsInJhbmdlIiwibWVhbiIpLAogICAgZGVjaW1hbHM9MAogICkgJT4lCiAgZGF0YV9jb2xvcigKICAgIGNvbHVtbnMgPWMoIm1lZGlhbiIsIm1pbiIsIm1heCIsInJhbmdlIiwibWVhbiIpLAogICAgY29sb3JzID0gc2NhbGVzOjpjb2xfbnVtZXJpYygKICAgICAgcGFsZXR0ZSA9IGMoIiNmZmZmZmYiLCAiI2YyZmJkMiIsICIjYzllY2I0IiwgIiM5M2QzYWIiLCAiIzM1YjBhYiIpLAogICAgICBkb21haW4gPSBOVUxMCiAgICApCiAgKSAlPiUKICAgdGFiX3N0eWxlKAogICAgc3R5bGUgPSBsaXN0KAogICAgICBjZWxsX2JvcmRlcnMoCiAgICAgICAgc2lkZXMgPSAiYm90dG9tIiwKICAgICAgICBjb2xvciA9ICJibGFjayIsCiAgICAgICAgd2VpZ2h0ID0gcHgoMykKICAgICAgKQogICAgKSwKICAgIGxvY2F0aW9ucyA9IGxpc3QoCiAgICAgIGNlbGxzX2NvbHVtbl9sYWJlbHMoCiAgICAgICAgY29sdW1ucyA9IGd0OjpldmVyeXRoaW5nKCkKICAgICAgKQogICAgKQogICkgJT4lCiAgdGFiX29wdGlvbnModGFibGUuZm9udC5zaXplPTE0KSAlPiUKICB0YWJfaGVhZGVyKHRpdGxlPW1kKCIqKlNhbGUgcHJpY2UgYnkgc3RhdHVzIHRleHQqKiIpKQpgYGAKCgojIyMgUHJpY2UsIGJlZHMsIGJhdGhzIGFuZCBhcmVhCgpgYGB7ciwgd2FybmluZz1GLCBtZXNzYWdlPUZ9CiMgYm94cGxvdCAocHJpY2UsIGJlZHMsIGJhdGhzLCBhcmVhKQpyZWFsX2VzdGF0ZSAlPiUgc2VsZWN0ICh6cGlkLCB1bmZvcm1hdHRlZF9wcmljZSwgYmVkcywgYmF0aHMsIGFyZWEpICU+JSAKICBwaXZvdF9sb25nZXIoIXpwaWQpICU+JQogIGdncGxvdChhZXMoeD12YWx1ZSwgY29sb3I9bmFtZSkpICsgCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuYWxwaGEgPSAwLjgsIG91dGxpZXIuc2hhcGUgPSAyMSwgc2hvdy5sZWdlbmQgPSBGKSArIAogIGZhY2V0X3dyYXAofm5hbWUsIHNjYWxlcyA9ICJmcmVlIiwgbmNvbD0xLCBzdHJpcC5wb3NpdGlvbiA9ICJsZWZ0IikgKyAKICB0aGVtZV9saWdodChiYXNlX3NpemU9MTApICsKICB0aGVtZShzdHJpcC5wbGFjZW1lbnQgPSAib3V0c2lkZSIsCiAgICAgICAgYXhpcy50ZXh0LnkgPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgYXhpcy50aWNrcy55PWVsZW1lbnRfYmxhbmsoKQogICAgICAgICkgKyAKICBzY2FsZV94X2NvbnRpbnVvdXMobGFiZWxzPXNjYWxlczo6Y29tbWEpICsgCiAgc2NhbGVfeV9jb250aW51b3VzKGJyZWFrcz1jKDApKSArCiAgc2NhbGVfY29sb3JfbnBnKCkKYGBgCgojIyMgQXJlYQpgYGB7ciwgd2FybmluZz1GLCBtZXNzYWdlPUZ9CiMgYXJlYSBhbmQgcHJpY2UKcmVhbF9lc3RhdGUgJT4lIAogIHNlbGVjdCh1bmZvcm1hdHRlZF9wcmljZSwgYXJlYSkgJT4lCiAgZHJvcF9uYSgpICU+JQogIG11dGF0ZShwcmljZV96cyA9ICh1bmZvcm1hdHRlZF9wcmljZS0gbWVhbih1bmZvcm1hdHRlZF9wcmljZSkpLyBzZCh1bmZvcm1hdHRlZF9wcmljZSksCiAgICAgICAgIGFyZWFfenM9KGFyZWEtIG1lYW4oYXJlYSkpLyBzZChhcmVhKSkgJT4lCiAgZmlsdGVyKGJldHdlZW4ocHJpY2VfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGFyZWFfenMsLTMsMykpICU+JQogIGdncGxvdChhZXMoeD1hcmVhLCB5PXVuZm9ybWF0dGVkX3ByaWNlKSkgKyAKICBnZW9tX3BvaW50KHNoYXBlPTIxLCBhbHBoYT0wLjUpICsgCiAgZ2VvbV9zbW9vdGgobWV0aG9kPSdsbScsIHNlPUZBTFNFLCBjb2xvcj0iI2Y2YmQ2MCIpICsgCiAgc3RhdF9jb3IobWV0aG9kPSJwZWFyc29uIixsYWJlbC54PTE1MDAwLCBzaXplPTMuNSkgKyAKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzPXNjYWxlczo6bGFiZWxfbnVtYmVyX3NpKCkpICsgCiAgc2NhbGVfeF9jb250aW51b3VzKGxhYmVscyA9IHVuaXRfZm9ybWF0KHVuaXQgPSAiSyIsIHNjYWxlID0gMWUtMywgYWNjdXJhY3kgPSAxKSkgKwogIGxhYnMoeD0iQXJlYSIsIHk9IlNhbGUgcHJpY2UgKGluIFVTRCkiLCBzdWJ0aXRsZT0iQXJlYSBhbmQgc2FsZSBwcmljZSIpCmBgYAoKIyMjIEJhdGhzCmBgYHtyfQojIGRyb3AgcHJpY2Ugb3V0bGllcnMKcmVfcHJpY2UgPSByZWFsX2VzdGF0ZSAlPiUgCiAgZHJvcF9uYSh1bmZvcm1hdHRlZF9wcmljZSkgJT4lCiAgbXV0YXRlKHByaWNlX3pzID0gKHVuZm9ybWF0dGVkX3ByaWNlLSBtZWFuKHVuZm9ybWF0dGVkX3ByaWNlKSkvIHNkKHVuZm9ybWF0dGVkX3ByaWNlKSkgJT4lCiAgZmlsdGVyKGJldHdlZW4ocHJpY2VfenMsLTMsMykpIApzdW1tYXJ5KHJlX3ByaWNlJHVuZm9ybWF0dGVkX3ByaWNlKQpgYGAKCgpgYGB7cn0KIyBsaXN0aW5nIGNvdW50IGJ5IG51bWJlciBvZiBiYXRocyAKcmVfcHJpY2UgJT4lIG11dGF0ZShiYXRoczIgPSBhcy5pbnRlZ2VyKGZsb29yKGJhdGhzKSkpICU+JSAKICBjb3VudChiYXRoczIpICU+JQogIGdncGxvdChhZXMoeT1mY3RfcmV2KGZhY3RvcihiYXRoczIpKSwgeD1uKSkgKyAKICBnZW9tX3NlZ21lbnQoYWVzKHg9MCwgeGVuZD1uLCB5PWZjdF9yZXYoZmFjdG9yKGJhdGhzMikpLHllbmQ9ZmN0X3JldihmYWN0b3IoYmF0aHMyKSkpKSArCiAgZ2VvbV9wb2ludCgpICsgCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbD1uKSxzaXplPTMsIGhqdXN0PS0wLjcpICsgCiAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cz1jKDAsMzUwMCkpICsKICBsYWJzKHg9IlNhbGUgbGlzdGluZyBjb3VudCIsIHk9Ik51bWJlciBvZiBiYXRocyIsIHN1YnRpdGxlPSJTYWxlIGxpc3RpbmcgY291bnQgYnkgbnVtYmVyIG9mIGJhdGhzIikKCiMgYmF0aHMgYW5kIHByaWNlICAKcmVfcHJpY2UgJT4lIG11dGF0ZShiYXRoczIgPSBhcy5pbnRlZ2VyKGZsb29yKGJhdGhzKSkpICU+JQogIG11dGF0ZShiYXRoczJfZ3JwID0gZmN0X2x1bXAoZmFjdG9yKGJhdGhzMiksIDYsb3RoZXJfbGV2ZWwgPSAiNyAtIDI0IikpICU+JSAKICBnZ3Bsb3QoYWVzKHk9ZmN0X3JldihiYXRoczJfZ3JwKSwgeD11bmZvcm1hdHRlZF9wcmljZSwgY29sb3I9ZmN0X3JldihiYXRoczJfZ3JwKSkpICsgCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuc2hhcGUgPSAyMSwgb3V0bGllci5hbHBoYSA9IDAuNSwgc2hvdy5sZWdlbmQgPSBGKSArIAogIHNjYWxlX3hfY29udGludW91cyhsYWJlbHM9c2NhbGVzOjpsYWJlbF9udW1iZXJfc2koKSkgKyAKICBsYWJzKHg9IlNhbGUgcHJpY2UgKGluIFVTRCkiLCB5PSJOdW1iZXIgb2YgYmF0aHMiLCBzdWJ0aXRsZT0iTnVtYmVyIG9mIGJhdGhzIGFuZCBzYWxlIHByaWNlIikgKyAKICBzY2FsZV9jb2xvcl9hYWFzKG5hLnZhbHVlPSJibGFjayIpCmBgYAojIyMgQmVkcwoKYGBge3J9CiMgbGlzdGluZyBjb3VudCBieSBudW1iZXIgb2YgYmVkcwpyZV9wcmljZSAlPiUgbXV0YXRlKGJlZHNfZ3JwID0gZmN0X2x1bXAoZmFjdG9yKGJlZHMpLCA3LG90aGVyX2xldmVsID0gIj43IikpICU+JQogIGNvdW50KGJlZHNfZ3JwKSAlPiUgCiAgZ2dwbG90KGFlcyh5PWZjdF9yZXYoYmVkc19ncnApLCB4PW4sIGZpbGw9bikpICsgCiAgZ2VvbV9jb2wod2lkdGg9MC43NSwgc2hvdy5sZWdlbmQ9RikgKyAKICBnZW9tX3RleHQoYWVzKGxhYmVsPW4pLHNpemU9MywgaGp1c3Q9MS4yLCBjb2xvcj0id2hpdGUiKSAgKwogIHNjYWxlX2ZpbGxfY29udGludW91c19zZXF1ZW50aWFsKHBhbGV0dGU9IlJlZC1CbHVlIikgKwogIGxhYnMoeD0iU2FsZSBsaXN0aW5nIGNvdW50IiwgeT0iTnVtYmVyIG9mIGJlZHMiLCBzdWJ0aXRsZT0iU2FsZSBsaXN0aW5nIGNvdW50IGJ5IG51bWJlciBvZiBiZWRzIikKCiMgYmVkcyBhbmQgcHJpY2UKcmVfcHJpY2UgJT4lIG11dGF0ZShiZWRzX2dycCA9IGZjdF9sdW1wKGZhY3RvcihiZWRzKSwgNyxvdGhlcl9sZXZlbCA9ICI+NyIpKSAlPiUKICBnZ3Bsb3QoYWVzKHk9ZmN0X3JldihiZWRzX2dycCksIHg9dW5mb3JtYXR0ZWRfcHJpY2UsIGNvbG9yPWZjdF9yZXYoYmVkc19ncnApKSkgKyAKICBnZW9tX2JveHBsb3Qob3V0bGllci5zaGFwZSA9IDIxLCBvdXRsaWVyLmFscGhhID0gMC41LCBzaG93LmxlZ2VuZD1GKSArIAogIHNjYWxlX3hfY29udGludW91cyhsYWJlbHM9c2NhbGVzOjpsYWJlbF9udW1iZXJfc2koKSkgKyAKICBsYWJzKHg9IlNhbGUgcHJpY2UgKGluIFVTRCkiLCB5PSJOdW1iZXIgb2YgYmVkcyIsIHN1YnRpdGxlPSJOdW1iZXIgb2YgYmVkcyBhbmQgc2FsZSBwcmljZSIpICsgCiAgc2NhbGVfY29sb3JfYWFhcyhuYS52YWx1ZT0iYmxhY2siKQpgYGAKIyMjIENvcnJlbGF0aW9uCgpgYGB7cn0KIyBjb3JyZWxhdGlvbgpyZTEgPSByZWFsX2VzdGF0ZSAlPiUgCiAgc2VsZWN0KHVuZm9ybWF0dGVkX3ByaWNlLCBiYXRocywgYmVkcywgYXJlYSkgJT4lIAogIGRyb3BfbmEoKQpkaW0ocmUxKQoKc2V0LnNlZWQoMTIzKQpnZ2NvcnJtYXQoCiAgZGF0YT1yZTEsCiAgY29yLnZhcnM9Yyh1bmZvcm1hdHRlZF9wcmljZTphcmVhKSwKICB0aXRsZT0iQ29ycmVsYXRpb24iLAopCmBgYAoKYGBge3J9CiMgZHJvcCBvdXRsaWVycyAKcmUyID0gcmUxICU+JSBtdXRhdGUocHJpY2VfenMgPSAodW5mb3JtYXR0ZWRfcHJpY2UtIG1lYW4odW5mb3JtYXR0ZWRfcHJpY2UpKS8gc2QodW5mb3JtYXR0ZWRfcHJpY2UpLAogICAgICAgICBhcmVhX3pzPShhcmVhLSBtZWFuKGFyZWEpKS8gc2QoYXJlYSksCiAgICAgICAgIGJlZHNfenM9KGJlZHMtIG1lYW4oYmVkcykpLyBzZChiZWRzKSwKICAgICAgICAgYmF0aHNfenM9KGJhdGhzLSBtZWFuKGJhdGhzKSkvIHNkKGJhdGhzKSkgJT4lCiAgZmlsdGVyKGJldHdlZW4ocHJpY2VfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGFyZWFfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGJlZHNfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGJhdGhzX3pzLC0zLDMpKSAlPiUKICBzZWxlY3QodW5mb3JtYXR0ZWRfcHJpY2UsIGJhdGhzLCBiZWRzLCBhcmVhKQpkaW0ocmUyKQoKIyBzY2FsZSAKcmUyX3NjYWxlZCA9IHNjYWxlKHJlMikKYGBgCgpgYGB7cn0KIyBjb3JyZWxhdGlvbiBhZnRlciBkcm9wcGluZyBvdXRsaWVycwpzZXQuc2VlZCgxMjMpCmdnY29ycm1hdCgKICBkYXRhPXJlMiwKICBjb3IudmFycz1jKHVuZm9ybWF0dGVkX3ByaWNlOmFyZWEpLAogIHRpdGxlPSJDb3JyZWxhdGlvbiIsCiAgc3VidGl0bGU9IkFmdGVyIGRyb3BwaW5nIG91dGxpZXJzIgopCmBgYAoKIyMjIENsdXN0ZXJpbmcKKiBwcmljZSwgYmF0aHMsIGJlZHMsIGFyZWEKCmBgYHtyfQojIGNoZWNrIG9wdGltYWwgY2x1c3RlcnM6IGVsYm93IG1ldGhvZCAKc2V0LnNlZWQoMTIzKQpmdml6X25iY2x1c3QocmUyX3NjYWxlZCxrbWVhbnMsbWV0aG9kPSJ3c3MiKQpgYGAKCgpgYGB7cn0KIyBrIG1lYW5zIGNsdXN0ZXJpbmc6IDQgY2x1c3RlcnMKc2V0LnNlZWQoMTIzKQprbTQ9IGttZWFucyhyZTJfc2NhbGVkLGNlbnRlcnM9NCxuc3RhcnQ9NTApCmttNApmdml6X2NsdXN0ZXIoa200LCBkYXRhPXJlMl9zY2FsZWQsIGxhYmVsc2l6ZT0wKSAjY2x1c3RlciBwbG90CndpdGgocmUyLHBhaXJzKHJlMl9zY2FsZWQsY29sPSgxOjQpW2ttNCRjbHVzdGVyXSkpICNwYWlyIHBsb3QKYGBgCgoqIGNsdXN0ZXIgc2l6ZSAobGFyZ2VzdCB0byBzbWFsbGVzdCk6IEMzID4gQzQgPiBDMSA+IEMyICAgCiogQzE6IG49MTAxOSwgaGlnaGVzdCBtZWFuIGJlZHMsIDJuZCBoaWdoZXN0IG1lYW4gcHJpY2UvYXJlYS9iYXRocyAgIAoqIEMyOiBuPTE4LCBoaWdoZXN0IG1lYW4gcHJpY2UvYXJlYS9iYXRocywgMm5kIGhpZ2hlc3QgbWVhbiBiZWRzICAgIAoqIEMzOiBuPTMzMTAsIDJuZCBsb3dlc3QgbWVhbiBwcmljZS9iYXRocy9iZWRzL2FyZWEgICAKKiBDNDogbj0yMDgzLCBsb3dlc3QgbWVhbiBwcmljZS9iYXRocy9iZWRzL2FyZWEKCgpgYGB7cn0KI3N1bW1hcnkgYnkgazQgY2x1c3RlciBJRApyZTJfY2lkID0gcmUyICU+JSBtdXRhdGUoY2x1c3Rlcl9pZCA9IGFzLmZhY3RvcihrbTQkY2x1c3RlcikpCmJ5KHJlMl9jaWQsIHJlMl9jaWQkY2x1c3Rlcl9pZCwgc3VtbWFyeSkKYGBgCgoKIyMjIEZ1cnRoZXIgZGF0YSBjbGVhbmluZyAKCmBgYHtyfQojIGNsZWFuIGRmCmNsZWFuX2RmID0gcmVhbF9lc3RhdGVbIWR1cGxpY2F0ZWQoYXMubGlzdChyZWFsX2VzdGF0ZSkpXSAlPiUgIyBkcm9wIGlkIGNvbAogIGRyb3BfbmEodW5mb3JtYXR0ZWRfcHJpY2UpICU+JSAjIGRyb3AgbGlzdGluZ3Mgd2l0aG91dCBwcmljZQogIG11dGF0ZV9hdCh2YXJzKGJhdGhzLCBiZWRzKSwgfnJlcGxhY2VfbmEoLiwgMCkpICNyZXBsYWNlIE5BIHdpdGggMCAKZGltKGNsZWFuX2RmKQpgYGAKCmBgYHtyfQojIGNvdW50IG9mIG1pc3NpbmcgaW4gYXJlYSBjb2wgCnN1bShpcy5uYShjbGVhbl9kZiRhcmVhKSkKCiMgZGYgd2l0aG91dCBuYSBhcmVhIGNvbCAKY2RmMmEgPSBjbGVhbl9kZiAlPiUgZHJvcF9uYShhcmVhKQpkaW0oY2RmMmEpCgojIGRmIHdpdGhvdXQgbmEgYXJlYSBjb2wgYW5kIG91dGxpZXJzCmNkZjJiID0gY2RmMmEgJT4lICNzZWxlY3QodW5mb3JtYXR0ZWRfcHJpY2UsIGFyZWEsIGJlZHMsIGJhdGhzKSAlPiUKICAjIGdldCB6c2NvcmUKICBtdXRhdGUocHJpY2VfenMgPSAodW5mb3JtYXR0ZWRfcHJpY2UtIG1lYW4odW5mb3JtYXR0ZWRfcHJpY2UpKS8gc2QodW5mb3JtYXR0ZWRfcHJpY2UpLAogICAgICAgICBhcmVhX3pzPShhcmVhLSBtZWFuKGFyZWEpKS8gc2QoYXJlYSksCiAgICAgICAgIGJlZHNfenM9KGJlZHMtIG1lYW4oYmVkcykpLyBzZChiZWRzKSwKICAgICAgICAgYmF0aHNfenM9KGJhdGhzLSBtZWFuKGJhdGhzKSkvIHNkKGJhdGhzKSkgJT4lCiAgIyBkcm9wIG91dGxpZXJzCiAgZmlsdGVyKGJldHdlZW4ocHJpY2VfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGFyZWFfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGJlZHNfenMsLTMsMykpICU+JQogIGZpbHRlcihiZXR3ZWVuKGJhdGhzX3pzLC0zLDMpKSAKZGltKGNkZjJiKQoKbnJvdyhjZGYyYSkgLSBucm93KGNkZjJiKQpgYGAKCgpgYGB7cn0KIyBkZiB3aXRoIG1pc3NpbmcgYXJlYSBpbXB1dGF0aW9uIGFuZCB3aXRob3V0IG91dGxpZXJzIAoKY2RmMyA9IGNsZWFuX2RmICU+JQogICMgaW1wdXRlIG1pc3NpbmcgdmFscyB1c2luZyBtZWRpYW4gb2YgYmVkIGFuZCBiYXRoCiAgZ3JvdXBfYnkoYmVkcywgYmF0aHMpICU+JQogIG11dGF0ZShhcmVhMj1pZmVsc2UoaXMubmEoYXJlYSksbWVkaWFuKGFyZWEsbmEucm09VFJVRSksYXJlYSkpICU+JSAKICBkcm9wX25hKGFyZWEyKSAlPiUgCiAgdW5ncm91cCgpICU+JQogICMgZ2V0IHpzY29yZQogIG11dGF0ZShwcmljZV96cyA9ICh1bmZvcm1hdHRlZF9wcmljZS0gbWVhbih1bmZvcm1hdHRlZF9wcmljZSkpLyBzZCh1bmZvcm1hdHRlZF9wcmljZSksCiAgICAgICAgIGFyZWFfenM9KGFyZWEyLSBtZWFuKGFyZWEyKSkvIHNkKGFyZWEyKSwKICAgICAgICAgYmVkc196cz0oYmVkcy0gbWVhbihiZWRzKSkvIHNkKGJlZHMpLAogICAgICAgICBiYXRoc196cz0oYmF0aHMtIG1lYW4oYmF0aHMpKS8gc2QoYmF0aHMpKSAlPiUKICAjIGRyb3Agb3V0bGllcnMKICBmaWx0ZXIoYmV0d2VlbihwcmljZV96cywtMywzKSkgJT4lCiAgZmlsdGVyKGJldHdlZW4oYXJlYV96cywtMywzKSkgJT4lCiAgZmlsdGVyKGJldHdlZW4oYmVkc196cywtMywzKSkgJT4lCiAgZmlsdGVyKGJldHdlZW4oYmF0aHNfenMsLTMsMykpIApkaW0oY2RmMykKCmBgYAoKCgoKCgoKCgo=