Machine learning linear regression model to predict condominium sale prices and answer these questions: - How well does the size of a condominium (measured in gross square feet) explain or predict sale price across New York City as a whole? - How well does the size of a condominium explain or predict sale price for each individual borough?

library(readr)
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
3: In readChar(file, size, TRUE) : truncating string with embedded nuls
4: In readChar(file, size, TRUE) : truncating string with embedded nuls
library(stringr)
library(ggplot2)
library(dplyr)
library(purrr)
library(tidyverse)
library(ggplot2)
library(tidyr)
library(broom)

Import the data as a data frame and rename the columns.

setwd("C:/Users/Ana/Desktop/Data Analytics/CSV Files")
NYC_property_sales <- read_csv("NYC_property_sales.csv")
Parsed with column specification:
cols(
  .default = col_double(),
  borough = col_character(),
  neighborhood = col_character(),
  building_class_category = col_character(),
  tax_class_at_present = col_character(),
  building_class_at_present = col_character(),
  address = col_character(),
  apartment_number = col_character(),
  building_class_at_time_of_sale = col_character(),
  sale_date = col_datetime(format = "")
)
See spec(...) for full column specifications.
#head(NYC_property_sales, 100)

#filter to only include the building class "condominiums with elevators"

NYC_condos <- NYC_property_sales %>%
  filter(building_class_at_time_of_sale == "R4")

head(NYC_condos)

Generate a scatterplot using the NYC_condos dataframe with gross_square_feet on the x-axis and sale_price on the y-axis

ggplot(data = NYC_condos,
       aes(x = gross_square_feet, y = sale_price, color = borough)) +
    geom_point(alpha = 0.5) +
    scale_y_continuous(label = scales::comma) +
    xlim(0, 12000) + 
    geom_smooth(method = "lm", se = FALSE) +
    labs(title = "Condominium Sale Price vs Floor Area", x = "Condominium size (sq ft)", y = "Sale Price ($)") +
    theme(panel.background = element_rect(fill = "white")) 

Manually set the limits of the plot

#set the limits of the plot to x = max $5,000, y = max $20,000,000

ggplot(data = NYC_condos,
       aes(x = gross_square_feet, y = sale_price, color = borough)) +
    geom_point(alpha = 0.5) +
    scale_y_continuous(label = scales::comma, limits = c(0, 20000000)) +
    xlim(0, 5000) + 
    geom_smooth(method = "lm", se = FALSE) +
    labs(title = "Condominium Sale Price vs Floor Area", x = "Condominium size (sq ft)", y = "Sale Price ($)") +
    theme(panel.background = element_rect(fill = "white")) 

Split the plot by borough

#facet wrap by borough

ggplot(data = NYC_condos,
       aes(x = gross_square_feet, y = sale_price)) +
    geom_point(alpha = 0.5) +
    scale_y_continuous(label = scales::comma) +
    #xlim(0, 5000) + 
    geom_smooth(method = "lm", se = FALSE) +
    labs(title = "Condominium Sale Price vs Floor Area", x = "Condominium size (sq ft)", y = "Sale Price ($)") +
    theme(panel.background = element_rect(fill = "white")) +
    facet_wrap(~borough, scales = "free", ncol = 2)

The plots above show that there is likely a correlation between gross_square_feet and sale_price. However it also appears that the regression line is affected by outliers. Next step is to remove some of the outlier values that we know are incorrect.

Then re-plot the scatter plots

#remove some erronous data 

NYC_condos_original <- NYC_condos

#remove entries from that correspond to sale_prices > $200 million. One is the price of multiple units and one is for the most expensive apartment ever sold. As this is not particularly representative of the general data, it will be removed as it may cause the line of best fit to be skewed away from the majority of the data.

NYC_condos_2 <- NYC_condos %>%
  filter(sale_price < 200000000)

nrow(NYC_condos_2)
[1] 8094
#filter out rows that correspond to multiple unit sales 

NYC_condos_3 <- NYC_condos_2 %>% 
  group_by(sale_price, sale_date) %>% 
  filter(n() <= 2) %>% 
  arrange(desc(sale_price))

head(NYC_condos_3)

nrow(NYC_condos_3)
[1] 7945
ggplot(data = NYC_condos_3,
       aes(x = gross_square_feet, y = sale_price)) +
    geom_point(alpha = 0.5) +
    scale_y_continuous(label = scales::comma) +
    #xlim(0, 5000) + 
    geom_smooth(method = "lm", se = FALSE) +
    labs(title = "Condominium Sale Price vs Floor Area", x = "Condominium size (sq ft)", y = "Sale Price ($)") +
    theme(panel.background = element_rect(fill = "white")) +
    facet_wrap(~borough, scales = "free", ncol = 2)

#explore other potentially erronous data

NYC_condos_explore <- NYC_condos_3 %>%
  filter(borough == "Manhattan", sale_price > 60000000, gross_square_feet < 2500)

#NYC_condos_explore

#even though there are some further data that could be errors, there is no way from telling from the data set provided. Therefore, they will be left in the data set. For example an apartment in Manhattan with 862 sq ft but sale price $74,400,000 is unlikely to be correct, but there is no way of telling if it is or not. 

Create linear regression models

#create linear regression models

NYC_condos_lm_original <- lm(sale_price ~ gross_square_feet, data = NYC_condos_original)
NYC_condos_lm <- lm(sale_price ~ gross_square_feet, data = NYC_condos_3)

summary(NYC_condos_lm_original)

Call:
lm(formula = sale_price ~ gross_square_feet, data = NYC_condos_original)

Residuals:
      Min        1Q    Median        3Q       Max 
-79533546  -1211195   -860173   -251714 211550415 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       940683.39   57703.86   16.30   <2e-16 ***
gross_square_feet   1192.72      19.43   61.39   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4745000 on 8094 degrees of freedom
Multiple R-squared:  0.3177,    Adjusted R-squared:  0.3176 
F-statistic:  3769 on 1 and 8094 DF,  p-value: < 2.2e-16
summary(NYC_condos_lm)

Call:
lm(formula = sale_price ~ gross_square_feet, data = NYC_condos_3)

Residuals:
      Min        1Q    Median        3Q       Max 
-17373592   -767805     57079    722027  73491983 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)       -2.419e+06  4.871e+04  -49.65   <2e-16 ***
gross_square_feet  3.859e+03  3.423e+01  112.75   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2442000 on 7943 degrees of freedom
Multiple R-squared:  0.6155,    Adjusted R-squared:  0.6154 
F-statistic: 1.271e+04 on 1 and 7943 DF,  p-value: < 2.2e-16

A bivariate linear regression model has been carried out on the sale price and condominium size data. The hypothesis is that there is a relationship between sale price and condominium size. The null hypothesis is that there is no relationship between sale price and condominium size.

The first thing to look at is the value of β1 i.e. the gradient of the regression line. The gradient in the original dataset is 1193. The gradient in the cleaned dataset is 3859. Both of these values are far from zero so it would suggest there is a relationship however there are other statistics that need to be considered before making a final conclusion.

The t-statistic is the number of standard deviations that β1 is from 0. A large value of the t-statistic suggests that we can reject the null hypothesis. The t-statistic is 61 in the original dataset and 113 in the cleaned data set. This means that in both datasets, the t-statistic suggests the null hypothesis can be rejected.

We can also look at the p-value, which is the probability of observing any value (of β1) equal-to or larger than t if the null hypothesis (H0) is true.The p-value is the probability of the result occurring by chance. So a p-value of 0.9 means that the occurance is very likely to happen by chance and therefore is not down to the predictor variable. Therefore the null hypothesis stands. However, in both these datasets the p-value is neglidgible which means that is it incredibly unlikely that the response we see here is by chance. Therefore, we can reject the null hypothesis that there is no correlation between the predictor and response variable. i.e. it is extremely unlikely that the relationship between condominium size and sale price observed in this dataset is due to random chance.

RSE is the average amount that the response variable measurements deviate from the true regression line. A lower value for RSE means an overall better fit. The RSE is 4,745,000 in the original dataset and 2,442,000 after data cleaning.

R^2 is a measure of the proportion of the variability in our response which can be explained by the predictor variable. R^2 is a value between 0 and 1. A value nearer to 1 means a better fit. The adjusted R^2 value was 0.32 in the original dataset and 0.62 in the cleaned dataset. This means that 62% of the variability in our response variable can be explained by the predictor variable.

The standard error of the regression provides the absolute measure of the typical distance that the data points fall from the regression line. SE is in the units of the dependent variable.

confint(NYC_condos_lm_original)
                       2.5 %      97.5 %
(Intercept)       827568.990 1053797.787
gross_square_feet   1154.636    1230.802
confint(NYC_condos_lm)
                         2.5 %       97.5 %
(Intercept)       -2514141.303 -2323159.698
gross_square_feet     3792.148     3926.339

Lastly, the confidence intervals for the original (uncleaned dataset) is 1155 - 1231 $ per square foot. The confidence intervals for the cleanded dataset is 3792 - 3926 $ per square foot. The confidence intervals for the original dataset is lower. This is likely due to the high value low floor area condominiums that were included in the original dataset but were removed in the cleaned dataset.

Next, nest the dataframe and fit models to the dataframe. Apply the broom() functions tidy() to each nested model to extract the model coefficients.

NYC_nested <- NYC_condos_3 %>% 
   group_by(borough) %>% 
   nest() %>% 
   mutate(linear_model = map(.x = data, 
                             .f = ~lm(sale_price ~ gross_square_feet, 
                                      data = .))) %>%
   mutate(tidy_coefficients = map(.x = linear_model, 
                               .f = tidy, 
                               conf.int = TRUE)) 
#print nested dataframe
NYC_nested

#print data for the Bronx
NYC_nested$data[[3]]

summary(NYC_nested$linear_model[[3]])

Call:
lm(formula = sale_price ~ gross_square_feet, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-797569  -63973  -14647   30202 1493512 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)       -254011.87   24667.81  -10.30   <2e-16 ***
gross_square_feet     648.96      29.57   21.94   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 151500 on 328 degrees of freedom
Multiple R-squared:  0.5948,    Adjusted R-squared:  0.5936 
F-statistic: 481.5 on 1 and 328 DF,  p-value: < 2.2e-16

Extract the tidy model coefficents

NYC_coefficients <- NYC_nested %>%
    select(borough, tidy_coefficients) %>%
    unnest(cols = tidy_coefficients)

NYC_slope <- NYC_coefficients %>%
    filter(term == "gross_square_feet") %>%
    arrange(estimate)

NYC_slope

#extract the nested linear model object for one of the boroughs and use the summary() function to inspect the results

Again, nest the dataframe and fit models to the dataframe. This time, apply the broom() function glance() to each nested model to extract the summary statistics.

NYC_nested_2 <- NYC_condos_3 %>% 
   group_by(borough) %>% 
   nest() %>% 
   mutate(linear_model = map(.x = data, 
                             .f = ~lm(sale_price ~ gross_square_feet, 
                                      data = .))) %>%
    mutate(tidy_summary_stats = map(.x = linear_model,
                                    .f = glance))

#print nested dataframe
NYC_nested_2

#print data for the Bronx
NYC_nested_2$data[[3]]
NA

Extract the summary statistics.


NYC_summary_stats <- NYC_nested_2 %>%
    select(borough, tidy_summary_stats) %>%
    unnest(cols = tidy_summary_stats)

NYC_summary_stats
NA

The analysis has shown that, overall, we can say that sale_price can be predicted by gross_square_area. For each borough the t-statistic is sufficiently high and the p_value sufficiently low that we can say that there is a relationship between sale_price and gross_square_area. Some of the boroughs have regression lines that fit the data better than others. This is demonstrated by the adjusted R-squared value. A higher value means that more of the variability in the response variable can be attributed to the predictor variable. This is the case for Manhattan. However the fit for Queens is less good as the r-squared value is only 34%. This is evidenced in the scatter plots as you can see that the points for these two boroughs are spread further from the regression line.

LS0tDQp0aXRsZTogIkRhdGFRdWVzdCBQcm9qZWN0OiBQcmVkaWN0aW5nIENvbmRvbWluaXVtIFNhbGUgUHJpY2VzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KTWFjaGluZSBsZWFybmluZyBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbCB0byBwcmVkaWN0IGNvbmRvbWluaXVtIHNhbGUgcHJpY2VzIGFuZCBhbnN3ZXIgdGhlc2UgcXVlc3Rpb25zOg0KLSBIb3cgd2VsbCBkb2VzIHRoZSBzaXplIG9mIGEgY29uZG9taW5pdW0gKG1lYXN1cmVkIGluIGdyb3NzIHNxdWFyZSBmZWV0KSBleHBsYWluIG9yIHByZWRpY3Qgc2FsZSBwcmljZSBhY3Jvc3MgTmV3IFlvcmsgQ2l0eSBhcyBhIHdob2xlPw0KLSBIb3cgd2VsbCBkb2VzIHRoZSBzaXplIG9mIGEgY29uZG9taW5pdW0gZXhwbGFpbiBvciBwcmVkaWN0IHNhbGUgcHJpY2UgZm9yIGVhY2ggaW5kaXZpZHVhbCBib3JvdWdoPw0KIA0KYGBge3J9DQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShzdHJpbmdyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocHVycnIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkodGlkeXIpDQpsaWJyYXJ5KGJyb29tKQ0KYGBgDQpJbXBvcnQgdGhlIGRhdGEgYXMgYSBkYXRhIGZyYW1lIGFuZCByZW5hbWUgdGhlIGNvbHVtbnMuDQpgYGB7cn0NCnNldHdkKCJDOi9Vc2Vycy9BbmEvRGVza3RvcC9EYXRhIEFuYWx5dGljcy9DU1YgRmlsZXMiKQ0KTllDX3Byb3BlcnR5X3NhbGVzIDwtIHJlYWRfY3N2KCJOWUNfcHJvcGVydHlfc2FsZXMuY3N2IikNCiNoZWFkKE5ZQ19wcm9wZXJ0eV9zYWxlcywgMTAwKQ0KDQojZmlsdGVyIHRvIG9ubHkgaW5jbHVkZSB0aGUgYnVpbGRpbmcgY2xhc3MgImNvbmRvbWluaXVtcyB3aXRoIGVsZXZhdG9ycyINCg0KTllDX2NvbmRvcyA8LSBOWUNfcHJvcGVydHlfc2FsZXMgJT4lDQogIGZpbHRlcihidWlsZGluZ19jbGFzc19hdF90aW1lX29mX3NhbGUgPT0gIlI0IikNCg0KaGVhZChOWUNfY29uZG9zKQ0KYGBgDQpHZW5lcmF0ZSBhIHNjYXR0ZXJwbG90IHVzaW5nIHRoZSBOWUNfY29uZG9zIGRhdGFmcmFtZSB3aXRoIGdyb3NzX3NxdWFyZV9mZWV0IG9uIHRoZSB4LWF4aXMgYW5kIHNhbGVfcHJpY2Ugb24gdGhlIHktYXhpcw0KDQpgYGB7cn0NCmdncGxvdChkYXRhID0gTllDX2NvbmRvcywNCiAgICAgICBhZXMoeCA9IGdyb3NzX3NxdWFyZV9mZWV0LCB5ID0gc2FsZV9wcmljZSwgY29sb3IgPSBib3JvdWdoKSkgKw0KICAgIGdlb21fcG9pbnQoYWxwaGEgPSAwLjUpICsNCiAgICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWwgPSBzY2FsZXM6OmNvbW1hKSArDQogICAgeGxpbSgwLCAxMjAwMCkgKyANCiAgICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IEZBTFNFKSArDQogICAgbGFicyh0aXRsZSA9ICJDb25kb21pbml1bSBTYWxlIFByaWNlIHZzIEZsb29yIEFyZWEiLCB4ID0gIkNvbmRvbWluaXVtIHNpemUgKHNxIGZ0KSIsIHkgPSAiU2FsZSBQcmljZSAoJCkiKSArDQogICAgdGhlbWUocGFuZWwuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsID0gIndoaXRlIikpIA0KYGBgDQpNYW51YWxseSBzZXQgdGhlIGxpbWl0cyBvZiB0aGUgcGxvdA0KDQpgYGB7cn0NCiNzZXQgdGhlIGxpbWl0cyBvZiB0aGUgcGxvdCB0byB4ID0gbWF4ICQ1LDAwMCwgeSA9IG1heCAkMjAsMDAwLDAwMA0KDQpnZ3Bsb3QoZGF0YSA9IE5ZQ19jb25kb3MsDQogICAgICAgYWVzKHggPSBncm9zc19zcXVhcmVfZmVldCwgeSA9IHNhbGVfcHJpY2UsIGNvbG9yID0gYm9yb3VnaCkpICsNCiAgICBnZW9tX3BvaW50KGFscGhhID0gMC41KSArDQogICAgc2NhbGVfeV9jb250aW51b3VzKGxhYmVsID0gc2NhbGVzOjpjb21tYSwgbGltaXRzID0gYygwLCAyMDAwMDAwMCkpICsNCiAgICB4bGltKDAsIDUwMDApICsgDQogICAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkgKw0KICAgIGxhYnModGl0bGUgPSAiQ29uZG9taW5pdW0gU2FsZSBQcmljZSB2cyBGbG9vciBBcmVhIiwgeCA9ICJDb25kb21pbml1bSBzaXplIChzcSBmdCkiLCB5ID0gIlNhbGUgUHJpY2UgKCQpIikgKw0KICAgIHRoZW1lKHBhbmVsLmJhY2tncm91bmQgPSBlbGVtZW50X3JlY3QoZmlsbCA9ICJ3aGl0ZSIpKSANCmBgYA0KU3BsaXQgdGhlIHBsb3QgYnkgYm9yb3VnaA0KYGBge3J9DQojZmFjZXQgd3JhcCBieSBib3JvdWdoDQoNCmdncGxvdChkYXRhID0gTllDX2NvbmRvcywNCiAgICAgICBhZXMoeCA9IGdyb3NzX3NxdWFyZV9mZWV0LCB5ID0gc2FsZV9wcmljZSkpICsNCiAgICBnZW9tX3BvaW50KGFscGhhID0gMC41KSArDQogICAgc2NhbGVfeV9jb250aW51b3VzKGxhYmVsID0gc2NhbGVzOjpjb21tYSkgKw0KICAgICN4bGltKDAsIDUwMDApICsgDQogICAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkgKw0KICAgIGxhYnModGl0bGUgPSAiQ29uZG9taW5pdW0gU2FsZSBQcmljZSB2cyBGbG9vciBBcmVhIiwgeCA9ICJDb25kb21pbml1bSBzaXplIChzcSBmdCkiLCB5ID0gIlNhbGUgUHJpY2UgKCQpIikgKw0KICAgIHRoZW1lKHBhbmVsLmJhY2tncm91bmQgPSBlbGVtZW50X3JlY3QoZmlsbCA9ICJ3aGl0ZSIpKSArDQogICAgZmFjZXRfd3JhcCh+Ym9yb3VnaCwgc2NhbGVzID0gImZyZWUiLCBuY29sID0gMikNCmBgYA0KVGhlIHBsb3RzIGFib3ZlIHNob3cgdGhhdCB0aGVyZSBpcyBsaWtlbHkgYSBjb3JyZWxhdGlvbiBiZXR3ZWVuIGdyb3NzX3NxdWFyZV9mZWV0IGFuZCBzYWxlX3ByaWNlLiBIb3dldmVyIGl0IGFsc28gYXBwZWFycyB0aGF0IHRoZSByZWdyZXNzaW9uIGxpbmUgaXMgYWZmZWN0ZWQgYnkgb3V0bGllcnMuIE5leHQgc3RlcCBpcyB0byByZW1vdmUgc29tZSBvZiB0aGUgb3V0bGllciB2YWx1ZXMgdGhhdCB3ZSBrbm93IGFyZSBpbmNvcnJlY3QuDQoNClRoZW4gcmUtcGxvdCB0aGUgc2NhdHRlciBwbG90cw0KYGBge3J9DQojcmVtb3ZlIHNvbWUgZXJyb25vdXMgZGF0YSANCg0KTllDX2NvbmRvc19vcmlnaW5hbCA8LSBOWUNfY29uZG9zDQoNCiNyZW1vdmUgZW50cmllcyBmcm9tIHRoYXQgY29ycmVzcG9uZCB0byBzYWxlX3ByaWNlcyA+ICQyMDAgbWlsbGlvbi4gT25lIGlzIHRoZSBwcmljZSBvZiBtdWx0aXBsZSB1bml0cyBhbmQgb25lIGlzIGZvciB0aGUgbW9zdCBleHBlbnNpdmUgYXBhcnRtZW50IGV2ZXIgc29sZC4gQXMgdGhpcyBpcyBub3QgcGFydGljdWxhcmx5IHJlcHJlc2VudGF0aXZlIG9mIHRoZSBnZW5lcmFsIGRhdGEsIGl0IHdpbGwgYmUgcmVtb3ZlZCBhcyBpdCBtYXkgY2F1c2UgdGhlIGxpbmUgb2YgYmVzdCBmaXQgdG8gYmUgc2tld2VkIGF3YXkgZnJvbSB0aGUgbWFqb3JpdHkgb2YgdGhlIGRhdGEuDQoNCk5ZQ19jb25kb3NfMiA8LSBOWUNfY29uZG9zICU+JQ0KICBmaWx0ZXIoc2FsZV9wcmljZSA8IDIwMDAwMDAwMCkNCg0KbnJvdyhOWUNfY29uZG9zXzIpDQoNCiNmaWx0ZXIgb3V0IHJvd3MgdGhhdCBjb3JyZXNwb25kIHRvIG11bHRpcGxlIHVuaXQgc2FsZXMgDQoNCk5ZQ19jb25kb3NfMyA8LSBOWUNfY29uZG9zXzIgJT4lIA0KICBncm91cF9ieShzYWxlX3ByaWNlLCBzYWxlX2RhdGUpICU+JSANCiAgZmlsdGVyKG4oKSA8PSAyKSAlPiUgDQogIGFycmFuZ2UoZGVzYyhzYWxlX3ByaWNlKSkNCg0KaGVhZChOWUNfY29uZG9zXzMpDQoNCm5yb3coTllDX2NvbmRvc18zKQ0KDQpnZ3Bsb3QoZGF0YSA9IE5ZQ19jb25kb3NfMywNCiAgICAgICBhZXMoeCA9IGdyb3NzX3NxdWFyZV9mZWV0LCB5ID0gc2FsZV9wcmljZSkpICsNCiAgICBnZW9tX3BvaW50KGFscGhhID0gMC41KSArDQogICAgc2NhbGVfeV9jb250aW51b3VzKGxhYmVsID0gc2NhbGVzOjpjb21tYSkgKw0KICAgICN4bGltKDAsIDUwMDApICsgDQogICAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkgKw0KICAgIGxhYnModGl0bGUgPSAiQ29uZG9taW5pdW0gU2FsZSBQcmljZSB2cyBGbG9vciBBcmVhIiwgeCA9ICJDb25kb21pbml1bSBzaXplIChzcSBmdCkiLCB5ID0gIlNhbGUgUHJpY2UgKCQpIikgKw0KICAgIHRoZW1lKHBhbmVsLmJhY2tncm91bmQgPSBlbGVtZW50X3JlY3QoZmlsbCA9ICJ3aGl0ZSIpKSArDQogICAgZmFjZXRfd3JhcCh+Ym9yb3VnaCwgc2NhbGVzID0gImZyZWUiLCBuY29sID0gMikNCg0KYGBgDQoNCg0KYGBge3J9DQojZXhwbG9yZSBvdGhlciBwb3RlbnRpYWxseSBlcnJvbm91cyBkYXRhDQoNCk5ZQ19jb25kb3NfZXhwbG9yZSA8LSBOWUNfY29uZG9zXzMgJT4lDQogIGZpbHRlcihib3JvdWdoID09ICJNYW5oYXR0YW4iLCBzYWxlX3ByaWNlID4gNjAwMDAwMDAsIGdyb3NzX3NxdWFyZV9mZWV0IDwgMjUwMCkNCg0KI05ZQ19jb25kb3NfZXhwbG9yZQ0KDQojZXZlbiB0aG91Z2ggdGhlcmUgYXJlIHNvbWUgZnVydGhlciBkYXRhIHRoYXQgY291bGQgYmUgZXJyb3JzLCB0aGVyZSBpcyBubyB3YXkgZnJvbSB0ZWxsaW5nIGZyb20gdGhlIGRhdGEgc2V0IHByb3ZpZGVkLiBUaGVyZWZvcmUsIHRoZXkgd2lsbCBiZSBsZWZ0IGluIHRoZSBkYXRhIHNldC4gRm9yIGV4YW1wbGUgYW4gYXBhcnRtZW50IGluIE1hbmhhdHRhbiB3aXRoIDg2MiBzcSBmdCBidXQgc2FsZSBwcmljZSAkNzQsNDAwLDAwMCBpcyB1bmxpa2VseSB0byBiZSBjb3JyZWN0LCBidXQgdGhlcmUgaXMgbm8gd2F5IG9mIHRlbGxpbmcgaWYgaXQgaXMgb3Igbm90LiANCg0KYGBgDQpDcmVhdGUgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWxzDQoNCmBgYHtyfQ0KI2NyZWF0ZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbHMNCg0KTllDX2NvbmRvc19sbV9vcmlnaW5hbCA8LSBsbShzYWxlX3ByaWNlIH4gZ3Jvc3Nfc3F1YXJlX2ZlZXQsIGRhdGEgPSBOWUNfY29uZG9zX29yaWdpbmFsKQ0KTllDX2NvbmRvc19sbSA8LSBsbShzYWxlX3ByaWNlIH4gZ3Jvc3Nfc3F1YXJlX2ZlZXQsIGRhdGEgPSBOWUNfY29uZG9zXzMpDQoNCnN1bW1hcnkoTllDX2NvbmRvc19sbV9vcmlnaW5hbCkNCnN1bW1hcnkoTllDX2NvbmRvc19sbSkNCg0KYGBgDQpBIGJpdmFyaWF0ZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbCBoYXMgYmVlbiBjYXJyaWVkIG91dCBvbiB0aGUgc2FsZSBwcmljZSBhbmQgY29uZG9taW5pdW0gc2l6ZSBkYXRhLiBUaGUgaHlwb3RoZXNpcyBpcyB0aGF0IHRoZXJlIGlzIGEgcmVsYXRpb25zaGlwIGJldHdlZW4gc2FsZSBwcmljZSBhbmQgY29uZG9taW5pdW0gc2l6ZS4gVGhlIG51bGwgaHlwb3RoZXNpcyBpcyB0aGF0IHRoZXJlIGlzIG5vIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHNhbGUgcHJpY2UgYW5kIGNvbmRvbWluaXVtIHNpemUuDQoNClRoZSBmaXJzdCB0aGluZyB0byBsb29rIGF0IGlzIHRoZSB2YWx1ZSBvZiDOsjEgaS5lLiB0aGUgZ3JhZGllbnQgb2YgdGhlIHJlZ3Jlc3Npb24gbGluZS4gVGhlIGdyYWRpZW50IGluIHRoZSBvcmlnaW5hbCBkYXRhc2V0IGlzIDExOTMuIFRoZSBncmFkaWVudCBpbiB0aGUgY2xlYW5lZCBkYXRhc2V0IGlzIDM4NTkuIEJvdGggb2YgdGhlc2UgdmFsdWVzIGFyZSBmYXIgZnJvbSB6ZXJvIHNvIGl0IHdvdWxkIHN1Z2dlc3QgdGhlcmUgaXMgYSByZWxhdGlvbnNoaXAgaG93ZXZlciB0aGVyZSBhcmUgb3RoZXIgc3RhdGlzdGljcyB0aGF0IG5lZWQgdG8gYmUgY29uc2lkZXJlZCBiZWZvcmUgbWFraW5nIGEgZmluYWwgY29uY2x1c2lvbi4gDQoNClRoZSB0LXN0YXRpc3RpYyBpcyB0aGUgbnVtYmVyIG9mIHN0YW5kYXJkIGRldmlhdGlvbnMgdGhhdCDOsjEgaXMgZnJvbSAwLiBBIGxhcmdlIHZhbHVlIG9mIHRoZSB0LXN0YXRpc3RpYyBzdWdnZXN0cyB0aGF0IHdlIGNhbiByZWplY3QgdGhlIG51bGwgaHlwb3RoZXNpcy4gVGhlIHQtc3RhdGlzdGljIGlzIDYxIGluIHRoZSBvcmlnaW5hbCBkYXRhc2V0IGFuZCAxMTMgaW4gdGhlIGNsZWFuZWQgZGF0YSBzZXQuIFRoaXMgbWVhbnMgdGhhdCBpbiBib3RoIGRhdGFzZXRzLCB0aGUgdC1zdGF0aXN0aWMgc3VnZ2VzdHMgdGhlIG51bGwgaHlwb3RoZXNpcyBjYW4gYmUgcmVqZWN0ZWQuIA0KDQpXZSBjYW4gYWxzbyBsb29rIGF0IHRoZSBwLXZhbHVlLCB3aGljaCBpcyB0aGUgcHJvYmFiaWxpdHkgb2Ygb2JzZXJ2aW5nIGFueSB2YWx1ZSAob2YgzrIxKSBlcXVhbC10byBvciBsYXJnZXIgdGhhbiB0IGlmIHRoZSBudWxsIGh5cG90aGVzaXMgKEgwKSBpcyB0cnVlLlRoZSBwLXZhbHVlIGlzIHRoZSBwcm9iYWJpbGl0eSBvZiB0aGUgcmVzdWx0IG9jY3VycmluZyBieSBjaGFuY2UuIFNvIGEgcC12YWx1ZSBvZiAwLjkgbWVhbnMgdGhhdCB0aGUgb2NjdXJhbmNlIGlzIHZlcnkgbGlrZWx5IHRvIGhhcHBlbiBieSBjaGFuY2UgYW5kIHRoZXJlZm9yZSBpcyBub3QgZG93biB0byB0aGUgcHJlZGljdG9yIHZhcmlhYmxlLiBUaGVyZWZvcmUgdGhlIG51bGwgaHlwb3RoZXNpcyBzdGFuZHMuIEhvd2V2ZXIsIGluIGJvdGggdGhlc2UgZGF0YXNldHMgdGhlIHAtdmFsdWUgaXMgbmVnbGlkZ2libGUgd2hpY2ggbWVhbnMgdGhhdCBpcyBpdCBpbmNyZWRpYmx5IHVubGlrZWx5IHRoYXQgdGhlIHJlc3BvbnNlIHdlIHNlZSBoZXJlIGlzIGJ5IGNoYW5jZS4gVGhlcmVmb3JlLCB3ZSBjYW4gcmVqZWN0IHRoZSBudWxsIGh5cG90aGVzaXMgdGhhdCB0aGVyZSBpcyBubyBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRoZSBwcmVkaWN0b3IgYW5kIHJlc3BvbnNlIHZhcmlhYmxlLiBpLmUuIGl0IGlzIGV4dHJlbWVseSB1bmxpa2VseSB0aGF0IHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBjb25kb21pbml1bSBzaXplIGFuZCBzYWxlIHByaWNlIG9ic2VydmVkIGluIHRoaXMgZGF0YXNldCBpcyBkdWUgdG8gcmFuZG9tIGNoYW5jZS4NCg0KUlNFIGlzIHRoZSBhdmVyYWdlIGFtb3VudCB0aGF0IHRoZSByZXNwb25zZSB2YXJpYWJsZSBtZWFzdXJlbWVudHMgZGV2aWF0ZSBmcm9tIHRoZSB0cnVlIHJlZ3Jlc3Npb24gbGluZS4gQSBsb3dlciB2YWx1ZSBmb3IgUlNFIG1lYW5zIGFuIG92ZXJhbGwgYmV0dGVyIGZpdC4gVGhlIFJTRSBpcyA0LDc0NSwwMDAgaW4gdGhlIG9yaWdpbmFsIGRhdGFzZXQgYW5kIDIsNDQyLDAwMCBhZnRlciBkYXRhIGNsZWFuaW5nLg0KDQpSXjIgaXMgYSBtZWFzdXJlIG9mIHRoZSBwcm9wb3J0aW9uIG9mIHRoZSB2YXJpYWJpbGl0eSBpbiBvdXIgcmVzcG9uc2Ugd2hpY2ggY2FuIGJlIGV4cGxhaW5lZCBieSB0aGUgcHJlZGljdG9yIHZhcmlhYmxlLiBSXjIgaXMgYSB2YWx1ZSBiZXR3ZWVuIDAgYW5kIDEuIEEgdmFsdWUgbmVhcmVyIHRvIDEgbWVhbnMgYSBiZXR0ZXIgZml0LiBUaGUgYWRqdXN0ZWQgUl4yIHZhbHVlIHdhcyAwLjMyIGluIHRoZSBvcmlnaW5hbCBkYXRhc2V0IGFuZCAwLjYyIGluIHRoZSBjbGVhbmVkIGRhdGFzZXQuIFRoaXMgbWVhbnMgdGhhdCA2MiUgb2YgdGhlIHZhcmlhYmlsaXR5IGluIG91ciByZXNwb25zZSB2YXJpYWJsZSBjYW4gYmUgZXhwbGFpbmVkIGJ5IHRoZSBwcmVkaWN0b3IgdmFyaWFibGUuDQoNClRoZSBzdGFuZGFyZCBlcnJvciBvZiB0aGUgcmVncmVzc2lvbiBwcm92aWRlcyB0aGUgYWJzb2x1dGUgbWVhc3VyZSBvZiB0aGUgdHlwaWNhbCBkaXN0YW5jZSB0aGF0IHRoZSBkYXRhIHBvaW50cyBmYWxsIGZyb20gdGhlIHJlZ3Jlc3Npb24gbGluZS4gU0UgaXMgaW4gdGhlIHVuaXRzIG9mIHRoZSBkZXBlbmRlbnQgdmFyaWFibGUuIA0KDQpgYGB7cn0NCmNvbmZpbnQoTllDX2NvbmRvc19sbV9vcmlnaW5hbCkNCmNvbmZpbnQoTllDX2NvbmRvc19sbSkNCmBgYA0KDQpMYXN0bHksIHRoZSBjb25maWRlbmNlIGludGVydmFscyBmb3IgdGhlIG9yaWdpbmFsICh1bmNsZWFuZWQgZGF0YXNldCkgaXMgMTE1NSAtIDEyMzEgJCBwZXIgc3F1YXJlIGZvb3QuDQpUaGUgY29uZmlkZW5jZSBpbnRlcnZhbHMgZm9yIHRoZSBjbGVhbmRlZCBkYXRhc2V0IGlzIDM3OTIgLSAzOTI2ICQgcGVyIHNxdWFyZSBmb290Lg0KVGhlIGNvbmZpZGVuY2UgaW50ZXJ2YWxzIGZvciB0aGUgb3JpZ2luYWwgZGF0YXNldCBpcyBsb3dlci4gVGhpcyBpcyBsaWtlbHkgZHVlIHRvIHRoZSBoaWdoIHZhbHVlIGxvdyBmbG9vciBhcmVhIGNvbmRvbWluaXVtcyB0aGF0IHdlcmUgaW5jbHVkZWQgaW4gdGhlIG9yaWdpbmFsIGRhdGFzZXQgYnV0IHdlcmUgcmVtb3ZlZCBpbiB0aGUgY2xlYW5lZCBkYXRhc2V0LiANCg0KTmV4dCwgbmVzdCB0aGUgZGF0YWZyYW1lIGFuZCBmaXQgbW9kZWxzIHRvIHRoZSBkYXRhZnJhbWUuIEFwcGx5IHRoZSBicm9vbSgpIGZ1bmN0aW9ucyB0aWR5KCkgdG8gZWFjaCBuZXN0ZWQgbW9kZWwgdG8gZXh0cmFjdCB0aGUgbW9kZWwgY29lZmZpY2llbnRzLg0KYGBge3J9DQpOWUNfbmVzdGVkIDwtIE5ZQ19jb25kb3NfMyAlPiUgDQogICBncm91cF9ieShib3JvdWdoKSAlPiUgDQogICBuZXN0KCkgJT4lIA0KICAgbXV0YXRlKGxpbmVhcl9tb2RlbCA9IG1hcCgueCA9IGRhdGEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAuZiA9IH5sbShzYWxlX3ByaWNlIH4gZ3Jvc3Nfc3F1YXJlX2ZlZXQsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gLikpKSAlPiUNCiAgIG11dGF0ZSh0aWR5X2NvZWZmaWNpZW50cyA9IG1hcCgueCA9IGxpbmVhcl9tb2RlbCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLmYgPSB0aWR5LCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb25mLmludCA9IFRSVUUpKSANCiNwcmludCBuZXN0ZWQgZGF0YWZyYW1lDQpOWUNfbmVzdGVkDQoNCiNwcmludCBkYXRhIGZvciB0aGUgQnJvbngNCk5ZQ19uZXN0ZWQkZGF0YVtbM11dDQoNCnN1bW1hcnkoTllDX25lc3RlZCRsaW5lYXJfbW9kZWxbWzNdXSkNCmBgYA0KRXh0cmFjdCB0aGUgdGlkeSBtb2RlbCBjb2VmZmljZW50cw0KYGBge3J9DQpOWUNfY29lZmZpY2llbnRzIDwtIE5ZQ19uZXN0ZWQgJT4lDQogICAgc2VsZWN0KGJvcm91Z2gsIHRpZHlfY29lZmZpY2llbnRzKSAlPiUNCiAgICB1bm5lc3QoY29scyA9IHRpZHlfY29lZmZpY2llbnRzKQ0KDQpOWUNfc2xvcGUgPC0gTllDX2NvZWZmaWNpZW50cyAlPiUNCiAgICBmaWx0ZXIodGVybSA9PSAiZ3Jvc3Nfc3F1YXJlX2ZlZXQiKSAlPiUNCiAgICBhcnJhbmdlKGVzdGltYXRlKQ0KDQpOWUNfc2xvcGUNCg0KI2V4dHJhY3QgdGhlIG5lc3RlZCBsaW5lYXIgbW9kZWwgb2JqZWN0IGZvciBvbmUgb2YgdGhlIGJvcm91Z2hzIGFuZCB1c2UgdGhlIHN1bW1hcnkoKSBmdW5jdGlvbiB0byBpbnNwZWN0IHRoZSByZXN1bHRzDQpgYGANCkFnYWluLCBuZXN0IHRoZSBkYXRhZnJhbWUgYW5kIGZpdCBtb2RlbHMgdG8gdGhlIGRhdGFmcmFtZS4gVGhpcyB0aW1lLCBhcHBseSB0aGUgYnJvb20oKSBmdW5jdGlvbiBnbGFuY2UoKSB0byBlYWNoIG5lc3RlZCBtb2RlbCB0byBleHRyYWN0IHRoZSBzdW1tYXJ5IHN0YXRpc3RpY3MuIA0KYGBge3J9DQpOWUNfbmVzdGVkXzIgPC0gTllDX2NvbmRvc18zICU+JSANCiAgIGdyb3VwX2J5KGJvcm91Z2gpICU+JSANCiAgIG5lc3QoKSAlPiUgDQogICBtdXRhdGUobGluZWFyX21vZGVsID0gbWFwKC54ID0gZGF0YSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIC5mID0gfmxtKHNhbGVfcHJpY2UgfiBncm9zc19zcXVhcmVfZmVldCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSAuKSkpICU+JQ0KICAgIG11dGF0ZSh0aWR5X3N1bW1hcnlfc3RhdHMgPSBtYXAoLnggPSBsaW5lYXJfbW9kZWwsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAuZiA9IGdsYW5jZSkpDQoNCiNwcmludCBuZXN0ZWQgZGF0YWZyYW1lDQpOWUNfbmVzdGVkXzINCg0KI3ByaW50IGRhdGEgZm9yIHRoZSBCcm9ueA0KTllDX25lc3RlZF8yJGRhdGFbWzNdXQ0KDQpgYGANCkV4dHJhY3QgdGhlIHN1bW1hcnkgc3RhdGlzdGljcy4NCmBgYHtyfQ0KDQpOWUNfc3VtbWFyeV9zdGF0cyA8LSBOWUNfbmVzdGVkXzIgJT4lDQogICAgc2VsZWN0KGJvcm91Z2gsIHRpZHlfc3VtbWFyeV9zdGF0cykgJT4lDQogICAgdW5uZXN0KGNvbHMgPSB0aWR5X3N1bW1hcnlfc3RhdHMpDQoNCk5ZQ19zdW1tYXJ5X3N0YXRzDQoNCmBgYA0KVGhlIGFuYWx5c2lzIGhhcyBzaG93biB0aGF0LCBvdmVyYWxsLCB3ZSBjYW4gc2F5IHRoYXQgYHNhbGVfcHJpY2VgIGNhbiBiZSBwcmVkaWN0ZWQgYnkgYGdyb3NzX3NxdWFyZV9hcmVhYC4gRm9yIGVhY2ggYm9yb3VnaCB0aGUgdC1zdGF0aXN0aWMgaXMgc3VmZmljaWVudGx5IGhpZ2ggYW5kIHRoZSBwX3ZhbHVlIHN1ZmZpY2llbnRseSBsb3cgdGhhdCB3ZSBjYW4gc2F5IHRoYXQgdGhlcmUgaXMgYSByZWxhdGlvbnNoaXAgYmV0d2VlbiBgc2FsZV9wcmljZWAgYW5kIGBncm9zc19zcXVhcmVfYXJlYWAuIFNvbWUgb2YgdGhlIGJvcm91Z2hzIGhhdmUgcmVncmVzc2lvbiBsaW5lcyB0aGF0IGZpdCB0aGUgZGF0YSBiZXR0ZXIgdGhhbiBvdGhlcnMuIFRoaXMgaXMgZGVtb25zdHJhdGVkIGJ5IHRoZSBhZGp1c3RlZCBSLXNxdWFyZWQgdmFsdWUuIEEgaGlnaGVyIHZhbHVlIG1lYW5zIHRoYXQgbW9yZSBvZiB0aGUgdmFyaWFiaWxpdHkgaW4gdGhlIHJlc3BvbnNlIHZhcmlhYmxlIGNhbiBiZSBhdHRyaWJ1dGVkIHRvIHRoZSBwcmVkaWN0b3IgdmFyaWFibGUuIFRoaXMgaXMgdGhlIGNhc2UgZm9yIE1hbmhhdHRhbi4gSG93ZXZlciB0aGUgZml0IGZvciBRdWVlbnMgaXMgbGVzcyBnb29kIGFzIHRoZSByLXNxdWFyZWQgdmFsdWUgaXMgb25seSAzNCUuIFRoaXMgaXMgZXZpZGVuY2VkIGluIHRoZSBzY2F0dGVyIHBsb3RzIGFzIHlvdSBjYW4gc2VlIHRoYXQgdGhlIHBvaW50cyBmb3IgdGhlc2UgdHdvIGJvcm91Z2hzIGFyZSBzcHJlYWQgZnVydGhlciBmcm9tIHRoZSByZWdyZXNzaW9uIGxpbmUuIA0K