Review - Cleaning and Recoding Survey Variables

Load in a few packages

library(corrplot)      #easy correlation matrices
corrplot 0.84 loaded
library(tidyverse)     #data manipulation
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.3     ✓ purrr   0.3.4
✓ tibble  3.0.6     ✓ dplyr   1.0.4
✓ tidyr   1.1.2     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.1
── Conflicts ──────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(tidymodels)    #easy visualizations of clusters
── Attaching packages ──────────────────────────────────────────── tidymodels 0.1.2 ──
✓ broom     0.7.5      ✓ recipes   0.1.15
✓ dials     0.0.9      ✓ rsample   0.0.9 
✓ infer     0.5.4      ✓ tune      0.1.2 
✓ modeldata 0.1.0      ✓ workflows 0.2.1 
✓ parsnip   0.1.5      ✓ yardstick 0.0.7 
── Conflicts ─────────────────────────────────────────────── tidymodels_conflicts() ──
x scales::discard() masks purrr::discard()
x dplyr::filter()   masks stats::filter()
x recipes::fixed()  masks stringr::fixed()
x dplyr::lag()      masks stats::lag()
x yardstick::spec() masks readr::spec()
x recipes::step()   masks stats::step()
library(NbClust)       #determine optimal no. of clusters
library(psych)         #descriptive statistics

Attaching package: ‘psych’

The following objects are masked from ‘package:scales’:

    alpha, rescale

The following objects are masked from ‘package:ggplot2’:

    %+%, alpha
library(standardize)   #easy standardization

 *********************************************************** 
          Loading standardize package version 0.2.2          
     Call standardize.news() to see new features/changes     
 *********************************************************** 
library(haven)
# Read the Stata extract from the working directory. haven keeps the Stata
# value labels, so coded columns arrive as labelled doubles (dbl+lbl).
cluster <- read_dta("ELS_Cluster_500.dta")

SES Composite Score

Let’s check the variable labels and get a breakdown of values:

describe(cluster$byses1)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X11500-0.371.73-0.02-0.370.93-81.8
str(cluster$byses1)
 dbl+lbl [1:500] -0.95,  0.55, -8.00,  0.95,  1.37,  0.12,  1.06,  0.65,  0.18,  ...
 @ label       : chr "Socio-economic status composite, v.1"
 @ format.stata: chr "%12.2g"
 @ labels      : Named num [1:2] -8 -4
  ..- attr(*, "names")= chr [1:2] "{Survey component legitimate skip/NA}" "{Nonrespondent}"

Recode missing, legitimate skips, partial interviews, and nonrespondents as NA in the whole dataset

# Recode the survey's reserved negative codes to NA in every column:
#   -9 missing, -8 legitimate skip/NA, -7 partial interview-breakoff,
#   -4 nonrespondent.
# The recode is applied column by column with across() because passing a whole
# data frame to na_if() is rejected by dplyr >= 1.1.0.
# NOTE(review): other negative codes (e.g. -6 "Multiple response", and the
# -3 / -1 values still visible in the data) are left untouched, matching the
# original recode -- confirm that is intended.
cluster.clean <- cluster %>%
  mutate(
    across(
      everything(),
      ~ replace(.x, as.numeric(.x) %in% c(-9, -8, -7, -4), NA)
    )
  )
glimpse(cluster.clean)
Rows: 500
Columns: 55
$ stu_id   <dbl> 278108, 429101, 195207, 408224, 352115, 276213, 227207, 455121, 117…
$ sch_id   <dbl> 2781, 4291, 1952, 4082, 3521, 2762, 2272, 4551, 1172, 3561, 4341, 1…
$ strat_id <dbl> 278, 429, 195, 408, 352, 276, 227, 455, 117, 356, 434, 160, 417, 14…
$ psu      <dbl+lbl> 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, …
$ f1sch_id <dbl+lbl> 2781, 4291, 1952, 4082, 3521, 2762, 2272, 4551, 1172,   NA, 434…
$ f1univ1  <dbl+lbl> 101, 101, 120, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101…
$ f1univ2a <dbl+lbl> 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ f1univ2b <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 4, 5, 7, 1, 1, …
$ f2univ_p <dbl+lbl> 102, 102, 119, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101…
$ bystuwt  <dbl+lbl> 338.0031,  58.1918,   0.0000,  65.7478,  74.7694, 274.6407, 292…
$ bysex    <dbl+lbl>  2,  2,  1,  1,  2,  1,  1,  1,  1,  2,  1,  1,  1,  1,  2,  1,…
$ byrace   <dbl+lbl>  4,  5, NA,  7,  2,  7,  7,  7,  4,  7,  7,  2,  7,  3,  7,  2,…
$ bystlang <dbl+lbl>  0,  1, NA,  1,  0,  1,  1,  1,  0,  1,  1,  0,  1,  1,  1,  1,…
$ byses1   <dbl+lbl> -0.95,  0.55,    NA,  0.95,  1.37,  0.12,  1.06,  0.65,  0.18, …
$ byses1qu <dbl+lbl>  1,  4, NA,  4,  4,  3,  4,  4,  3,  4,  4,  1,  4,  2,  3,  2,…
$ bygrdrpt <dbl+lbl>  0,  0,  1,  0,  0, 98,  0,  0,  0,  0,  0, 98,  0,  0,  0,  0,…
$ byhomlit <dbl+lbl>  0, NA, NA,  3,  3,  1,  3,  3,  2,  3,  1,  0,  3,  3,  3,  3,…
$ byparasp <dbl+lbl> 5, 7, 5, 6, 7, 5, 3, 7, 3, 5, 6, 5, 5, 5, 5, 6, 2, 5, 2, 5, 6, …
$ byiepflg <dbl+lbl>  1, NA, NA,  0,  0, NA,  0, NA,  0,  0,  0,  1, NA,  0,  0,  0,…
$ bytxcstd <dbl+lbl> 35.61, 57.20,    NA, 53.53, 71.03, 58.91, 54.09, 54.44, 25.92, …
$ bytxcqu  <dbl+lbl>  1,  3, NA,  3,  4,  4,  3,  3,  1,  3,  4,  1,  4,  2,  2,  2,…
$ bywrtnga <dbl+lbl>     NA,  1.951,  0.052,     NA,  1.951,  1.001, -1.847,  1.654,…
$ byxtracu <dbl+lbl>  0,  2, NA,  3,  5,  0,  0,  1,  0,  0,  0,  1,  2,  0,  0,  0,…
$ byhmwrk  <dbl+lbl>  8, 10, NA, 15, 11, 28,  4,  5, 10, 11,  9, 10, 13,  5,  3,  5,…
$ bytvvigm <dbl+lbl> 99,  0, NA,  6,  1,  5, NA,  2,  6,  1,  2,  0,  8,  5,  1,  2,…
$ f1qwt    <dbl+lbl>  398.7574,   62.9523,  139.3723,   62.9402,   71.8370,  294.149…
$ f1pnlwt  <dbl+lbl> 362.2636,  65.4358,   0.0000,  62.9030,  72.8376, 294.3836, 286…
$ f1trscwt <dbl+lbl> 306.8801,  66.4096, 150.3712,  61.8688,  98.8436, 305.3489, 282…
$ f2qtscwt <dbl+lbl>   0.0000,   0.0000, 191.7053,  62.0716,  75.9791, 300.6391, 299…
$ f2qwt    <dbl+lbl>   0.0000,   0.0000, 169.1791,  61.9468,  74.4226, 300.8338, 291…
$ f2f1wt   <dbl+lbl>   0.0000,   0.0000, 180.9959,  63.4727,  77.2253, 337.8765, 295…
$ f2bywt   <dbl+lbl>   0.0000,   0.0000, 167.0404,  60.7443,  73.6182, 297.8983, 286…
$ bys14    <dbl+lbl>  2,  2, NA,  1,  2,  1,  1,  1,  1,  2,  1,  1,  1,  1,  2,  1,…
$ bys15    <dbl+lbl>  1,  1, NA,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,…
$ bys24d   <dbl+lbl>  1,  1, NA,  2,  1,  1, NA,  2,  1,  1,  1,  1,  2,  2,  2,  2,…
$ bys24e   <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,  1,  1,  1,  1,  2,  1,…
$ bys24f   <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,  1,  1,  1,  1,  2,  1,…
$ bys33a   <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0, NA,  1,  1,  0,  1,  1,  0,  0,…
$ bys33b   <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0, NA,  1,  0,  0,  0,  0,  0,  0,…
$ bys43    <dbl+lbl>  1,  0, NA,  0,  2, 10,  2,  5,  0,  3,  9,  2,  2,  3,  0,  0,…
$ bys62a   <dbl+lbl> NA, -3, NA, -3, -3,  0, -3, -3, NA, -3, -3,  0, -3, -3, -3, -3,…
$ bys62g   <dbl+lbl> NA, -3, NA, -3, -3,  1, -3, -3, NA, -3, -3,  0, -3, -3, -3, -3,…
$ bys66a   <dbl+lbl> NA,  1, NA,  1,  1,  6, NA,  1,  1,  1,  1,  6,  1,  1,  1,  1,…
$ bys66b   <dbl+lbl> NA,  1, NA, -3,  1,  6, NA,  1,  1,  1,  1, NA,  1,  1,  1,  1,…
$ bys66c   <dbl+lbl> NA, -1, NA,  6,  1,  6, NA, -1,  1,  1, -1,  6,  1,  1,  1,  6,…
$ bys66f   <dbl+lbl> NA,  1, NA,  1,  1,  1, NA,  1, -1,  1, -1,  6,  1,  1,  1, -1,…
$ bys66g   <dbl+lbl> NA,  1, NA, -3, -3, -1, NA,  1, -1,  1, -1,  6,  1,  1,  1, -1,…
$ bys67    <dbl+lbl>  0, NA, NA,  1,  0,  1,  1,  1,  0,  1,  1,  0,  1,  1,  1,  1,…
$ bys71d   <dbl+lbl>  0, NA, NA,  0,  0,  0, NA,  0,  0, NA,  0,  0,  0,  0,  0,  0,…
$ bys71e   <dbl+lbl>  0, NA, NA,  1,  1,  0, NA,  0,  0, NA,  0,  0,  0,  0,  0,  0,…
$ bys83a   <dbl+lbl>  6, NA, NA,  6,  5, -1,  6,  7,  6,  7,  7, -3,  6,  7,  6,  4,…
$ bys83b   <dbl+lbl>  6, NA, NA, -1,  8, -1,  8,  6,  1,  7,  6, NA,  6,  7,  6,  5,…
$ bys87a   <dbl+lbl>  2, NA, NA,  2,  3,  1, NA,  2,  2,  4,  3,  3,  2,  3,  2,  3,…
$ bys87e   <dbl+lbl>  3, NA, NA,  3,  2,  1, NA,  2,  3,  1,  1,  3,  4,  3,  3,  3,…
$ bys87f   <dbl+lbl>  1, NA, NA,  2,  2,  4, NA,  3,  3,  1,  3,  1,  3,  2,  2,  4,…
# Histogram of the SES composite after recoding, using 0.25-wide bins
cluster.clean %>%
  ggplot(aes(x = byses1)) +
  geom_histogram(
    binwidth = 0.25,
    colour = "black",
    fill = "steel blue"
  ) +
  labs(
    title = "Histogram of SES Composite Score",
    x = "SES Composite Score"
  )

Hours Spent on Homework Per Week

describe(cluster.clean$byhmwrk)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X1143912.2117.14812.215.93098
str(cluster.clean$byhmwrk)
 dbl+lbl [1:500]  8, 10, NA, 15, 11, 28,  4,  5, 10, 11,  9, 10, 13,  5,  3,  5, ...
 @ label       : chr "BY hours per week spent on homework (in and out of school)"
 @ format.stata: chr "%12.0f"
 @ labels      : Named num [1:6] -9 -8 -4 97 98 99
  ..- attr(*, "names")= chr [1:6] "{Missing}" "{Survey component legitimate skip/NA}" "{Nonrespondent}" "Out-of-school hmwork hrs top-coded at 26" ...
# Bar chart of weekly homework hours: one bar per distinct value
cluster.clean %>%
  ggplot(aes(x = byhmwrk)) +
  geom_bar(
    colour = "black",
    fill = "forest green"
  ) +
  labs(
    title = "Histogram of Hours Spent on Homework Per Week",
    x = "Hours Spent on Homework Per Week"
  )

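Ideally, we want these scores to be relatively normal, so we may want to filter out the values near 100 hours per week. Note that 97, 98, and 99 are labelled special codes (for example, 97 is "Out-of-school hmwork hrs top-coded at 26") rather than literal hour counts.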

Math Engagement (Totally Absorbed in Math)

describe(cluster.clean$bys87a)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X113612.480.822.481.4814
table(cluster.clean$bys87a)

  1   2   3   4 
 31 166 125  39 
str(cluster.clean$bys87a)
 dbl+lbl [1:500]  2, NA, NA,  2,  3,  1, NA,  2,  2,  4,  3,  3,  2,  3,  2,  3, ...
 @ label       : chr "Gets totally absorbed in mathematics"
 @ format.stata: chr "%12.0f"
 @ labels      : Named num [1:9] -9 -8 -7 -6 -4 1 2 3 4
  ..- attr(*, "names")= chr [1:9] "{Missing}" "{Survey component legitimate skip/NA}" "{Partial interview-breakoff}" "{Multiple response}" ...
# Bar chart of the math engagement item (responses coded 1-4)
cluster.clean %>%
  ggplot(aes(x = bys87a)) +
  geom_bar(
    colour = "black",
    fill = "dark red"
  ) +
  labs(
    title = "Histogram of Math Engagement",
    x = "Math Engagement"
  )

Math Importance

describe(cluster.clean$bys87f)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X113642.520.922.521.4814
table(cluster.clean$bys87f)

  1   2   3   4 
 40 156 108  60 
str(cluster.clean$bys87f)
 dbl+lbl [1:500]  1, NA, NA,  2,  2,  4, NA,  3,  3,  1,  3,  1,  3,  2,  2,  4, ...
 @ label       : chr "Mathematics is important"
 @ format.stata: chr "%12.0f"
 @ labels      : Named num [1:9] -9 -8 -7 -6 -4 1 2 3 4
  ..- attr(*, "names")= chr [1:9] "{Missing}" "{Survey component legitimate skip/NA}" "{Partial interview-breakoff}" "{Multiple response}" ...
# Bar chart of the math importance item (responses coded 1-4)
cluster.clean %>%
  ggplot(aes(x = bys87f)) +
  geom_bar(
    colour = "black",
    fill = "purple"
  ) +
  labs(
    title = "Histogram of Math Importance",
    x = "Math Importance"
  )

Quick Data Management Lesson - Create Standardized Versions of Your Variables

The package standardize is a great way to standardize your variables and control for continuous variable scaling and factor contrasts; the chunk below, however, uses base R's scale() to compute simple z-scores:

# Drop the very high homework values first, THEN z-score each variable, so the
# standardized columns have mean 0 and SD 1 in the rows that are analyzed.
# Scaling before filtering left hwk_std with mean -0.16 and SD 0.43 in the
# retained sample, which under-weights homework relative to the other two
# variables in any distance-based clustering.
cluster.clean <- cluster.clean %>%
  # Filter out those really high HW hours (also drops rows where byhmwrk is NA)
  filter(byhmwrk < 50) %>%
  mutate(
    # scale() returns a one-column matrix; as.numeric() flattens it to a plain
    # numeric vector so the new columns behave like ordinary variables.
    hwk_std = as.numeric(scale(byhmwrk)),
    import_std = as.numeric(scale(bys87f)),
    engage_std = as.numeric(scale(bys87a))
  )
  
describe(cluster.clean$hwk_std)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X11425-0.160.43-0.3-0.230.35-0.711.45
describe(cluster.clean$import_std)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X11342-0.011.01-0.5801.66-1.691.66
describe(cluster.clean$engage_std)
ABCDEFGHIJ0123456789
 
 
vars
<dbl>
n
<dbl>
mean
<dbl>
sd
<dbl>
median
<dbl>
trimmed
<dbl>
mad
<dbl>
min
<dbl>
max
<dbl>
X11339-0.021-0.6-0.051.85-1.851.91

Now, summarize, visualize, and analyze the correlations:

# Build the complete-case data set of the three standardized variables.
# `corr` is the object that the hclust(), NbClust() and kmeans() calls in this
# notebook are run on.
corr <- cluster.clean %>%
  select(hwk_std, import_std, engage_std) %>%
  na.omit()

# Pairwise correlations among the three clustering variables
corr_cluster <- cor(corr)
corr_cluster
               hwk_std  import_std engage_std
hwk_std     1.00000000 -0.08078977 -0.1596826
import_std -0.08078977  1.00000000  0.5166737
engage_std -0.15968257  0.51667365  1.0000000

Step 1: Choose a Clustering Approach and Find a Solution

Approach #1: Hierarchical Clustering

# Hierarchical clustering of the rows of `corr`: Euclidean distances between
# observations, merged with Ward's method ("ward.D2").
hclusts <- hclust(dist(corr, method = "euclidean"), method = "ward.D2")
# Print the fitted object (call, linkage method, distance, number of objects)
hclusts

Call:
hclust(d = dist(corr, method = "euclidean"), method = "ward.D2")

Cluster method   : ward.D2 
Distance         : euclidean 
Number of objects: 333 

Create a dendrogram to visualize the solution:

plot(hclusts)

# Run NbClust's set of cluster-number indices on `corr` with Ward (D2) linkage.
# The call prints a majority-rule summary; the full results are kept in `wardclust`.
wardclust <- NbClust(data = corr, method = "ward.D2") 
*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot. 
 

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 5 proposed 2 as the best number of clusters 
* 2 proposed 3 as the best number of clusters 
* 4 proposed 4 as the best number of clusters 
* 5 proposed 6 as the best number of clusters 
* 1 proposed 9 as the best number of clusters 
* 1 proposed 13 as the best number of clusters 
* 1 proposed 14 as the best number of clusters 
* 4 proposed 15 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  2 
 
 
******************************************************************* 

# Redraw the dendrogram, then outline an 8-cluster cut of the tree in red.
# NOTE(review): k = 8 differs from the 2 clusters the NbClust majority rule
# reported for Ward linkage -- confirm which solution is meant to be shown.
plot(hclusts)
rect.hclust(
  tree = hclusts,
  k = 8,
  border = "red"
)

Approach #2: K-Means Clustering

library(tidymodels)

# Fit k-means solutions for k = 1..9 on the standardized variables in `corr`.
# kmeans() starts from randomly chosen centers, so the seed is fixed to make
# the solutions (and the plots built from them) reproducible, and nstart = 25
# keeps the best of 25 random starts instead of relying on a single one.
set.seed(123)
kclusts <-
  tibble(k = 1:9) %>%
  mutate(
    kclust = map(k, ~ kmeans(corr, centers = .x, nstart = 25)),
    # per-cluster summary (centers) for each k
    tidied = map(kclust, tidy),
    # one-row fit summary for each k
    glanced = map(kclust, glance),
    # rows of `corr` with their cluster assignment (.cluster) for each k
    augmented = map(kclust, augment, corr)
  )

Visualize it

# Expand each nested list-column of `kclusts` into its own long table,
# keeping `k` so the results can be compared or faceted by number of clusters.

# tidy() output: cluster-level rows per k
clusters <- unnest(kclusts, cols = tidied)

# augment() output: observation-level rows with .cluster per k
assignments <- unnest(kclusts, cols = augmented)

# glance() output: one summary row per k
clusterings <- unnest(kclusts, cols = glanced)

Scatter matrix with group colors

# Jittered scatter of math importance vs. engagement (standardized),
# colored by assigned cluster, with one panel per value of k.
p1 <- assignments %>%
  ggplot(mapping = aes(x = import_std, y = engage_std)) +
  geom_jitter(mapping = aes(colour = .cluster), alpha = 0.8) +
  facet_wrap(vars(k))
p1

Same thing, with X’es denoting the centers of each cluster…

# Overlay the rows of `clusters` (the per-cluster summaries) on p1 as large
# "x" marks; x/y positions come from the aesthetics already set in p1.
p2 <- p1 +
  geom_point(
    data = clusters,
    shape = "x",
    size = 10
  )
p2

Use NbClust to get fit statistics for each solution

# Run NbClust's set of cluster-number indices on `corr` using k-means as the
# clustering method. The call prints a majority-rule summary; the full results
# are kept in `kmeansclust`.
kmeansclust <- NbClust(data = corr, method = "kmeans")
*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot. 
 

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 5 proposed 2 as the best number of clusters 
* 3 proposed 3 as the best number of clusters 
* 1 proposed 4 as the best number of clusters 
* 6 proposed 5 as the best number of clusters 
* 1 proposed 10 as the best number of clusters 
* 2 proposed 14 as the best number of clusters 
* 5 proposed 15 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  5 
 
 
******************************************************************* 

LS0tCnRpdGxlOiAiTXVsdGl2YXJpYXRlIFN0YXRpc3RpY3M6IE1vZHVsZSA4LSBJbnRyb2R1Y3Rpb24gdG8gQ2x1c3RlciBBbmFseXNpcyIKYXV0aG9yOiAiRHIuIEJyb2RhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCiMgUmV2aWV3IC0gQ2xlYW5pbmcgYW5kIFJlY29kaW5nIFN1cnZleSBWYXJpYWJsZXMKIyMgTG9hZCBpbiBhIGZldyBwYWNrYWdlcwpgYGB7cn0KbGlicmFyeShjb3JycGxvdCkgICAgICAjZWFzeSBjb3JyZWxhdGlvbiBtYXRyaWNlcwpsaWJyYXJ5KHRpZHl2ZXJzZSkgICAgICNkYXRhIG1hbmlwdWxhdGlvbgpsaWJyYXJ5KHRpZHltb2RlbHMpICAgICNlYXN5IHZpc3VhbGl6YXRpb25zIG9mIGNsdXN0ZXJzCmxpYnJhcnkoTmJDbHVzdCkgICAgICAgI2RldGVybWluZSBvcHRpbWFsIG5vLiBvZiBjbHVzdGVycwpsaWJyYXJ5KHBzeWNoKSAgICAgICAgICNkZXNjcmlwdGl2ZSBzdGF0aXN0aWNzCmxpYnJhcnkoc3RhbmRhcmRpemUpICAgI2Vhc3kgc3RhbmRhcmRpemF0aW9uCmBgYAoKYGBge3J9CmxpYnJhcnkoaGF2ZW4pCmNsdXN0ZXIgPC0gcmVhZF9kdGEoIkVMU19DbHVzdGVyXzUwMC5kdGEiKQpgYGAKCiMjIFNFUyBDb21wb3NpdGUgU2NvcmUKTGV0J3MgY2hlY2sgdGhlIHZhcmlhYmxlIGxhYmVscyBhbmQgZ2V0IGEgYnJlYWtkb3duIG9mIHZhbHVlczoKYGBge3J9CmRlc2NyaWJlKGNsdXN0ZXIkYnlzZXMxKQpzdHIoY2x1c3RlciRieXNlczEpCmBgYAoKUmVjb2RlIG1pc3NpbmcsIGxlZ2l0aW1hdGUgc2tpcHMsIHBhcnRpYWwgaW50ZXJ2aWV3cywgYW5kIG5vbnJlc3BvbmRlbnRzIGFzIGBOQWAgaW4gdGhlIHdob2xlIGRhdGFzZXQKYGBge3J9CmNsdXN0ZXIuY2xlYW4gPC0gY2x1c3RlciAlPiUKICBuYV9pZiguLCAtOSkgJT4lCiAgbmFfaWYoLiwgLTgpICU+JQogIG5hX2lmKC4sIC03KSAlPiUKICBuYV9pZiguLCAtNCkKYGBgCgpgYGB7cn0KZ2xpbXBzZShjbHVzdGVyLmNsZWFuKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGF0YSA9IGNsdXN0ZXIuY2xlYW4sIG1hcHBpbmcgPSBhZXMoeCA9IGJ5c2VzMSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IC4yNSwgY29sb3I9ImJsYWNrIiwgZmlsbD0ic3RlZWwgYmx1ZSIpICsgCiAgbGFicyh0aXRsZSA9ICJIaXN0b2dyYW0gb2YgU0VTIENvbXBvc2l0ZSBTY29yZSIsCiAgICAgICAgICAgICAgICAgICAgICB4ID0gIlNFUyBDb21wb3NpdGUgU2NvcmUiKQpgYGAKCiMjIEhvdXJzIFNwZW50IG9uIEhvbWV3b3JrIFBlciBXZWVrCmBgYHtyfQpkZXNjcmliZShjbHVzdGVyLmNsZWFuJGJ5aG13cmspCnN0cihjbHVzdGVyLmNsZWFuJGJ5aG13cmspCmBgYAoKYGBge3J9CmdncGxvdChkYXRhID0gY2x1c3Rlci5jbGVhbiwgbWFwcGluZyA9IGFlcyh4ID0gYnlobXdyaykpICsKICBnZW9tX2Jhcihjb2xvcj0iYmxhY2siLCBmaWxsPSJmb3Jlc3QgZ3JlZW4iKSArIAogIGxhYnModGl0bGUgPSAiSGlzdG9ncmFtIG9mIEhvdXJzIFNwZW50IG9uIEhvbWV3b3JrIFBlciBXZWVr
IiwKICAgICAgICAgICAgICAgICAgICAgIHggPSAiSG91cnMgU3BlbnQgb24gSG9tZXdvcmsgUGVyIFdlZWsiKQpgYGAKCklkZWFsbHksIHdlIHdhbnQgdGhlc2Ugc2NvcmVzIHRvIGJlIHJlbGF0aXZlbHkgbm9ybWFsLi4uc28gd2UgbWF5IHdhbnQgdG8gZmlsdGVyIG91dCB0aG9zZSBzY29yZXMgdGhhdCBhcmUgbmVhciAxMDAgaG91cnMgcGVyIHdlZWsuCgojIyBNYXRoIEVuZ2FnZW1lbnQgKFRvdGFsbHkgQWJzb3JiZWQgaW4gTWF0aCkKYGBge3J9CmRlc2NyaWJlKGNsdXN0ZXIuY2xlYW4kYnlzODdhKQp0YWJsZShjbHVzdGVyLmNsZWFuJGJ5czg3YSkKc3RyKGNsdXN0ZXIuY2xlYW4kYnlzODdhKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGF0YSA9IGNsdXN0ZXIuY2xlYW4sIG1hcHBpbmcgPSBhZXMoeCA9IGJ5czg3YSkpICsKICBnZW9tX2Jhcihjb2xvcj0iYmxhY2siLCBmaWxsPSJkYXJrIHJlZCIpICsgCiAgbGFicyh0aXRsZSA9ICJIaXN0b2dyYW0gb2YgTWF0aCBFbmdhZ2VtZW50IiwKICAgICAgICAgICAgICAgICAgICAgIHggPSAiTWF0aCBFbmdhZ2VtZW50IikKYGBgCgojIyBNYXRoIEltcG9ydGFuY2UKYGBge3J9CmRlc2NyaWJlKGNsdXN0ZXIuY2xlYW4kYnlzODdmKQp0YWJsZShjbHVzdGVyLmNsZWFuJGJ5czg3ZikKc3RyKGNsdXN0ZXIuY2xlYW4kYnlzODdmKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGF0YSA9IGNsdXN0ZXIuY2xlYW4sIG1hcHBpbmcgPSBhZXMoeCA9IGJ5czg3ZikpICsKICBnZW9tX2Jhcihjb2xvcj0iYmxhY2siLCBmaWxsPSJwdXJwbGUiKSArIAogIGxhYnModGl0bGUgPSAiSGlzdG9ncmFtIG9mIE1hdGggSW1wb3J0YW5jZSIsCiAgICAgICAgICAgICAgICAgICAgICB4ID0gIk1hdGggSW1wb3J0YW5jZSIpCmBgYAoKIyBRdWljayBEYXRhIE1hbmFnZW1lbnQgTGVzc29uIC0gQ3JlYXRlIFN0YW5kYXJkaXplZCBWZXJzaW9ucyBvZiBZb3VyIFZhcmlhYmxlcwoKVGhlIHBhY2thZ2UgYHN0YW5kYXJkaXplYCBpcyBhIGdyZWF0IHdheSB0byBzdGFuZGFyZGl6ZSB5b3VyIHZhcmlhYmxlcyBhbmQgY29udHJvbCBmb3IgY29udGludW91cyB2YXJpYWJsZSBzY2FsaW5nIGFuZCBmYWN0b3IgY29udHJhc3RzOgpgYGB7cn0KY2x1c3Rlci5jbGVhbiA8LSBjbHVzdGVyLmNsZWFuICU+JSAKICBtdXRhdGUoLiwKICAgICAgICBod2tfc3RkID0gc2NhbGUoY2x1c3Rlci5jbGVhbiRieWhtd3JrKSwKICAgICAgICBpbXBvcnRfc3RkID0gc2NhbGUoY2x1c3Rlci5jbGVhbiRieXM4N2YpLAogICAgICAgIGVuZ2FnZV9zdGQgPSAgc2NhbGUoY2x1c3Rlci5jbGVhbiRieXM4N2EpKSAlPiUKICBmaWx0ZXIoYnlobXdyayA8IDUwKSAjRmlsdGVyIG91dCB0aG9zZSByZWFsbHkgaGlnaCBIVyBob3VycwogIApgYGAKCmBgYHtyfQpkZXNjcmliZShjbHVzdGVyLmNsZWFuJGh3a19zdGQpCmRlc2NyaWJlKGNsdXN0ZXIuY2xlYW4kaW1wb3J0X3N0ZCkKZGVzY3JpYmUoY2x1c3Rlci5jbGVhbiRlbmdhZ2Vfc3RkKQpgYGAKTm93LCBzdW1tYXJpemUv
IHZpc3VhbGl6ZSBhbmQgYW5hbHl6ZSBjb3JyZWxhdGlvbnM6CmBgYHtyfQpsaWJyYXJ5KGdnY29ycnBsb3QpCmNvcnIgPC0gY2x1c3Rlci5jbGVhbiAlPiUKICBzZWxlY3QoLiwKICAgICAgICAgaHdrX3N0ZCwKICAgICAgICAgaW1wb3J0X3N0ZCwKICAgICAgICAgZW5nYWdlX3N0ZCkgJT4lCiAgbmEub21pdCgpCgpjb3JyX2NsdXN0ZXIgPC0gY29yKGNvcnIpCgpjb3JyX2NsdXN0ZXIKYGBgCgojIFN0ZXAgMTogQ2hvb3NlIGEgQ2x1c3RlcmluZyBBcHByb2FjaCBhbmQgRmluZCBhIFNvbHV0aW9uCiMjIEFwcHJvYWNoICMxOiBIaWVyYXJjaGljYWwgQ2x1c3RlcmluZwpgYGB7cn0KaGNsdXN0cyA8LSBoY2x1c3QoZGlzdChjb3JyLCBtZXRob2QgPSAiZXVjbGlkZWFuIiksIG1ldGhvZCA9ICJ3YXJkLkQyIikKaGNsdXN0cwpgYGAKCiMjIENyZWF0ZSBhIGRlbmRyb2dyYW0gdG8gdmlzdWFsaXplIHRoZSBzb2x1dGlvbjoKYGBge3J9CnBsb3QoaGNsdXN0cykKYGBgCgpgYGB7cn0Kd2FyZGNsdXN0IDwtIE5iQ2x1c3QoZGF0YSA9IGNvcnIsIG1ldGhvZCA9ICJ3YXJkLkQyIikgCmBgYAoKYGBge3J9CnBsb3QoaGNsdXN0cykKcmVjdC5oY2x1c3QoaGNsdXN0cyxrPTgsIGJvcmRlcj0icmVkIikKYGBgCgojIEFwcHJvYWNoICMyOiBLLU1lYW5zIENsdXN0ZXJpbmcKYGBge3J9CmxpYnJhcnkodGlkeW1vZGVscykKa2NsdXN0cyA8LSAKICB0aWJibGUoayA9IDE6OSkgJT4lCiAgbXV0YXRlKAogICAga2NsdXN0ID0gbWFwKGssIH5rbWVhbnMoY29yciwgLngpKSwKICAgIHRpZGllZCA9IG1hcChrY2x1c3QsIHRpZHkpLAogICAgZ2xhbmNlZCA9IG1hcChrY2x1c3QsIGdsYW5jZSksCiAgICBhdWdtZW50ZWQgPSBtYXAoa2NsdXN0LCBhdWdtZW50LCBjb3JyKQogICkKYGBgCiMjIFZpc3VhbGl6ZSBpdApgYGB7cn0KY2x1c3RlcnMgPC0gCiAga2NsdXN0cyAlPiUKICB1bm5lc3QoY29scyA9IGModGlkaWVkKSkKCmFzc2lnbm1lbnRzIDwtIAogIGtjbHVzdHMgJT4lIAogIHVubmVzdChjb2xzID0gYyhhdWdtZW50ZWQpKQoKY2x1c3RlcmluZ3MgPC0gCiAga2NsdXN0cyAlPiUKICB1bm5lc3QoY29scyA9IGMoZ2xhbmNlZCkpCmBgYAoKIyMgU2NhdHRlciBtYXRyaXggd2l0aCBncm91cCBjb2xvcnMKYGBge3J9CnAxIDwtIAogIGdncGxvdChhc3NpZ25tZW50cywgYWVzKHggPSBpbXBvcnRfc3RkLCB5ID0gZW5nYWdlX3N0ZCkpICsKICBnZW9tX2ppdHRlcihhZXMoY29sb3IgPSAuY2x1c3RlciksIGFscGhhID0gMC44KSArIAogIGZhY2V0X3dyYXAofiBrKQpwMQpgYGAKCiMjIFNhbWUgdGhpbmcsIHdpdGggWCdlcyBkZW5vdGluZyB0aGUgY2VudGVycyBvZiBlYWNoIGNsdXN0ZXIuLi4KYGBge3J9CnAyIDwtIHAxICsgZ2VvbV9wb2ludChkYXRhID0gY2x1c3RlcnMsIHNpemUgPSAxMCwgc2hhcGUgPSAieCIpCnAyCmBgYAojIyBVc2UgYE5iQ2x1c3RgIHRvIGdldCBmaXQgc3RhdGlzdGljcyBmb3IgZWFjaCBzb2x1dGlvbgpgYGB7cn0Ka21lYW5zY2x1c3QgPC0g
TmJDbHVzdChkYXRhID0gY29yciwgbWV0aG9kID0gImttZWFucyIpCmBgYAoK