Morgan State University
Department of Information Science & Systems
Fall 2024
INSS 615: Data Wrangling for Visualization
Name: Frenandez Lawrence
Due: Dec 1, 2024 (Sunday)
Questions
A. Scrape the College Ranked by Acceptance Rate dataset available at
this link: https://www.oedb.org/rankings/acceptance-rate/#table-rankings
and select the first 9 columns [Rank, School, Student to Faculty Ratio,
Graduation Rate, Retention Rate, Acceptance Rate, Enrollment Rate,
Institutional Aid Rate, and Default Rate] as the dataset for this
assignment. [20 Points]
Hint: There are 6 pages of data, so you may want to use a for loop to
automate the scraping process and combine the data from all 6 pages.
This is just a suggestion—you are free to create the dataset without
automating the web scraping process.
Solution:
library(rvest)
library(tidyverse)
library(dplyr)
# Scrape the six result pages of the acceptance-rate ranking and stack them
# into a single data frame (`dataset`).
base_url <- "https://www.oedb.org/rankings/acceptance-rate/#table-rankings"
base_URL_modified <- "https://www.oedb.org/rankings/acceptance-rate/"
base_URL_end <- "/#table-rankings"

# Page 1 is served from the base URL; pages 2-6 use the ".../page/<n>/" form.
page_urls <- c(
  base_url,
  paste0(base_URL_modified, "page/", 2:6, base_URL_end)
)

# Preallocate one slot per page. A slot whose page has no table stays NULL,
# and bind_rows() ignores NULL list elements.
all_pages_data <- vector("list", length(page_urls))

for (page in seq_along(page_urls)) {
  # Download and parse the page
  page_html <- read_html(page_urls[[page]])

  # Take the first <table> element on the page
  table_node <- html_node(page_html, "table")

  # html_node() never returns NULL: when nothing matches it returns an
  # `xml_missing` object, so an is.null() test cannot detect a missing table.
  if (inherits(table_node, "xml_missing")) {
    message("Table not found on page: ", page)
    next
  }

  # Convert the HTML table into a data frame and store it under its page number
  all_pages_data[[page]] <- html_table(table_node, fill = TRUE)
}

# Combine data from all pages into a single dataframe
dataset <- bind_rows(all_pages_data)
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
# Names of the nine columns the assignment asks for, in their original order
selected_columns <- c(
  "Rank",
  "School",
  "Student to Faculty Ratio",
  "Graduation Rate",
  "Retention Rate",
  "Acceptance Rate",
  "Enrollment Rate",
  "Institutional Aid Rate",
  "Default Rate"
)
# all_of() makes the selection fail loudly if any of the names is absent
dataset <- select(dataset, all_of(selected_columns))
# "N/A" replacement is intentionally left disabled here (handled in Question B)
#dataset <- dataset %>% mutate(across(everything(), ~ na_if(.x, "N/A")))
# Show the nine-column dataset
print(dataset)
# Destination of the raw (uncleaned) export
file_path <- "~/Downloads/college_acceptance_data.csv"
# Write the data without the row-name column
write.csv(x = dataset, file = file_path, row.names = FALSE)
# Report where the file went
cat("Dataset saved as:", file_path)
Dataset saved as: ~/Downloads/college_acceptance_data.csv
# multipage scraping using a for loop
B. You are going to need the dataset created in Question A to answer
the following questions. There are 16 questions each carrying 5
points:
- Replace the missing values “N/A” in the dataset with NA.
Solution:
# Load the raw dataset written in Question A. The path uses "~" so it points
# at the same location Question A saved to, instead of one specific user's
# home directory.
path <- "~/Downloads/college_acceptance_data.csv"
data <- read.csv(path)
head(data, n = 5)
# Replace "N/A" with NA in the entire dataframe
data_cleaned <- data
data_cleaned[data_cleaned == "N/A"] <- NA
# View() opens a data viewer, so only call it from an interactive session
if (interactive()) {
  View(data_cleaned)
}
- Convert percentage columns (e.g., Graduation Rate) to numeric
format.
Solution:
# dplyr supplies mutate() and across()
library(dplyr)
# Drop the "%" sign from each rate column and parse the remainder as a number
data_cleaned <- mutate(
  data_cleaned,
  across(
    c(
      "Graduation.Rate",
      "Retention.Rate",
      "Acceptance.Rate",
      "Enrollment.Rate",
      "Institutional.Aid.Rate",
      "Default.Rate"
    ),
    function(rate) as.numeric(gsub("%", "", rate))
  )
)
- Transform the “Student to Faculty Ratio” column into two separate
numeric columns: Students and Faculty.
Solution:
# dplyr for mutate(), stringr for str_extract()
library(dplyr)
library(stringr)
# Split the ratio text into two numbers: the run of digits at the start of the
# string becomes Students, the run of digits at the end becomes Faculty
data_cleaned <- mutate(
  data_cleaned,
  Students = Student.to.Faculty.Ratio %>%
    str_extract("^[0-9]+") %>%
    as.numeric(),
  Faculty = Student.to.Faculty.Ratio %>%
    str_extract("[0-9]+$") %>%
    as.numeric()
)
# Inspect the result in the data viewer
view(data_cleaned)
- What is the count of missing values in the “Default Rate” column?
Impute the missing values in the “Default Rate” column with the median
value.
Solution:
# Tally the NA entries in the Default.Rate column
missing_count <- length(which(is.na(data_cleaned[["Default.Rate"]])))
print(missing_count)
[1] 291
# Median of the default rates that are actually present
median_default_rate <- median(data_cleaned[["Default.Rate"]], na.rm = TRUE)
# Fill every NA position in the column with that median
data_cleaned[["Default.Rate"]] <- replace(
  data_cleaned[["Default.Rate"]],
  is.na(data_cleaned[["Default.Rate"]]),
  median_default_rate
)
#View(data_cleaned)
- Find the average graduation rate for universities ranked in the top
50.
Solution:
# Keep the rows whose Rank is 50 or better
top_50_universities <- filter(data_cleaned, Rank <= 50)
# Mean graduation rate of that subset, ignoring missing rates
average_graduation_rate <- mean(
  top_50_universities[["Graduation.Rate"]],
  na.rm = TRUE
)
# Show the average
print(average_graduation_rate)
[1] 79.18
- Filter universities with a retention rate above 90% and find the
count of rows in the subset.
Solution:
# filter() comes from dplyr
library(dplyr)
# Subset of universities whose retention rate is strictly above 90
high_retention <- filter(data_cleaned, Retention.Rate > 90)
# Size of that subset
high_retention_count <- nrow(high_retention)
# Show the number of rows
print(high_retention_count)
[1] 98
- Rank universities by enrollment rate in descending order and display
the last 6 rows.
Solution:
# arrange() and desc() come from dplyr
library(dplyr)
# Order the universities from highest to lowest enrollment rate.
# NOTE(review): arrange() sorts missing values to the end, so any rows with an
# NA enrollment rate land in the tail shown below -- confirm this is intended.
ranked_data <- arrange(data_cleaned, desc(Enrollment.Rate))
# Show the final six rows of the ordering
tail(ranked_data, n = 6)
NA
- Create a histogram of graduation rates using ggplot2 library.
Solution:
# Plotting is done with ggplot2
library(ggplot2)
# Distribution of graduation rates, drawn with 5-point-wide bins
ggplot(data = data_cleaned, mapping = aes(x = Graduation.Rate)) +
  geom_histogram(
    binwidth = 5,
    fill = "blue",
    color = "black",
    alpha = 0.7
  ) +
  theme_minimal() +
  labs(
    x = "Graduation Rate (%)",
    y = "Count",
    title = "Histogram of Graduation Rates"
  )
Warning: Removed 6 rows containing non-finite outside the scale range (`stat_bin()`).

- Plot a scatterplot between acceptance rate and enrollment rate using
ggplot2 library.
Solution:
# Plotting is done with ggplot2
library(ggplot2)
# One point per university: acceptance rate on x, enrollment rate on y
ggplot(
  data = data_cleaned,
  mapping = aes(x = Acceptance.Rate, y = Enrollment.Rate)
) +
  geom_point(color = "blue", size = 2, alpha = 0.7) +
  theme_minimal() +
  labs(
    x = "Acceptance Rate (%)",
    y = "Enrollment Rate (%)",
    title = "Scatterplot of Acceptance Rate vs Enrollment Rate"
  )
Warning: Removed 29 rows containing missing values or values outside the scale range (`geom_point()`).

- Calculate the average default rate by aid rate category (e.g.,
grouped into ranges like 0-20%, 20-40%). Display the categories.
Solution:
# Bin the institutional aid rate into five 20-point bands; include.lowest
# puts a rate of exactly 0 into the first band
data_cleaned$AidRateCategory <- cut(
  data_cleaned$Institutional.Aid.Rate,
  breaks = seq(0, 100, by = 20),
  labels = c("0-20%", "20-40%", "40-60%", "60-80%", "80-100%"),
  include.lowest = TRUE
)
# group_by() and summarize() come from dplyr
library(dplyr)
# Mean default rate within each aid-rate band
average_default_rate <- summarize(
  group_by(data_cleaned, AidRateCategory),
  AverageDefaultRate = mean(Default.Rate, na.rm = TRUE)
)
# Show the per-band averages
print(average_default_rate)
# Inspect the data with the new category column
view(data_cleaned)
- Normalize the acceptance rate to a scale of 0-1 and save in a new
column “Acceptance Rate Normalized”. Display the first 6 values.
Solution:
library(scales)
# Min-max scaling: subtract the smallest observed acceptance rate and divide by
# the spread between the smallest and largest, giving values between 0 and 1
data_cleaned$`Acceptance Rate Normalized` <- local({
  rates <- data_cleaned$Acceptance.Rate
  bounds <- range(rates, na.rm = TRUE)
  (rates - bounds[1]) / (bounds[2] - bounds[1])
})
# Show the first 6 normalized values
head(data_cleaned[["Acceptance Rate Normalized"]], n = 6)
[1] 0.00000000 0.01063830 0.04255319 0.08510638 0.09574468 0.10638298
- What is the count of the duplicate entries in the “School” column?
Remove duplicate university entries.
Solution:
# Number of School values that repeat an earlier one: total entries minus
# the number of distinct entries
duplicate_count <- length(data_cleaned$School) -
  length(unique(data_cleaned$School))
print(duplicate_count)
[1] 3
# distinct() comes from dplyr
library(dplyr)
# Keep the first row for each School; .keep_all retains every other column
data_cleaned <- distinct(data_cleaned, School, .keep_all = TRUE)
- Find the correlation between graduation rate and retention rate
(exclude the NAs in both columns).
Solution:
# Correlation of graduation rate with retention rate, computed only over
# rows where both values are present ("complete.obs")
correlation <- with(
  data_cleaned,
  cor(Graduation.Rate, Retention.Rate, use = "complete.obs")
)
# Show the coefficient
print(correlation)
[1] 0.6159709
- Extract the values in School column into a new variable without
“University” in the string. For example “Rowan University” becomes
“Rowan”
Solution:
# Build a copy of the School names with the word "University" stripped.
# The example provided ("Rowan University") has "University" as the second
# part of the name, so only that form -- a space followed by "University" --
# is removed, and only its first occurrence in each name.
# NOTE(review): names that start with "University" (e.g. "University of ...")
# are left unchanged by this pattern -- confirm that is acceptable.
# str_remove() comes from stringr
library(stringr)
# Delete the first " University" found in each school name
school_names <- str_remove(data_cleaned$School, " University")
# Show the first few shortened names
print(head(school_names))
[1] "Harvard" "Yale" "University of Pennsylvania" "Johns Hopkins"
[5] "Cornell" "Tufts"
- Count how many universities have “Institute” in their name.
Solution:
# str_detect() comes from stringr
library(stringr)
# Flag each school whose name contains "Institute", then add up the flags
institute_count <- data_cleaned$School %>%
  str_detect("Institute") %>%
  sum()
# Show how many matched
print(institute_count)
[1] 17
- Export the cleaned and processed dataset to a CSV file.
Solution:
# Destination for the cleaned and processed dataset
file_path <- "~/Downloads/cleaned_college_acceptance_data.csv"
# Export `data_cleaned` (the processed data built in Question B), not the raw
# scraped `dataset` from Question A; row names are omitted from the file
write.csv(data_cleaned, file = file_path, row.names = FALSE)
LS0tCnRpdGxlOiAiSU5TUzYxNSBIb21ld29yayA1IgpvdXRwdXQ6CiAgIyB3b3JkX2RvY3VtZW50OiBkZWZhdWx0CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKLS0tCgoKKipNb3JnYW4gU3RhdGUgVW5pdmVyc2l0eSoqCgoqKkRlcGFydG1lbnQgb2YgSW5mb3JtYXRpb24gU2NpZW5jZSAmIFN5c3RlbXMqKgoKKipGYWxsIDIwMjQqKgoKKipJTlNTIDYxNTogRGF0YSBXcmFuZ2xpbmcgZm9yIFZpc3VhbGl6YXRpb24qKgoKKipOYW1lOiBGcmVuYW5kZXogTGF3cmVuY2UgKioKCipEdWU6IERlYyAxLCAyMDI0IChTdW5kYXkpKgoKCgpRdWVzdGlvbnMKCgpBLiBTY3JhcGUgdGhlIENvbGxlZ2UgUmFua2VkIGJ5IEFjY2VwdGFuY2UgUmF0ZSBkYXRhc2V0IGF2YWlsYWJsZSBhdCB0aGlzIGxpbms6IGh0dHBzOi8vd3d3Lm9lZGIub3JnL3JhbmtpbmdzL2FjY2VwdGFuY2UtcmF0ZS8jdGFibGUtcmFua2luZ3MgYW5kIHNlbGVjdCB0aGUgZmlyc3QgOSBjb2x1bW5zIFtSYW5rLCBTY2hvb2wsIFN0dWRlbnQgdG8gRmFjdWx0eSBSYXRpbywgR3JhZHVhdGlvbiBSYXRlLCBSZXRlbnRpb24gUmF0ZSwgQWNjZXB0YW5jZSBSYXRlLCBFbnJvbGxtZW50IFJhdGUsIEluc3RpdHV0aW9uYWwgQWlkIFJhdGUsIGFuZCBEZWZhdWx0IFJhdGVdIGFzIHRoZSBkYXRhc2V0IGZvciB0aGlzIGFzc2lnbm1lbnQuIFsyMCBQb2ludHNdCgpIaW50OiBUaGVyZSBhcmUgNiBwYWdlcyBvZiBkYXRhLCBzbyB5b3UgbWF5IHdhbnQgdG8gdXNlIGEgZm9yIGxvb3AgdG8gYXV0b21hdGUgdGhlIHNjcmFwaW5nIHByb2Nlc3MgYW5kIGNvbWJpbmUgdGhlIGRhdGEgZnJvbSBhbGwgNiBwYWdlcy4gVGhpcyBpcyBqdXN0IGEgc3VnZ2VzdGlvbuKAlHlvdSBhcmUgZnJlZSB0byBjcmVhdGUgdGhlIGRhdGFzZXQgd2l0aG91dCBhdXRvbWF0aW5nIHRoZSB3ZWIgc2NyYXBwaW5nIHByb2Nlc3MuCgogCiAgU29sdXRpb246CmBgYHtyfQpsaWJyYXJ5KHJ2ZXN0KSAKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIpCgojIERlZmluZSB0aGUgYmFzZSBVUkwgYW5kIGFuIGVtcHR5IGxpc3QgdG8gc3RvcmUgZGF0YQpiYXNlX3VybCA8LSAiaHR0cHM6Ly93d3cub2VkYi5vcmcvcmFua2luZ3MvYWNjZXB0YW5jZS1yYXRlLyN0YWJsZS1yYW5raW5ncyIKYmFzZV9VUkxfbW9kaWZpZWQgPC0gImh0dHBzOi8vd3d3Lm9lZGIub3JnL3JhbmtpbmdzL2FjY2VwdGFuY2UtcmF0ZS8iCmJhc2VfVVJMX2VuZCA8LSAiLyN0YWJsZS1yYW5raW5ncyIgCmFsbF9wYWdlc19kYXRhIDwtIGxpc3QoKQoKCiAgIyBTY3JhcGluZyBwYWdlIDEgc2luY2UgYmFzZSB1cmwgaGFzIGRpZmZlcmVudCBmb3JtYXQgdGhhbiBvdGhlcnMuCiAgcGFnZTFfaHRtbCA8LSByZWFkX2h0bWwoYmFzZV91cmwpCiAgCiAgIyBFeHRyYWN0IHRoZSB0YWJsZSBmcm9tIHRoZSBIVE1MCiAgdGFibGVfbm9kZSA8LSBodG1sX25vZGUocGFnZTFfaHRtbCwgInRhYmxlIikKICAK
ICAjIFJlYWQgdGhlIHRhYmxlIGludG8gYSBkYXRhZnJhbWUKICAKICAgIHBhZ2UxX2RhdGEgPC0gaHRtbF90YWJsZSh0YWJsZV9ub2RlLCBmaWxsID0gVFJVRSkKICAgIGFsbF9wYWdlc19kYXRhW1sxXV0gPC0gcGFnZTFfZGF0YQogICAgCiAgICAjdmlldyhhbGxfcGFnZXNfZGF0YSkKCiMgTG9vcCB0aHJvdWdoIHRoZSBvdGhlciA1IHBhZ2VzCmZvciAocGFnZSBpbiAyOjYpIHsKICAjIENvbnN0cnVjdCB0aGUgVVJMIGZvciBlYWNoIHBhZ2UgKGFkanVzdCBpZiBuZWNlc3NhcnkpCiBwYWdlX3VybCA8LSBwYXN0ZTAoYmFzZV9VUkxfbW9kaWZpZWQsICJwYWdlLyIsIHBhZ2UsIGJhc2VfVVJMX2VuZCkKICAKICAKICAjIFNjcmFwZSB0aGUgcGFnZQogIHBhZ2VfaHRtbCA8LSByZWFkX2h0bWwocGFnZV91cmwpCiAgCiAgIyBFeHRyYWN0IHRoZSB0YWJsZSBmcm9tIHRoZSBIVE1MCiAgdGFibGVfbm9kZSA8LSBodG1sX25vZGUocGFnZV9odG1sLCAidGFibGUiKQogIAogICMgUmVhZCB0aGUgdGFibGUgaW50byBhIGRhdGFmcmFtZQogIGlmICghaXMubnVsbCh0YWJsZV9ub2RlKSkgewogICAgcGFnZV9kYXRhIDwtIGh0bWxfdGFibGUodGFibGVfbm9kZSwgZmlsbCA9IFRSVUUpCiAgICBhbGxfcGFnZXNfZGF0YVtbcGFnZV1dIDwtIHBhZ2VfZGF0YQogIH0gZWxzZSB7CiAgICBtZXNzYWdlKCJUYWJsZSBub3QgZm91bmQgb24gcGFnZTogIiwgcGFnZSkKICB9Cn0KCiMgQ29tYmluZSBkYXRhIGZyb20gYWxsIHBhZ2VzIGludG8gYSBzaW5nbGUgZGF0YWZyYW1lCmRhdGFzZXQgPC0gYmluZF9yb3dzKGFsbF9wYWdlc19kYXRhKQoKIyBTZWxlY3QgdGhlIGZpcnN0IDkgcmVxdWlyZWQgY29sdW1ucwpzZWxlY3RlZF9jb2x1bW5zIDwtIGMoCiAgIlJhbmsiLCAiU2Nob29sIiwgIlN0dWRlbnQgdG8gRmFjdWx0eSBSYXRpbyIsICJHcmFkdWF0aW9uIFJhdGUiLAogICJSZXRlbnRpb24gUmF0ZSIsICJBY2NlcHRhbmNlIFJhdGUiLCAiRW5yb2xsbWVudCBSYXRlIiwgCiAgIkluc3RpdHV0aW9uYWwgQWlkIFJhdGUiLCAiRGVmYXVsdCBSYXRlIgopCmRhdGFzZXQgPC0gZGF0YXNldCAlPiUgc2VsZWN0KGFsbF9vZihzZWxlY3RlZF9jb2x1bW5zKSkKCiMgUmVwbGFjZSAiTi9BIiB3aXRoIE5BCiNkYXRhc2V0IDwtIGRhdGFzZXQgJT4lIG11dGF0ZShhY3Jvc3MoZXZlcnl0aGluZygpLCB+IG5hX2lmKC54LCAiTi9BIikpKQoKIyBEaXNwbGF5IHRoZSBjbGVhbmVkIGRhdGFzZXQKcHJpbnQoZGF0YXNldCkKCmZpbGVfcGF0aCA8LSAifi9Eb3dubG9hZHMvY29sbGVnZV9hY2NlcHRhbmNlX2RhdGEuY3N2IgoKIyBTYXZlIHRoZSBkYXRhc2V0IGFzIGEgQ1NWIGZpbGUKd3JpdGUuY3N2KGRhdGFzZXQsIGZpbGUgPSBmaWxlX3BhdGgsIHJvdy5uYW1lcyA9IEZBTFNFKQoKIyBNZXNzYWdlIHRvIGNvbmZpcm0gc2F2aW5nCmNhdCgiRGF0YXNldCBzYXZlZCBhczoiLCBmaWxlX3BhdGgpCgoKIyBtdWx0aXBhZ2Ugc2NyYXBpbmcgdXNpbmcgYSBmb3IgbG9vcAoKCmBgYAoKQi4g
WW91IGFyZSBnb2luZyB0byBuZWVkIHRoZSBkYXRhc2V0IGNyZWF0ZWQgaW4gUXVlc3Rpb24gQSB0byBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVzdGlvbnMuIFRoZXJlIGFyZSAxNiBxdWVzdGlvbnMgZWFjaCBjYXJyeWluZyA1IHBvaW50czoKCjEuIFJlcGxhY2UgdGhlIG1pc3NpbmcgdmFsdWVzICJOL0EiIGluIHRoZSBkYXRhc2V0IHdpdGggTkEuCgoKICBTb2x1dGlvbjoKYGBge3J9CnBhdGggPSAiL1VzZXJzL2ZyZW5hbmRlemxhd3JlbmNlL0Rvd25sb2Fkcy9jb2xsZWdlX2FjY2VwdGFuY2VfZGF0YS5jc3YiCmRhdGEgPC0gcmVhZC5jc3YocGF0aCkKaGVhZChkYXRhLCBuPTUpCgojIFJlcGxhY2UgIk4vQSIgd2l0aCBOQSBpbiB0aGUgZW50aXJlIGRhdGFmcmFtZQpkYXRhX2NsZWFuZWQgPC0gZGF0YQpkYXRhX2NsZWFuZWRbZGF0YV9jbGVhbmVkID09ICJOL0EiXSA8LSBOQQoKClZpZXcoZGF0YV9jbGVhbmVkKQpgYGAKCjIuIENvbnZlcnQgcGVyY2VudGFnZSBjb2x1bW5zIChlLmcuLCBHcmFkdWF0aW9uIFJhdGUpIHRvIG51bWVyaWMgZm9ybWF0LgoKICAKICBTb2x1dGlvbjoKYGBge3J9CiMgTG9hZCBuZWNlc3NhcnkgbGlicmFyeQpsaWJyYXJ5KGRwbHlyKQoKIyBDb252ZXJ0IHBlcmNlbnRhZ2UgY29sdW1ucyB0byBudW1lcmljCmRhdGFfY2xlYW5lZCA8LSBkYXRhX2NsZWFuZWQgJT4lCiAgbXV0YXRlKGFjcm9zcygKICAgIGMoJ0dyYWR1YXRpb24uUmF0ZScsICdSZXRlbnRpb24uUmF0ZScsICdBY2NlcHRhbmNlLlJhdGUnLCAnRW5yb2xsbWVudC5SYXRlJywgJ0luc3RpdHV0aW9uYWwuQWlkLlJhdGUnLCAnRGVmYXVsdC5SYXRlJyksIAogICAgfiBhcy5udW1lcmljKGdzdWIoIiUiLCAiIiwgLikpCiAgKSkKCmBgYAoKCjMuIFRyYW5zZm9ybSB0aGUgIlN0dWRlbnQgdG8gRmFjdWx0eSBSYXRpbyIgY29sdW1uIGludG8gdHdvIHNlcGFyYXRlIG51bWVyaWMgY29sdW1uczogU3R1ZGVudHMgYW5kIEZhY3VsdHkuCgoKICBTb2x1dGlvbjoKYGBge3J9CiNuZWNlc3NhcnkgbGlicmFyaWVzIGFyZSBsb2FkZWQKCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoc3RyaW5ncikKCiMgRXh0cmFjdCAiU3R1ZGVudHMiIGFuZCAiRmFjdWx0eSIgaW50byBuZXcgY29sdW1ucwpkYXRhX2NsZWFuZWQgPC0gZGF0YV9jbGVhbmVkICU+JQogIG11dGF0ZSgKICAgIFN0dWRlbnRzID0gYXMubnVtZXJpYyhzdHJfZXh0cmFjdChTdHVkZW50LnRvLkZhY3VsdHkuUmF0aW8sICJeWzAtOV0rIikpLAogICAgRmFjdWx0eSA9IGFzLm51bWVyaWMoc3RyX2V4dHJhY3QoU3R1ZGVudC50by5GYWN1bHR5LlJhdGlvLCAiWzAtOV0rJCIpKQogICkKCgp2aWV3KGRhdGFfY2xlYW5lZCkKYGBgCgoKCgo0LiBXaGF0IGlzIHRoZSBjb3VudCBvZiBtaXNzaW5nIHZhbHVlcyBpbiB0aGUgIkRlZmF1bHQgUmF0ZSIgY29sdW1uPyBJbXB1dGUgdGhlIG1pc3NpbmcgdmFsdWVzIGluIHRoZSAiRGVmYXVsdCBSYXRlIiBjb2x1bW4gd2l0aCB0aGUgbWVkaWFuIHZhbHVlLgoKCiAgU29sdXRp
b246CmBgYHtyfQojIENvdW50IHRoZSBudW1iZXIgb2YgbWlzc2luZyB2YWx1ZXMgaW4gIkRlZmF1bHQgUmF0ZSIKbWlzc2luZ19jb3VudCA8LSBzdW0oaXMubmEoZGF0YV9jbGVhbmVkJCdEZWZhdWx0LlJhdGUnKSkKcHJpbnQobWlzc2luZ19jb3VudCkKCiMgQ2FsY3VsYXRlIHRoZSBtZWRpYW4gb2YgdGhlICJEZWZhdWx0IFJhdGUiIGNvbHVtbgptZWRpYW5fZGVmYXVsdF9yYXRlIDwtIG1lZGlhbihkYXRhX2NsZWFuZWQkJ0RlZmF1bHQuUmF0ZScsIG5hLnJtID0gVFJVRSkKCiMgUmVwbGFjZSBtaXNzaW5nIHZhbHVlcyB3aXRoIHRoZSBtZWRpYW4KZGF0YV9jbGVhbmVkJCdEZWZhdWx0LlJhdGUnW2lzLm5hKGRhdGFfY2xlYW5lZCQnRGVmYXVsdC5SYXRlJyldIDwtIG1lZGlhbl9kZWZhdWx0X3JhdGUKCiNWaWV3KGRhdGFfY2xlYW5lZCkKYGBgCgoKNS4gRmluZCB0aGUgYXZlcmFnZSBncmFkdWF0aW9uIHJhdGUgZm9yIHVuaXZlcnNpdGllcyByYW5rZWQgaW4gdGhlIHRvcCA1MC4KCgogIFNvbHV0aW9uOgpgYGB7cn0KIyBGaWx0ZXIgdGhlIGRhdGFzZXQgZm9yIHRvcCA1MCB1bml2ZXJzaXRpZXMKdG9wXzUwX3VuaXZlcnNpdGllcyA8LSBkYXRhX2NsZWFuZWQgJT4lIGZpbHRlcihSYW5rIDw9IDUwKQoKIyBDYWxjdWxhdGUgdGhlIGF2ZXJhZ2UgZ3JhZHVhdGlvbiByYXRlCmF2ZXJhZ2VfZ3JhZHVhdGlvbl9yYXRlIDwtIG1lYW4odG9wXzUwX3VuaXZlcnNpdGllcyQnR3JhZHVhdGlvbi5SYXRlJywgbmEucm0gPSBUUlVFKQoKIyBEaXNwbGF5IHRoZSByZXN1bHQKcHJpbnQoYXZlcmFnZV9ncmFkdWF0aW9uX3JhdGUpCgoKYGBgCgoKNi4gRmlsdGVyIHVuaXZlcnNpdGllcyB3aXRoIGEgcmV0ZW50aW9uIHJhdGUgYWJvdmUgOTAlIGFuZCBmaW5kIHRoZSBjb3VudCBvZiByb3dzIGluIHRoZSBzdWJzZXQuCgoKICBTb2x1dGlvbjoKYGBge3J9CiMgTG9hZCBkcGx5cgpsaWJyYXJ5KGRwbHlyKQoKIyBGaWx0ZXIgdW5pdmVyc2l0aWVzIHdpdGggUmV0ZW50aW9uIFJhdGUgPiA5MCUKaGlnaF9yZXRlbnRpb24gPC0gZGF0YV9jbGVhbmVkICU+JSBmaWx0ZXIoUmV0ZW50aW9uLlJhdGUgPiA5MCkKCiMgQ291bnQgdGhlIHJvd3MgaW4gdGhlIHN1YnNldApoaWdoX3JldGVudGlvbl9jb3VudCA8LSBucm93KGhpZ2hfcmV0ZW50aW9uKQoKIyBEaXNwbGF5IHRoZSBjb3VudApwcmludChoaWdoX3JldGVudGlvbl9jb3VudCkKCmBgYAoKCjcuIFJhbmsgdW5pdmVyc2l0aWVzIGJ5IGVucm9sbG1lbnQgcmF0ZSBpbiBkZXNjZW5kaW5nIG9yZGVyIGFuZCBkaXNwbGF5IHRoZSBsYXN0IDYgcm93cy4KCgogIFNvbHV0aW9uOgpgYGB7cn0KIyBMb2FkIGRwbHlyCmxpYnJhcnkoZHBseXIpCgojIFJhbmsgdW5pdmVyc2l0aWVzIGJ5IEVucm9sbG1lbnQgUmF0ZSBpbiBkZXNjZW5kaW5nIG9yZGVyCnJhbmtlZF9kYXRhIDwtIGRhdGFfY2xlYW5lZCAlPiUgYXJyYW5nZShkZXNjKEVucm9sbG1lbnQuUmF0ZSkpCgojIERpc3BsYXkgdGhlIGxhc3QgNiByb3dzCnRhaWwocmFu
a2VkX2RhdGEsIDYpCgpgYGAKCgo4LiBDcmVhdGUgYSBoaXN0b2dyYW0gb2YgZ3JhZHVhdGlvbiByYXRlcyB1c2luZyBnZ3Bsb3QyIGxpYnJhcnkuCgoKICBTb2x1dGlvbjoKYGBge3J9CiMgTG9hZCBnZ3Bsb3QyCmxpYnJhcnkoZ2dwbG90MikKCiMgQ3JlYXRlIGEgaGlzdG9ncmFtIG9mIEdyYWR1YXRpb24gUmF0ZXMKZ2dwbG90KGRhdGFfY2xlYW5lZCwgYWVzKHggPSBHcmFkdWF0aW9uLlJhdGUpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSA1LCBmaWxsID0gImJsdWUiLCBjb2xvciA9ICJibGFjayIsIGFscGhhID0gMC43KSArCiAgbGFicyh0aXRsZSA9ICJIaXN0b2dyYW0gb2YgR3JhZHVhdGlvbiBSYXRlcyIsCiAgICAgICB4ID0gIkdyYWR1YXRpb24gUmF0ZSAoJSkiLAogICAgICAgeSA9ICJDb3VudCIpICsKICB0aGVtZV9taW5pbWFsKCkKCgpgYGAKCgo5LiBQbG90IGEgc2NhdHRlcnBsb3QgYmV0d2VlbiBhY2NlcHRhbmNlIHJhdGUgYW5kIGVucm9sbG1lbnQgcmF0ZSB1c2luZyBnZ3Bsb3QyIGxpYnJhcnkuCgoKICBTb2x1dGlvbjoKYGBge3J9CiMgTG9hZCBnZ3Bsb3QyCmxpYnJhcnkoZ2dwbG90MikKCiMgQ3JlYXRlIGEgc2NhdHRlcnBsb3QKZ2dwbG90KGRhdGFfY2xlYW5lZCwgYWVzKHggPSBBY2NlcHRhbmNlLlJhdGUsIHkgPSBFbnJvbGxtZW50LlJhdGUpKSArCiAgZ2VvbV9wb2ludChjb2xvciA9ICJibHVlIiwgc2l6ZSA9IDIsIGFscGhhID0gMC43KSArCiAgbGFicyh0aXRsZSA9ICJTY2F0dGVycGxvdCBvZiBBY2NlcHRhbmNlIFJhdGUgdnMgRW5yb2xsbWVudCBSYXRlIiwKICAgICAgIHggPSAiQWNjZXB0YW5jZSBSYXRlICglKSIsCiAgICAgICB5ID0gIkVucm9sbG1lbnQgUmF0ZSAoJSkiKSArCiAgdGhlbWVfbWluaW1hbCgpCgoKYGBgCgoKMTAuIENhbGN1bGF0ZSB0aGUgYXZlcmFnZSBkZWZhdWx0IHJhdGUgYnkgYWlkIHJhdGUgY2F0ZWdvcnkgKGUuZy4sIGdyb3VwZWQgaW50byByYW5nZXMgbGlrZSAwLTIwJSwgMjAtNDAlKS4gRGlzcGxheSB0aGUgY2F0ZWdvcmllcy4KCgogIFNvbHV0aW9uOgpgYGB7cn0KIyBDcmVhdGUgQWlkIFJhdGUgY2F0ZWdvcmllcwpkYXRhX2NsZWFuZWQkQWlkUmF0ZUNhdGVnb3J5IDwtIGN1dCgKICBkYXRhX2NsZWFuZWQkSW5zdGl0dXRpb25hbC5BaWQuUmF0ZSwKICBicmVha3MgPSBjKDAsIDIwLCA0MCwgNjAsIDgwLCAxMDApLAogIGxhYmVscyA9IGMoIjAtMjAlIiwgIjIwLTQwJSIsICI0MC02MCUiLCAiNjAtODAlIiwgIjgwLTEwMCUiKSwKICBpbmNsdWRlLmxvd2VzdCA9IFRSVUUKKQoKIyBMb2FkIGRwbHlyCmxpYnJhcnkoZHBseXIpCgojIENhbGN1bGF0ZSBhdmVyYWdlIGRlZmF1bHQgcmF0ZSBieSBhaWQgcmF0ZSBjYXRlZ29yeQphdmVyYWdlX2RlZmF1bHRfcmF0ZSA8LSBkYXRhX2NsZWFuZWQgJT4lCiAgZ3JvdXBfYnkoQWlkUmF0ZUNhdGVnb3J5KSAlPiUKICBzdW1tYXJpemUoQXZlcmFnZURlZmF1bHRSYXRlID0gbWVhbihEZWZhdWx0LlJhdGUsIG5hLnJtID0gVFJVRSkp
CgojIERpc3BsYXkgdGhlIHJlc3VsdApwcmludChhdmVyYWdlX2RlZmF1bHRfcmF0ZSkKCnZpZXcoZGF0YV9jbGVhbmVkKQpgYGAKCgoKCjExLiBOb3JtYWxpemUgdGhlIGFjY2VwdGFuY2UgcmF0ZSB0byBhIHNjYWxlIG9mIDAtMSBhbmQgc2F2ZSBpbiBhIG5ldyBjb2x1bW4gIkFjY2VwdGFuY2UgUmF0ZSBOb3JtYWxpemVkIi4gRGlzcGxheSB0aGUgZmlyc3QgNiB2YWx1ZXMuCgoKICBTb2x1dGlvbjoKIApgYGB7cn0KbGlicmFyeShzY2FsZXMpCiMgTm9ybWFsaXplIEFjY2VwdGFuY2UgUmF0ZSB0byAwLTEgc2NhbGUKZGF0YV9jbGVhbmVkJGBBY2NlcHRhbmNlIFJhdGUgTm9ybWFsaXplZGAgPC0gKGRhdGFfY2xlYW5lZCRBY2NlcHRhbmNlLlJhdGUgLSBtaW4oZGF0YV9jbGVhbmVkJEFjY2VwdGFuY2UuUmF0ZSwgbmEucm0gPSBUUlVFKSkgLyAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKG1heChkYXRhX2NsZWFuZWQkQWNjZXB0YW5jZS5SYXRlLCBuYS5ybSA9IFRSVUUpIC0gbWluKGRhdGFfY2xlYW5lZCRBY2NlcHRhbmNlLlJhdGUsIG5hLnJtID0gVFJVRSkpCgojIERpc3BsYXkgdGhlIGZpcnN0IDYgdmFsdWVzIG9mIHRoZSBub3JtYWxpemVkIGNvbHVtbgpoZWFkKGRhdGFfY2xlYW5lZCQnQWNjZXB0YW5jZSBSYXRlIE5vcm1hbGl6ZWQnKQoKCmBgYAoKMTIuIFdoYXQgaXMgdGhlIGNvdW50IG9mIHRoZSBkdXBsaWNhdGUgZW50cmllcyBpbiB0aGUgIlNjaG9vbCIgY29sdW1uPyBSZW1vdmUgZHVwbGljYXRlIHVuaXZlcnNpdHkgZW50cmllcy4KCgogU29sdXRpb246CgpgYGB7cn0KIyBDb3VudCBkdXBsaWNhdGUgZW50cmllcyBpbiB0aGUgIlNjaG9vbCIgY29sdW1uCmR1cGxpY2F0ZV9jb3VudCA8LSBzdW0oZHVwbGljYXRlZChkYXRhX2NsZWFuZWQkU2Nob29sKSkKcHJpbnQoZHVwbGljYXRlX2NvdW50KQoKIyBMb2FkIGRwbHlyCmxpYnJhcnkoZHBseXIpCgojIFJlbW92ZSBkdXBsaWNhdGUgZW50cmllcyBiYXNlZCBvbiAiU2Nob29sIgpkYXRhX2NsZWFuZWQgPC0gZGF0YV9jbGVhbmVkICU+JSBkaXN0aW5jdChTY2hvb2wsIC5rZWVwX2FsbCA9IFRSVUUpCgoKYGBgCgoKMTMuIEZpbmQgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gZ3JhZHVhdGlvbiByYXRlIGFuZCByZXRlbnRpb24gcmF0ZSAoZXhjbHVkZSB0aGUgTkFzIGluIGJvdGggY29sdW1ucykuCgoKIFNvbHV0aW9uOgoKYGBge3J9CiMgQ2FsY3VsYXRlIHRoZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIEdyYWR1YXRpb24gUmF0ZSBhbmQgUmV0ZW50aW9uIFJhdGUKY29ycmVsYXRpb24gPC0gY29yKGRhdGFfY2xlYW5lZCRHcmFkdWF0aW9uLlJhdGUsIGRhdGFfY2xlYW5lZCRSZXRlbnRpb24uUmF0ZSwgdXNlID0gImNvbXBsZXRlLm9icyIpCgojIERpc3BsYXkgdGhlIHJlc3VsdApwcmludChjb3JyZWxhdGlvbikKCgpgYGAKCgoKMTQuIEV4dHJhY3QgdGhlIHZhbHVlcyBpbiBTY2hvb2wgY29sdW1uIGludG8gYSBuZXcgdmFyaWFibGUgd2l0aG91dCAi
VW5pdmVyc2l0eSIgaW4gdGhlIHN0cmluZy4gRm9yIGV4YW1wbGUgIlJvd2FuIFVuaXZlcnNpdHkiIGJlY29tZXMgIlJvd2FuIgoKCiBTb2x1dGlvbjoKCmBgYHtyfQojIFJlbW92ZSAiVW5pdmVyc2l0eSIgZnJvbSB0aGUgU2Nob29sIGNvbHVtbgojVGhlIGV4YXBsZSBwcm92aWRlZCB3YXMgZm9yIHNjaG9vbHMgdGhhdCBoYWQgdW5pdmVyc2l0eSBhdCBhcyB0aGUgc2Vjb25kIHBvcnRpb24gb2YgdGhlIHN0cmluZywgaGVuY2UgdGhlIHNvbHV0aW9uIHdhcyB3cml0dGVuIGxpa2UgdGhhdC4gCgojIExvYWQgc3RyaW5ncgpsaWJyYXJ5KHN0cmluZ3IpCgojIFJlbW92ZSAiVW5pdmVyc2l0eSIgZnJvbSB0aGUgU2Nob29sIGNvbHVtbgpzY2hvb2xfbmFtZXMgPC0gc3RyX3JlcGxhY2UoZGF0YV9jbGVhbmVkJFNjaG9vbCwgIiBVbml2ZXJzaXR5IiwgIiIpCgoKIyBEaXNwbGF5IHRoZSByZXN1bHQKcHJpbnQoaGVhZChzY2hvb2xfbmFtZXMpKQoKYGBgCgoKCgoxNS4gQ291bnQgaG93IG1hbnkgdW5pdmVyc2l0aWVzIGhhdmUgIkluc3RpdHV0ZSIgaW4gdGhlaXIgbmFtZS4KCgogU29sdXRpb246CgpgYGB7cn0KIyBMb2FkIHN0cmluZ3IKbGlicmFyeShzdHJpbmdyKQoKIyBDb3VudCB1bml2ZXJzaXRpZXMgd2l0aCAiSW5zdGl0dXRlIiBpbiB0aGVpciBuYW1lCmluc3RpdHV0ZV9jb3VudCA8LSBzdW0oc3RyX2RldGVjdChkYXRhX2NsZWFuZWQkU2Nob29sLCAiSW5zdGl0dXRlIikpCgojIERpc3BsYXkgdGhlIGNvdW50CnByaW50KGluc3RpdHV0ZV9jb3VudCkKCgoKYGBgCgoxNi4gRXhwb3J0IHRoZSBjbGVhbmVkIGFuZCBwcm9jZXNzZWQgZGF0YXNldCB0byBhIENTViBmaWxlLgoKCiBTb2x1dGlvbjoKCmBgYHtyfQpmaWxlX3BhdGggPC0gIn4vRG93bmxvYWRzL2NsZWFuZWRfY29sbGVnZV9hY2NlcHRhbmNlX2RhdGEuY3N2IgoKIyBTYXZlIHRoZSBkYXRhc2V0IGFzIGEgQ1NWIGZpbGUKd3JpdGUuY3N2KGRhdGFzZXQsIGZpbGUgPSBmaWxlX3BhdGgsIHJvdy5uYW1lcyA9IEZBTFNFKQoKCmBgYAoK