Assignment5

Import, Read, and Preview Data

library("readxl")
library("tidyverse")

## ── Attaching packages ─────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ── Conflicts ────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)
buildings <- read_excel('tallestbuildings.xlsx')
buildings

## # A tibble: 100 x 9
##     Rank `Building Name` City  `Height (m)` `Height (ft)` Floors Completed
##    <dbl> <chr>           <chr>        <dbl>         <dbl>  <dbl>     <dbl>
##  1     1 Burj Khalifa    Duba…         828           2717    163      2010
##  2     2 Shanghai Tower  Shan…         632           2073    128      2015
##  3     3 Makkah Royal C… Mecc…         601           1972    120      2012
##  4     4 One World Trad… New …         541.          1776     94      2014
##  5     5 TAIPEI 101      Taip…         508           1667    101      2004
##  6     6 Shanghai World… Shan…         492           1614    101      2008
##  7     7 International … Hong…         484           1588    108      2010
##  8     8 Petronas Twin … Kual…         452.          1483     88      1998
##  9     8 Petronas Twin … Kual…         452.          1483     88      1998
## 10    10 Zifeng Tower    Nanj…         450           1476     66      2010
## # … with 90 more rows, and 2 more variables: Material <chr>, Use <chr>

Rename columns and clean up data

library(rattle) # need to install the package first

## Rattle: A free graphical interface for data science with R.
## Version 5.3.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.

library(magrittr) # need to install that package first

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

names(buildings) %<>% normVarNames()
buildings

## # A tibble: 100 x 9
##     rank building_name  city  height_m height_ft floors completed material use  
##    <dbl> <chr>          <chr>    <dbl>     <dbl>  <dbl>     <dbl> <chr>    <chr>
##  1     1 Burj Khalifa   Duba…     828       2717    163      2010 steel/c… offi…
##  2     2 Shanghai Tower Shan…     632       2073    128      2015 composi… hote…
##  3     3 Makkah Royal … Mecc…     601       1972    120      2012 steel/c… othe…
##  4     4 One World Tra… New …     541.      1776     94      2014 composi… offi…
##  5     5 TAIPEI 101     Taip…     508       1667    101      2004 composi… offi…
##  6     6 Shanghai Worl… Shan…     492       1614    101      2008 composi… hote…
##  7     7 International… Hong…     484       1588    108      2010 composi… hote…
##  8     8 Petronas Twin… Kual…     452.      1483     88      1998 composi… offi…
##  9     8 Petronas Twin… Kual…     452.      1483     88      1998 composi… offi…
## 10    10 Zifeng Tower   Nanj…     450       1476     66      2010 composi… hote…
## # … with 90 more rows

Group the buildings by City in descending order

Citycounts=buildings%>%
  group_by(city)%>%
  summarise(number=length(city))
arrange(Citycounts, desc(number))

## # A tibble: 38 x 2
##    city               number
##    <chr>               <int>
##  1 Dubai (AE)             18
##  2 New York City (US)      7
##  3 Chicago (US)            6
##  4 Guangzhou (CN)          6
##  5 Hong Kong (CN)          6
##  6 Abu Dhabi (AE)          4
##  7 Moscow (RU)             4
##  8 Shanghai (CN)           4
##  9 Shenzhen (CN)           4
## 10 Kuala Lumpur (MY)       3
## # … with 28 more rows

Group and rank means of city buildings and plot

Citycounts=buildings%>%
  group_by(city)%>%
  summarise(mean_height=mean(height_ft))
mean_buildings_sort <- arrange(Citycounts, desc(mean_height))

ggplot(data = mean_buildings_sort, mapping = aes(reorder(city, mean_height), mean_height)) + geom_bar(stat = 'identity') + coord_flip() + ggtitle('Mean Height of Tallest Buildings by City') + labs(y = 'Mean of Building Heights (ft)', x = 'City')

Create new columns by separating the city and country

library(countrycode)
buildings <- buildings %>%
  separate(city, into = c('city', 'country'), sep = '[:punct:]')

## Warning: Expected 2 pieces. Additional pieces discarded in 100 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

Group the buildings by Country in descending order

# use the country code function
buildings$country = countrycode(buildings$country, "iso2c", "country.name")
Countrycounts=buildings%>%
  group_by(country)%>%
  summarise(number=length(country))
arrange(Countrycounts, desc(number))

## # A tibble: 13 x 2
##    country              number
##    <chr>                 <int>
##  1 China                    42
##  2 United Arab Emirates     22
##  3 United States            17
##  4 Russia                    4
##  5 Malaysia                  3
##  6 Saudi Arabia              3
##  7 Taiwan                    2
##  8 Thailand                  2
##  9 Australia                 1
## 10 Kuwait                    1
## 11 South Korea               1
## 12 United Kingdom            1
## 13 Vietnam                   1

Group and rank means of country buildings and plot

Countrycounts=buildings%>%
  group_by(country)%>%
  summarise(mean_height=mean(height_ft))
mean_buildings_sort <- arrange(Countrycounts, desc(mean_height))

ggplot(data = mean_buildings_sort, mapping = aes(reorder(country, mean_height), mean_height)) + geom_bar(stat = 'identity') + coord_flip() + ggtitle('Mean Height of Tallest Buildings by Country') + labs(y = 'Mean of Building Heights (ft)', x = 'Country')

Color each bar for a country based on the number of buildings from this dataset that are present in that country

Countrycounts=buildings%>%
  group_by(country)%>%
  summarise(mean_height=mean(height_ft), count=length(country))
Countrycounts

## # A tibble: 13 x 3
##    country              mean_height count
##    <chr>                      <dbl> <int>
##  1 Australia                  1058      1
##  2 China                      1167.    42
##  3 Kuwait                     1354      1
##  4 Malaysia                   1328.     3
##  5 Russia                     1069.     4
##  6 Saudi Arabia               1325.     3
##  7 South Korea                1001      1
##  8 Taiwan                     1404.     2
##  9 Thailand                   1014      2
## 10 United Arab Emirates       1193.    22
## 11 United Kingdom             1004      1
## 12 United States              1168.    17
## 13 Vietnam                    1078      1

ggplot(data = Countrycounts, mapping = aes(reorder(country, mean_height), mean_height, fill = as.factor(count))) + geom_bar(stat = 'identity') + coord_flip() + ggtitle('Mean Height of Tallest Buildings by Country') + labs(y = 'Mean of Building Heights (ft)', x = 'Country') + guides(fill=guide_legend(title="Building Count"))

Split use of buildings, pivot longer, and comparing mean heights for building purposes

buildings <- buildings %>%
  separate(use, into = c('use1', 'use2', 'use3'), sep = ' / ')

## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 89 rows [2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].

BuildingPurpose=buildings%>%
  # Extra non-essential step to visualize df progression and split
  group_by(building_name)%>%
  # Extra non-essential step to visualize df progression and split
  summarise(use1, use2, use3, height_ft)%>%
  pivot_longer(cols = c('use1', 'use2', 'use3'), values_to = "use", values_drop_na = TRUE)

BuildingPurpose%>%
  group_by(use)%>%
  summarise(mean_height=mean(height_ft))

## # A tibble: 6 x 2
##   use                 mean_height
##   <chr>                     <dbl>
## 1 hotel                     1210.
## 2 office                    1191.
## 3 other                     1972 
## 4 residential               1150.
## 5 retail                    1117 
## 6 serviced apartments       1118

Assignment5

Bryce Owen, Abbie McKay, and Kimberly Tang

3/1/2020

Import, Read, and Preview Data

Rename columns and clean up data

Group the buildings by City in descending order

Group and rank means of city buildings and plot

Create new columns by separating the city and country

Group the buildings by Country in descending order

Group and rank means of country buildings and plot

Color each bar for a country based on the number of buildings from this dataset that are present in that country

Split use of buildings, pivot longer, and comparing mean heights for building purposes