library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)

create datasets to be used

# Album-level lookup table: one row per album with its release year.
# The album titles are the join key for the names of the `jimi` list built
# further down, so they must be spelled exactly like those names
# ("Axis: Bold As Love", capital "As"). With a different spelling,
# left_join() finds no match and `year` is NA for that album's songs.
discographyKH <- tibble(
  album = c("Are You Experienced", "Axis: Bold As Love", "Electric Ladyland"),
  year = c(1967, 1967, 1968)
)

# discographyKH == discography

discography <- discographyKH


# Track listings: one tibble per album with a `song` and a `length` column.
# Each object is named after its album, so the names are non-syntactic and
# have to be written in backticks.
`Are You Experienced` <- tibble(
  song = c(
    "Purple Haze", "Manic Depression", "Hey Joe", "May This Be Love",
    "I Don't Live Today", "The Wind Cries Mary", "Fire",
    "Third Stone from the Sun", "Foxy Lady", "Are You Experienced?"
  ),
  length = c(
    9960, 13560, 12180, 11640, 14100, 12060, 9240, 24000, 11700, 14100
  )
)

`Axis: Bold As Love` <- tibble(
  song = c(
    "EXP", "Up from the Skies", "Spanish Castle Magic", "Wait Until Tomorrow",
    "Ain't No Telling", "Little Wing", "If 6 was 9", "You Got Me Floatin",
    "Castles Made of Sand", "She's So Fine", "One Rainy Wish",
    "Little Miss Lover", "Bold as Love"
  ),
  length = c(
    6900, 10500, 10800, 10800, 6360, 8640, 19920, 9900, 9960, 9420, 13200,
    8400, 15060
  )
)

`Electric Ladyland` <- tibble(
  song = c(
    "And the Gods Made Love", "Have You Ever Been (To Electric Ladyland)",
    "Crosstown Traffic", "Voodoo Chile", "Little Miss Strange",
    "Long Hot Summer Night", "Come On (Part 1)", "Gypsy Eyes",
    "Burning of the Midnight Lamp", "Rainy Day, Dream Away",
    "1983... (A Merman I Should Turn to Be)",
    "Moon, Turn the Tides... Gently Gently Away",
    "Still Raining, Still Dreaming", "House Burning Down",
    "All Along the Watchtower", "Voodoo Child (Slight Return)"
  ),
  length = c(
    4860, 7860, 8700, 54000, 10320, 12420, 14940, 13380, 13140, 13320, 49140,
    3720, 15900, 16380, 14460, 18720
  )
)

# Collect the three album tibbles into a named list. The values have to be
# the tibble objects themselves (referenced with backticks), not the album
# titles as character strings; otherwise the list holds three strings and
# there are no data frames for bind_rows() to stack.
jimiKH <- list(
  "Are You Experienced" = `Are You Experienced`,
  "Axis: Bold As Love" = `Axis: Bold As Love`,
  "Electric Ladyland" = `Electric Ladyland`
)

# jimiKH == jimi
# Error: comparison of these types is not implemented

jimi <- jimiKH

# the same list can also be defined directly,

# without the intermediate album objects

# Named list of album tibbles, defined in one expression. The list names are
# the album titles; each element has a `song` and a `length` column.
jimiKH <- list(
  `Are You Experienced` = tibble(
    song = c(
      "Purple Haze", "Manic Depression", "Hey Joe", "May This Be Love",
      "I Don't Live Today", "The Wind Cries Mary", "Fire",
      "Third Stone from the Sun", "Foxy Lady", "Are You Experienced?"
    ),
    length = c(
      9960, 13560, 12180, 11640, 14100, 12060, 9240, 24000, 11700, 14100
    )
  ),
  `Axis: Bold As Love` = tibble(
    song = c(
      "EXP", "Up from the Skies", "Spanish Castle Magic",
      "Wait Until Tomorrow", "Ain't No Telling", "Little Wing", "If 6 was 9",
      "You Got Me Floatin", "Castles Made of Sand", "She's So Fine",
      "One Rainy Wish", "Little Miss Lover", "Bold as Love"
    ),
    length = c(
      6900, 10500, 10800, 10800, 6360, 8640, 19920, 9900, 9960, 9420, 13200,
      8400, 15060
    )
  ),
  `Electric Ladyland` = tibble(
    song = c(
      "And the Gods Made Love", "Have You Ever Been (To Electric Ladyland)",
      "Crosstown Traffic", "Voodoo Chile", "Little Miss Strange",
      "Long Hot Summer Night", "Come On (Part 1)", "Gypsy Eyes",
      "Burning of the Midnight Lamp", "Rainy Day, Dream Away",
      "1983... (A Merman I Should Turn to Be)",
      "Moon, Turn the Tides... Gently Gently Away",
      "Still Raining, Still Dreaming", "House Burning Down",
      "All Along the Watchtower", "Voodoo Child (Slight Return)"
    ),
    length = c(
      4860, 7860, 8700, 54000, 10320, 12420, 14940, 13380, 13140, 13320,
      49140, 3720, 15900, 16380, 14460, 18720
    )
  )
)

# jimiKH$`Are You Experienced` == jimi$`Are You Experienced`

# Use the locally built list under the name the exercises expect.
jimi <- jimiKH

bind_rows() and bind_cols()

  • differences between
    • dplyr’s bind_rows() and bind_cols()
    • and base R’s rbind() and cbind():

      • bind_rows() and bind_cols()
        • are faster than rbind() and cbind().
        • can take a list of data frames as input.
        • always return a tibble (a data frame with class tbl_df).
      • rbind() returns an error when column names do not match across data frames. bind_rows() creates a column for each unique column name and distributes missing values as appropriate.

x %>% bind_rows(y)

The .id argument to bind_rows() is a string which specifies the name of the column that results from binding the rows of separate data frames into one data frame.

# jimi is a list of data frames, one element per album.
# Stack them into a single data frame; .id = "album" stores each element's
# list name in a new `album` column.
bind_rows(jimi, .id = "album") %>%
  # Complete the data by joining discography on the shared `album` column
  left_join(discography)
## Joining, by = "album"
## # A tibble: 39 x 4
##    album               song                     length  year
##    <chr>               <chr>                     <dbl> <dbl>
##  1 Are You Experienced Purple Haze                9960  1967
##  2 Are You Experienced Manic Depression          13560  1967
##  3 Are You Experienced Hey Joe                   12180  1967
##  4 Are You Experienced May This Be Love          11640  1967
##  5 Are You Experienced I Don't Live Today        14100  1967
##  6 Are You Experienced The Wind Cries Mary       12060  1967
##  7 Are You Experienced Fire                       9240  1967
##  8 Are You Experienced Third Stone from the Sun  24000  1967
##  9 Are You Experienced Foxy Lady                 11700  1967
## 10 Are You Experienced Are You Experienced?      14100  1967
## # ... with 29 more rows

Example: bind cols

  • hank_years contains the name and release year of each of Hank Williams’ 67 singles.
  • hank_charts contains the name of each of Hank Williams’ 67 singles as well as the highest position it earned on the Billboard sales charts.
> hank_years
# A tibble: 67 × 2
    year                                    song
   <int>                                   <chr>
1   1947                         Move It On Over
2   1947    My Love for You (Has Turned to Hate)
3   1947 Never Again (Will I Knock on Your Door)
4   1947    On the Banks of the Old Ponchartrain
5   1947                            Pan American
6   1947             Wealth Won't Save Your Soul
7   1948                   A Mansion on the Hill
8   1948                           Honky Tonkin'
9   1948                         I Saw the Light
10  1948                   I'm a Long Gone Daddy
# ... with 57 more rows
> hank_charts
# A tibble: 67 × 2
                              song  peak
                             <chr> <int>
1  (I Heard That) Lonesome Whistle     9
2     (I'm Gonna) Sing, Sing, Sing    NA
3                 A Home in Heaven    NA
4            A Mansion on the Hill    12
5             A Teardrop on a Rose    NA
6        At the First Fall of Snow    NA
7       Baby, We're Really in Love     4
8                California Zephyr    NA
9                      Calling You    NA
10                Cold, Cold Heart     1
# ... with 57 more rows
  • Each dataset contains the same songs
    • hank_years is arranged chronologically by year
    • hank_charts is arranged alphabetically by song title
# bind_cols() matches rows purely by position, so hank_years is first put
# into the same order as hank_charts (alphabetical by song title).
hank_years %>%
  # Sort alphabetically by song title
  arrange(song) %>%
  # Keep only the year column (hank_charts already carries song)
  select(year) %>%
  # Attach year to hank_charts, row by row
  bind_cols(hank_charts) %>%
  # Put the finished dataset back into chronological order
  arrange(year, song)

# # A tibble: 67 × 3
#     year                                    song  peak
#    <int>                                   <chr> <int>
# 1   1947                         Move It On Over     4
# 2   1947    My Love for You (Has Turned to Hate)    NA
# 3   1947 Never Again (Will I Knock on Your Door)    NA
# 4   1947    On the Banks of the Old Ponchartrain    NA
# 5   1947                            Pan American    NA
# 6   1947             Wealth Won't Save Your Soul    NA
# 7   1948                   A Mansion on the Hill    12
# 8   1948                           Honky Tonkin'    14
# 9   1948                         I Saw the Light    NA
# 10  1948             I'm So Lonesome I Could Cry     2
# # ... with 57 more rows

data_frame() and as_data_frame()

  • R Base way to create a dataframe
    • data.frame() and as.data.frame()
  • dplyr way to create a dataframe
    • data_frame() and as_data_frame()
  • Advantages of using data_frame():
    • will never change the data type of a vector. (e.g. strings to factors)
      • data_frame never coerces strings to factors.
    • will never add row names
    • will not change unusual column names
      • data_frame will not change your column names, even if they are unorthodox.
    • only recycles length 1 inputs
    • evaluate its arguments lazily and in order. (So you can reference a column in the next column definition)
    • outputs a tibble (class tbl_df)
      • data_frame always returns a data frame of class tbl_df (if true, assume this is an advantage).
# Make combined data frame from the three hank_* vectors.
# tibble() is the current constructor; data_frame() is a deprecated alias
# for it and triggers a deprecation warning in recent tibble releases.
tibble(year = hank_year, song = hank_song, peak = hank_peak) %>%
  # Extract songs where peak equals 1,
  # i.e. Hank's number one hits
  filter(peak == 1)

# # A tibble: 11 × 3
#     year                                   song  peak
#    <int>                                  <chr> <int>
# 1   1949                         Lovesick Blues     1
# 2   1950               Long Gone Lonesome Blues     1
# 3   1950                      Moanin' the Blues     1
# 4   1950                  Why Don't You Love Me     1
# 5   1951                       Cold, Cold Heart     1
# 6   1951                       Hey Good Lookin'     1
# 7   1952 I'll Never Get Out of This World Alive     1
# 8   1952               Jambalaya (On the Bayou)     1
# 9   1953                               Kaw-Liga     1
# 10  1953        Take These Chains from My Heart     1
# 11  1953                    Your Cheatin' Heart     1

hank - stored as list of vectors:

$year
 [1] 1947 1947 1947 1947 1947 1947 1948 1948 1948 1948 1948 1949 1949 1949 1949
[16] 1949 1949 1949 1949 1950 1950 1950 1950 1950 1950 1950 1950 1951 1951 1951
[31] 1951 1951 1951 1951 1951 1952 1952 1952 1952 1952 1952 1953 1953 1953 1953
[46] 1953 1953 1954 1954 1954 1954 1955 1955 1955 1955 1955 1956 1956 1956 1956
[61] 1957 1957 1957 1958 1965 1966 1989

$song
 [1] "Move It On Over"
 [2] "My Love for You (Has Turned to Hate)"
 [3] "Never Again (Will I Knock on Your Door)"
 [4] "On the Banks of the Old Ponchartrain"
 [5] "Pan American"
 [6] "Wealth Won't Save Your Soul"
 [7] "A Mansion on the Hill"
 [8] "Honky Tonkin'"
 [9] "I Saw the Light"
[10] "I'm So Lonesome I Could Cry"
[11] "My Sweet Love Ain't Around"
[12] "I'm Satisfied with You"
[13] "Lost Highway"
[14] "Lovesick Blues"
[15] "Mind Your Own Business"
[16] "My Bucket's Got a Hole in It"
[17] "Never Again (Will I Knock on Your Door)"
[18] "Wedding Bells"
[19] "You're Gonna Change (Or I'm Gonna Leave)"
[20] "I Just Don't Like This Kind of Living"
[21] "Long Gone Lonesome Blues"
[22] "Moanin' the Blues"
[23] "My Son Calls Another Man Daddy"
[24] "Nobody's Lonesome for Me"
[25] "They'll Never Take Her Love from Me"
[26] "Why Don't You Love Me"
[27] "Why Should We Try Anymore"
[28] "(I Heard That) Lonesome Whistle"
[29] "Baby, We're Really in Love"
[30] "Cold, Cold Heart"
[31] "Crazy Heart"
[32] "Dear John"
[33] "Hey Good Lookin'"
[34] "Howlin' At the Moon"
[35] "I Can't Help It (If I'm Still in Love With You)"
[36] "Half as Much"
[37] "Honky Tonk Blues"
[38] "I'll Never Get Out of This World Alive"
[39] "Jambalaya (On the Bayou)"
[40] "Settin' the Woods on Fire"
[41] "You Win Again"
[42] "Calling You"
[43] "I Won't Be Home No More"
[44] "Kaw-Liga"
[45] "Take These Chains from My Heart"
[46] "Weary Blues from Waitin'"
[47] "Your Cheatin' Heart"
[48] "(I'm Gonna) Sing, Sing, Sing"
[49] "How Can You Refuse Him Now"
[50] "I'm a Long Gone Daddy"
[51] "You Better Keep It on Your Mind"
[52] "A Teardrop on a Rose"
[53] "At the First Fall of Snow"
[54] "Mother Is Gone"
[55] "Please Don't Let Me Love You"
[56] "Thank God"
[57] "A Home in Heaven"
[58] "California Zephyr"
[59] "Singing Waterfall"
[60] "There's a Tear in My Beer"
[61] "Leave Me Alone with the Blues"
[62] "Ready to Go Home"
[63] "The Waltz of the Wind"
[64] "Just Waitin'"
[65] "The Pale Horse and His Rider"
[66] "Kaw-Liga"
[67] "There's No Room in My Heart for the Blues"

$peak
 [1]  4 NA NA NA NA NA 12 14 NA  2 NA NA 12  1  5  2  6  2  4  5  1  1  9  9  5
[26]  1  9  9  4  1  4  8  1  3  2  2  2  1  1  2 10 NA  4  1  1  7  1 NA NA  6
[51] NA NA NA NA  9 NA NA NA NA  7 NA NA NA NA NA NA NA

create this hank dataset to be used:


# Rebuild the hank dataset as a list of three parallel vectors
# (year, song, peak), each with one entry per single.
hank <- list(
  year = c(
    1947, 1947, 1947, 1947, 1947, 1947,
    1948, 1948, 1948, 1948, 1948,
    1949, 1949, 1949, 1949, 1949, 1949, 1949, 1949,
    1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950,
    1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951,
    1952, 1952, 1952, 1952, 1952, 1952,
    1953, 1953, 1953, 1953, 1953, 1953,
    1954, 1954, 1954, 1954,
    1955, 1955, 1955, 1955, 1955,
    1956, 1956, 1956, 1956,
    1957, 1957, 1957,
    1958, 1965, 1966, 1989
  ),
  song = c(
    "Move It On Over",
    "My Love for You (Has Turned to Hate)",
    "Never Again (Will I Knock on Your Door)",
    "On the Banks of the Old Ponchartrain",
    "Pan American",
    "Wealth Won't Save Your Soul",
    "A Mansion on the Hill",
    "Honky Tonkin'",
    "I Saw the Light",
    "I'm So Lonesome I Could Cry",
    "My Sweet Love Ain't Around",
    "I'm Satisfied with You",
    "Lost Highway",
    "Lovesick Blues",
    "Mind Your Own Business",
    "My Bucket's Got a Hole in It",
    "Never Again (Will I Knock on Your Door)",
    "Wedding Bells",
    "You're Gonna Change (Or I'm Gonna Leave)",
    "I Just Don't Like This Kind of Living",
    "Long Gone Lonesome Blues",
    "Moanin' the Blues",
    "My Son Calls Another Man Daddy",
    "Nobody's Lonesome for Me",
    "They'll Never Take Her Love from Me",
    "Why Don't You Love Me",
    "Why Should We Try Anymore",
    "(I Heard That) Lonesome Whistle",
    "Baby, We're Really in Love",
    "Cold, Cold Heart",
    "Crazy Heart",
    "Dear John",
    "Hey Good Lookin'",
    "Howlin' At the Moon",
    "I Can't Help It (If I'm Still in Love With You)",
    "Half as Much",
    "Honky Tonk Blues",
    "I'll Never Get Out of This World Alive",
    "Jambalaya (On the Bayou)",
    "Settin' the Woods on Fire",
    "You Win Again",
    "Calling You",
    "I Won't Be Home No More",
    "Kaw-Liga",
    "Take These Chains from My Heart",
    "Weary Blues from Waitin'",
    "Your Cheatin' Heart",
    "(I'm Gonna) Sing, Sing, Sing",
    "How Can You Refuse Him Now",
    "I'm a Long Gone Daddy",
    "You Better Keep It on Your Mind",
    "A Teardrop on a Rose",
    "At the First Fall of Snow",
    "Mother Is Gone",
    "Please Don't Let Me Love You",
    "Thank God",
    "A Home in Heaven",
    "California Zephyr",
    "Singing Waterfall",
    "There's a Tear in My Beer",
    "Leave Me Alone with the Blues",
    "Ready to Go Home",
    "The Waltz of the Wind",
    "Just Waitin'",
    "The Pale Horse and His Rider",
    "Kaw-Liga",
    "There's No Room in My Heart for the Blues"
  ),
  peak = c(
    4, NA, NA, NA, NA, NA, 12, 14, NA, 2,
    NA, NA, 12, 1, 5, 2, 6, 2, 4, 5,
    1, 1, 9, 9, 5, 1, 9, 9, 4, 1,
    4, 8, 1, 3, 2, 2, 2, 1, 1, 2,
    10, NA, 4, 1, 1, 7, 1, NA, NA, 6,
    NA, NA, NA, NA, 9, NA, NA, NA, NA, 7,
    NA, NA, NA, NA, NA, NA, NA
  )
)

# Print the list to inspect it
hank
## $year
##  [1] 1947 1947 1947 1947 1947 1947 1948 1948 1948 1948 1948 1949 1949 1949
## [15] 1949 1949 1949 1949 1949 1950 1950 1950 1950 1950 1950 1950 1950 1951
## [29] 1951 1951 1951 1951 1951 1951 1951 1952 1952 1952 1952 1952 1952 1953
## [43] 1953 1953 1953 1953 1953 1954 1954 1954 1954 1955 1955 1955 1955 1955
## [57] 1956 1956 1956 1956 1957 1957 1957 1958 1965 1966 1989
## 
## $song
##  [1] "Move It On Over"                                
##  [2] "My Love for You (Has Turned to Hate)"           
##  [3] "Never Again (Will I Knock on Your Door)"        
##  [4] "On the Banks of the Old Ponchartrain"           
##  [5] "Pan American"                                   
##  [6] "Wealth Won't Save Your Soul"                    
##  [7] "A Mansion on the Hill"                          
##  [8] "Honky Tonkin'"                                  
##  [9] "I Saw the Light"                                
## [10] "I'm So Lonesome I Could Cry"                    
## [11] "My Sweet Love Ain't Around"                     
## [12] "I'm Satisfied with You"                         
## [13] "Lost Highway"                                   
## [14] "Lovesick Blues"                                 
## [15] "Mind Your Own Business"                         
## [16] "My Bucket's Got a Hole in It"                   
## [17] "Never Again (Will I Knock on Your Door)"        
## [18] "Wedding Bells"                                  
## [19] "You're Gonna Change (Or I'm Gonna Leave)"       
## [20] "I Just Don't Like This Kind of Living"          
## [21] "Long Gone Lonesome Blues"                       
## [22] "Moanin' the Blues"                              
## [23] "My Son Calls Another Man Daddy"                 
## [24] "Nobody's Lonesome for Me"                       
## [25] "They'll Never Take Her Love from Me"            
## [26] "Why Don't You Love Me"                          
## [27] "Why Should We Try Anymore"                      
## [28] "(I Heard That) Lonesome Whistle"                
## [29] "Baby, We're Really in Love"                     
## [30] "Cold, Cold Heart"                               
## [31] "Crazy Heart"                                    
## [32] "Dear John"                                      
## [33] "Hey Good Lookin'"                               
## [34] "Howlin' At the Moon"                            
## [35] "I Can't Help It (If I'm Still in Love With You)"
## [36] "Half as Much"                                   
## [37] "Honky Tonk Blues"                               
## [38] "I'll Never Get Out of This World Alive"         
## [39] "Jambalaya (On the Bayou)"                       
## [40] "Settin' the Woods on Fire"                      
## [41] "You Win Again"                                  
## [42] "Calling You"                                    
## [43] "I Won't Be Home No More"                        
## [44] "Kaw-Liga"                                       
## [45] "Take These Chains from My Heart"                
## [46] "Weary Blues from Waitin'"                       
## [47] "Your Cheatin' Heart"                            
## [48] "(I'm Gonna) Sing, Sing, Sing"                   
## [49] "How Can You Refuse Him Now"                     
## [50] "I'm a Long Gone Daddy"                          
## [51] "You Better Keep It on Your Mind"                
## [52] "A Teardrop on a Rose"                           
## [53] "At the First Fall of Snow"                      
## [54] "Mother Is Gone"                                 
## [55] "Please Don't Let Me Love You"                   
## [56] "Thank God"                                      
## [57] "A Home in Heaven"                               
## [58] "California Zephyr"                              
## [59] "Singing Waterfall"                              
## [60] "There's a Tear in My Beer"                      
## [61] "Leave Me Alone with the Blues"                  
## [62] "Ready to Go Home"                               
## [63] "The Waltz of the Wind"                          
## [64] "Just Waitin'"                                   
## [65] "The Pale Horse and His Rider"                   
## [66] "Kaw-Liga"                                       
## [67] "There's No Room in My Heart for the Blues"      
## 
## $peak
##  [1]  4 NA NA NA NA NA 12 14 NA  2 NA NA 12  1  5  2  6  2  4  5  1  1  9
## [24]  9  5  1  9  9  4  1  4  8  1  3  2  2  2  1  1  2 10 NA  4  1  1  7
## [47]  1 NA NA  6 NA NA NA NA  9 NA NA NA NA  7 NA NA NA NA NA NA NA

Turn this created hank into the same dataset made in the last exercise

# Convert the hank list (a list of equal-length column vectors) into a
# data frame. as_tibble() replaces as_data_frame(), which is a deprecated
# alias for it.
as_tibble(hank) %>%
  # Extract songs where peak equals 1
  # (i.e. the number one hits)
  filter(peak == 1)
## # A tibble: 11 x 3
##     year song                                    peak
##    <dbl> <chr>                                  <dbl>
##  1  1949 Lovesick Blues                             1
##  2  1950 Long Gone Lonesome Blues                   1
##  3  1950 Moanin' the Blues                          1
##  4  1950 Why Don't You Love Me                      1
##  5  1951 Cold, Cold Heart                           1
##  6  1951 Hey Good Lookin'                           1
##  7  1952 I'll Never Get Out of This World Alive     1
##  8  1952 Jambalaya (On the Bayou)                   1
##  9  1953 Kaw-Liga                                   1
## 10  1953 Take These Chains from My Heart            1
## 11  1953 Your Cheatin' Heart                        1
"michael" = list of data frames, one for each album released by Michael Jackson

$`Got to Be There`
# A tibble: 10 × 2
                                song  peak
                               <chr> <int>
1                  Ain'T No Sunshine    NA
2           I Wanna be Where You Are    NA
3  Girl Don't Take Your Love from Me    NA
4                   In Our Small Way    NA
5                    Got to Be There     4
6                      Rockin' Robin     2
7                   Wings of My Love    NA
8      Maria (You Were the Only One)    NA
9   Love is Here and Now You're Gone    NA
10               You've Got a Friend    NA

$Ben
# A tibble: 10 × 2
                              song  peak
                             <chr> <int>
1                              Ben     1
2           Greatest Show On Earth    NA
3  People Make the World Go 'Round    NA
4           We've Got a Good Thing    NA
5      Everybody's Somebody's Fool    NA
6                          My Girl    NA
7    What Goes Around Comes Around    NA
8                 In Our Small Way    NA
9        Shoo-Be-Doo-Be-Doo-Da-Day    NA
10      You Can Cry On My Shoulder    NA

$`Music & Me`
# A tibble: 10 × 2
                     song  peak
                    <chr> <int>
1    With a Child's Heart    50
2                Up Again    NA
3  All the Things You Are    NA
4                   Happy    NA
5               Too Young    NA
6          Doggin' Around    NA
7                Euphoria    NA
8            Morning Glow    NA
9            Johnny Raven    NA
10           Music and Me    NA

$`Forever, Michael`
# A tibble: 10 × 2
                       song  peak
                      <chr> <int>
1        We're Almost There    54
2              Take Me Back    NA
3      One Day in Your Life    NA
4    Cinderella Stay Awhile    NA
5         We've Got Forever    NA
6  Just a Little Bit of You    23
7             You Are There    NA
8                Dapper-Dan    NA
9              Dear Michael    NA
10    I'll Come Home to You    NA

$`Off the Wall`
# A tibble: 10 × 2
                             song  peak
                            <chr> <int>
1  Don't Stop 'Til You Get Enough     1
2                   Rock with You     1
3           Working Day and Night    NA
4                Get on the Floor    NA
5                    Off the Wall    10
6                      Girlfriend    NA
7            She's Out of My Life    10
8                 I Can't Help It    NA
9        It's the Falling in Love    NA
10            Burn This Disco Out    NA

$Thriller
# A tibble: 9 × 2
                         song  peak
                        <chr> <int>
1  Wanna Be Startin' Somethin     5
2                Baby be Mine    NA
3            The Girl is Mine     2
4                    Thriller     4
5                     Beat It     1
6                 Billie Jean     1
7                Human Nature     7
8 P.Y.T. (Pretty Young Thing)    10
9         The Lady in My Life    NA

$Bad
# A tibble: 10 × 2
                           song  peak
                          <chr> <int>
1                           Bad     1
2      The Way You Make Me Feel     1
3                   Speed Demon    NA
4                 Liberian Girl    NA
5             Just Good Friends    NA
6            Another Part of Me    11
7             Man in the Mirror     1
8  I Just Can't Stop Loving You     1
9                   Dirty Diana     1
10              Smooth Criminal     7

$Dangerous
# A tibble: 14 × 2
                       song  peak
                      <chr> <int>
1                       Jam    26
2  Why You Wanna Trip on Me    NA
3             In the Closet     6
4        She Drives Me Wild    NA
5         Remember the Time     3
6    Can't Let Her Get Away    NA
7            Heal the World    27
8            Black or White     1
9                 Who Is It    14
10            Give In to Me    NA
11        Will You Be There     7
12           Keep the Faith    NA
13            Gone Too Soon    NA
14                Dangerous    NA

$HIStory
# A tibble: 30 × 2
                           song  peak
                          <chr> <int>
1                   Billie Jean    NA
2      The Way You Make Me Feel    NA
3                Black or White    NA
4                 Rock with You    NA
5          She's Out of My Life    NA
6                           Bad    NA
7  I Just Can't Stop Loving You    NA
8             Man in the Mirror    NA
9                      Thriller    NA
10                      Beat It    NA
# ... with 20 more rows

$Invincible
# A tibble: 16 × 2
                song  peak
               <chr> <int>
1        Unbreakable    NA
2       Heartbreaker    NA
3         Invincible    NA
4      Break of Dawn    NA
5    Heaven Can Wait    NA
6  You Rock My World    10
7        Butterflies    NA
8         Speechless    NA
9         2000 Watts    NA
10   You Are My Life    NA
11           Privacy    NA
12   Don't Walk Away    NA
13               Cry    NA
14 The Lost Children    NA
15  Whatever Happens    NA
16        Threatened    NA

Task: call a dplyr function that binds the datasets in the list into a single data frame, adding an album column as it does so.

# INCORRECT: as_data_frame(michael) %>% ...
#   as_data_frame() combines a list of column vectors into a data frame,
#   but michael is a list of data frames.
# CORRECT: multiple datasets in a list => bind_rows(), which stacks them and
#   stores each list name in the new `album` column.
michael %>%
  bind_rows(.id = "album") %>%
  group_by(album) %>%
  # Within each album keep the song(s) whose peak has rank 1; songs with an
  # NA peak get an NA rank and are dropped by filter()
  filter(min_rank(peak) == 1) %>%
  select(-peak)
                
                
Source: local data frame [16 x 2]
Groups: album [10]

              album                           song
              <chr>                          <chr>
1   Got to Be There                  Rockin' Robin
2               Ben                            Ben
3        Music & Me           With a Child's Heart
4  Forever, Michael       Just a Little Bit of You
5      Off the Wall Don't Stop 'Til You Get Enough
6      Off the Wall                  Rock with You
7          Thriller                        Beat It
8          Thriller                    Billie Jean
9               Bad                            Bad
10              Bad       The Way You Make Me Feel
11              Bad              Man in the Mirror
12              Bad   I Just Can't Stop Loving You
13              Bad                    Dirty Diana
14        Dangerous                 Black or White
15          HIStory              You Are Not Alone
16       Invincible              You Rock My World

Atomic Data Types

# typeof() reports the atomic type of a value; one example per type follows,
# with the recorded output underneath each call.
typeof(TRUE) # logical
## [1] "logical"
typeof("hello") # character, i.e. a string
## [1] "character"
typeof(3.14) # double, i.e. a numeric with a decimal part
## [1] "double"
typeof(1L) # integer, i.e. a numeric without a decimal part (note the L suffix)
## [1] "integer"
typeof(1 + 2i) # complex
## [1] "complex"
typeof(raw(1)) # raw
## [1] "raw"

coercion

  • base R’s coercion behavior:
    • R will coerce logicals and numerics to strings if strings are present.
    • R will coerce logicals to numerics if numerics are present.
    • as.character() will coerce a factor to its labels, returned as strings.
    • as.numeric() will coerce a factor to its underlying integer codes (not the numbers shown in its labels); use as.numeric(as.character(x)) to recover the labelled values.

dplyr’s coercion behavior:

  • dplyr functions will not automatically coerce data types
    • they return an error
    • and expect you to manually coerce the data
  • Exception: factors
    • dplyr converts non-aligning factors to strings
    • and gives a warning message
  • dplyr’s two table functions will…
    • coerce two factors to a string if the factors have different levels.
    • coerce two factors to a string if the factors have the same levels in a different order.
    • return an error instead of coercing logicals, integers, and numerics to a character.
  • Character (string) & Integer or Double or Logical => Character

  • Double & Integer or Logical => Double

  • Integer & Logical => Integer

> seventies 
# A tibble: 10 × 3
     year                      album                 band
   <fctr>                      <chr>                <chr>
1    1970 Bridge Over Troubled Water  Simon and Garfunkel
2    1971     Jesus Christ Superstar      Various Artists
3    1972                    Harvest           Neil Young
4    1973      The World is a Ghetto                  War
5    1974  Goodbye Yellow Brick Road           Elton John
6    1975 Elton John's Greatest Hits           Elton John
7    1976             Peter Frampton Frampton Comes Alive
8    1977                    Rumours        Fleetwood Mac
9    1978       Saturday Night Fever             Bee Gees
10   1979                 Billy Joel          52nd Street
> seventies %>% 
+   mutate(year = as.numeric(year))
+ 
# A tibble: 10 × 3
    year                      album                 band
   <dbl>                      <chr>                <chr>
1      1 Bridge Over Troubled Water  Simon and Garfunkel
2      2     Jesus Christ Superstar      Various Artists
3      3                    Harvest           Neil Young
4      4      The World is a Ghetto                  War
5      5  Goodbye Yellow Brick Road           Elton John
6      6 Elton John's Greatest Hits           Elton John
7      7             Peter Frampton Frampton Comes Alive
8      8                    Rumours        Fleetwood Mac
9      9       Saturday Night Fever             Bee Gees
10    10                 Billy Joel          52nd Street
> seventies %>% 
    # Coerce seventies$year into a useful numeric
    mutate(year = as.numeric(as.character(year)))
# A tibble: 10 × 3
    year                      album                 band
   <dbl>                      <chr>                <chr>
1   1970 Bridge Over Troubled Water  Simon and Garfunkel
2   1971     Jesus Christ Superstar      Various Artists
3   1972                    Harvest           Neil Young
4   1973      The World is a Ghetto                  War
5   1974  Goodbye Yellow Brick Road           Elton John
6   1975 Elton John's Greatest Hits           Elton John
7   1976             Peter Frampton Frampton Comes Alive
8   1977                    Rumours        Fleetwood Mac
9   1978       Saturday Night Fever             Bee Gees
10  1979                 Billy Joel          52nd Street