1 Programming Constructs

1.1 Hello World

say.hello <- function() {
    print("Hello World")
}

say.hello

## function() {
##     print("Hello World")
## }

say.hello()

## [1] "Hello World"

rm(list = ls())

1.2 Function Arguments

sprintf("Hello %s", "Naimish")

## [1] "Hello Naimish"

sprintf("Hello %s, Today is %s", "Naimish", Sys.Date())

## [1] "Hello Naimish, Today is 2015-07-03"

hello.person <- function(name) {
    sprintf("Hello %s", name)
}

hello.person("Naimish")

## [1] "Hello Naimish"

hello.person <- function(first, last) {
    sprintf("Hello %s %s", first, last)
}

hello.person("Naimish", "Agarwal")

## [1] "Hello Naimish Agarwal"

hello.person(first = "Naimish", last = "Agarwal")

## [1] "Hello Naimish Agarwal"

hello.person(last = "Agarwal", "Naimish")

## [1] "Hello Naimish Agarwal"

hello.person <- function(first, last = "Agarwal") {
    sprintf("Hello %s %s", first, last)
}

hello.person("Naimish")

## [1] "Hello Naimish Agarwal"

hello.person <- function(first, last = "Agarwal", ...) {
    sprintf("Hello %s %s", first, last)
}

hello.person("Naimish", "Agarwal", "How are you?")

## [1] "Hello Naimish Agarwal"

1.3 Return values from functions

double.num <- function(x) {
    x * 2
}

double.num(3)

## [1] 6

double.num <- function(x) {
    return(x * 2)
}

double.num(5)

## [1] 10

double.num <- function(x) {
    return(x * 2)
    
    # Not executed
    print("Hello")
    
    
    # Not executed
    return(17)
}

double.num(12)

## [1] 24

rm(list = ls())

1.4 `do.call` function

hello.person <- function(first, last  = "Agarwal") {
    sprintf("Hello %s %s", first, last)
}

hello.person("Naimish")

## [1] "Hello Naimish Agarwal"

do.call(what = hello.person, args = list(first = "Naimish"))

## [1] "Hello Naimish Agarwal"

do.call(what = "hello.person", args = list(first = "Naimish"))

## [1] "Hello Naimish Agarwal"

run.this <- function(x, func = mean) {
    do.call(what = func, args = list(x))
}

run.this(1:10)

## [1] 5.5

run.this(1:100, func = sum)

## [1] 5050

run.this(rnorm(100), func = sd)

## [1] 0.9791893

1.5 `if`-`else` statement

check.bool <- function(x) {
    if (x == 1) {
        print("Hello")
    } else {
        print("Bye")
    }
}


check.bool(1)

## [1] "Hello"

check.bool(2)

## [1] "Bye"

check.bool <- function(x) {
    if (x == 1) {
        print("is 1")
    } else if (x == 2) {
        print("is 2")
    } else {
        print("is something else")
    }
}

check.bool(1)

## [1] "is 1"

check.bool(2)

## [1] "is 2"

check.bool(3)

## [1] "is something else"

1.6 `switch` statement

use.switch <- function(x) {
    switch(x,
           "a" = "first",
           "b" = "second",
           "c" = "third",
           "other")
}

use.switch("a")

## [1] "first"

use.switch("ask")

## [1] "other"

use.switch(1)

## [1] "first"

use.switch(2)

## [1] "second"

use.switch(3)

## [1] "third"

use.switch(4)

## [1] "other"

use.switch(5)

is.null(use.switch(5))

## [1] TRUE

1.7 Checks on Vectors

ifelse(1 == 1, "yes", "no")

## [1] "yes"

ifelse(1 == 0, "yes", "no")

## [1] "no"

x <- 1:10

ifelse(x <= 5, "yes", "no")

##  [1] "yes" "yes" "yes" "yes" "yes" "no"  "no"  "no"  "no"  "no"

ifelse(x > 5, x * 3, x ** 3)

##  [1]   1   8  27  64 125  18  21  24  27  30

ifelse(x < 5, "Great", sqrt(x))

##  [1] "Great"            "Great"            "Great"           
##  [4] "Great"            "2.23606797749979" "2.44948974278318"
##  [7] "2.64575131106459" "2.82842712474619" "3"               
## [10] "3.16227766016838"

x[3:6] <- NA

x

##  [1]  1  2 NA NA NA NA  7  8  9 10

ifelse(x < 5, "yes", "no")

##  [1] "yes" "yes" NA    NA    NA    NA    "no"  "no"  "no"  "no"

1.8 Compound Statement Checks

a <- c(1, 1, 0, 1, 1)

b <- c(2, 1, 0, 1)

ifelse(a == 1 & b == 1, "yes", "no")

## Warning in a == 1 & b == 1: longer object length is not a multiple of
## shorter object length

## [1] "no"  "yes" "no"  "yes" "no"

ifelse(a == 1 && b == 1, "yes", "no")

## [1] "no"

x <- 1

y <- 2

# No short-circuiting
if (x == 0 & y == 3) {
    print("Hello")
}


# With short-circuiting
if (x == 0 && y == 3) {
    print("Hello")
}

# With short-circuiting
if (x == 1 || y == 3) {
    print("Hello")
}

## [1] "Hello"

ifelse(b == 1, "Hi", ifelse(b == 0, "Bye", "GoodBye"))

## [1] "GoodBye" "Hi"      "Bye"     "Hi"

1.9 `for` loop

for (i in 1:10) {
    print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

for (i in c("Hello", "Bye")) {
    print(i)
}

## [1] "Hello"
## [1] "Bye"

for (i in seq(from = 20, to = 30, length.out = 20)) {
    print(i)
}

## [1] 20
## [1] 20.52632
## [1] 21.05263
## [1] 21.57895
## [1] 22.10526
## [1] 22.63158
## [1] 23.15789
## [1] 23.68421
## [1] 24.21053
## [1] 24.73684
## [1] 25.26316
## [1] 25.78947
## [1] 26.31579
## [1] 26.84211
## [1] 27.36842
## [1] 27.89474
## [1] 28.42105
## [1] 28.94737
## [1] 29.47368
## [1] 30

x <- 1:5

names(x) <- letters[1:5]

x

## a b c d e 
## 1 2 3 4 5

for (i in letters[1:5]) {
    print(x[i])
}

## a 
## 1 
## b 
## 2 
## c 
## 3 
## d 
## 4 
## e 
## 5

## [1] "e"

x <- 1:10

y <- seq(1, 10, 1)

identical(x, y)

## [1] FALSE

1.10 `while` loop

x <- 1

while (x < 10) {
    print(x) 
    x <- x + 3
}

## [1] 1
## [1] 4
## [1] 7

1.11 `break` and `next`

for (i in 1:10) {
    if (i %% 2 == 0) {
        next
    }
    print(i)
}

## [1] 1
## [1] 3
## [1] 5
## [1] 7
## [1] 9

for (i in 1:10) {
    if (i == 4) {
        break
    }
    print(i)
}

## [1] 1
## [1] 2
## [1] 3

2 Data Wrangling

2.1 `apply` function

mat <- matrix(data = 1:9, nrow = 3)

mat

##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

# Sum over columns
apply(X = mat, FUN = sum, MARGIN = 2)

## [1]  6 15 24

# Sum over rows
apply(X = mat, FUN = sum, MARGIN = 1)

## [1] 12 15 18

colSums(x = mat)

## [1]  6 15 24

rowSums(x = mat)

## [1] 12 15 18

mat[2, 1] <- NA

mat

##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]   NA    5    8
## [3,]    3    6    9

rowSums(x = mat)

## [1] 12 NA 18

rowSums(x = mat, na.rm = TRUE)

## [1] 12 13 18

apply(X = mat, MARGIN = 1, FUN = sum, na.rm = TRUE)

## [1] 12 13 18

2.2 `lapply` function

lis <- list(A = matrix(data = 1:9, nrow = 3), B = 1:5, C = matrix(data = 1:4, nrow = 2), D = 2)

lis

## $A
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
## 
## $B
## [1] 1 2 3 4 5
## 
## $C
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
## 
## $D
## [1] 2

lapply(X = lis, FUN = sum)

## $A
## [1] 45
## 
## $B
## [1] 15
## 
## $C
## [1] 10
## 
## $D
## [1] 2

names <- c("N1", "N2", "N3")

lapply(X = names, FUN = nchar)

## [[1]]
## [1] 2
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 2

2.3 `sapply` function

sapply(X = lis, FUN = sum)

##  A  B  C  D 
## 45 15 10  2

2.4 `mapply` function

x <- matrix(data = 1:16, nrow = 4)

y <- matrix(data = 1:16, nrow = 2)

list1 <- list(A = x, B = y, C = 5:1)

list1

## $A
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
## 
## $B
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,]    1    3    5    7    9   11   13   15
## [2,]    2    4    6    8   10   12   14   16
## 
## $C
## [1] 5 4 3 2 1

list2 <- list(A = matrix(1:16, 4), B = matrix(1:16, 8), C = 15:1)

list2

## $A
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
## 
## $B
##      [,1] [,2]
## [1,]    1    9
## [2,]    2   10
## [3,]    3   11
## [4,]    4   12
## [5,]    5   13
## [6,]    6   14
## [7,]    7   15
## [8,]    8   16
## 
## $C
##  [1] 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1

mapply(FUN = identical, list1, list2)

##     A     B     C 
##  TRUE FALSE FALSE

addNROWS <- function(x, y) {
    NROW(x) + NROW(y)
}

mapply(FUN = addNROWS, list1, list2)

##  A  B  C 
##  8 10 20

rm(list = ls())

2.5 `aggregate` function

library(ggplot2)

data("diamonds")

head(diamonds)

##   carat       cut color clarity depth table price    x    y    z
## 1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
## 3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
## 4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
## 5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48

mean(diamonds$price)

## [1] 3932.8

aggregate(formula = price ~ cut, data = diamonds, FUN = mean, na.rm = TRUE)

##         cut    price
## 1      Fair 4358.758
## 2      Good 3928.864
## 3 Very Good 3981.760
## 4   Premium 4584.258
## 5     Ideal 3457.542

aggregate(formula = price ~ cut + color, data = diamonds, FUN = mean, na.rm = TRUE)

##          cut color    price
## 1       Fair     D 4291.061
## 2       Good     D 3405.382
## 3  Very Good     D 3470.467
## 4    Premium     D 3631.293
## 5      Ideal     D 2629.095
## 6       Fair     E 3682.312
## 7       Good     E 3423.644
## 8  Very Good     E 3214.652
## 9    Premium     E 3538.914
## 10     Ideal     E 2597.550
## 11      Fair     F 3827.003
## 12      Good     F 3495.750
## 13 Very Good     F 3778.820
## 14   Premium     F 4324.890
## 15     Ideal     F 3374.939
## 16      Fair     G 4239.255
## 17      Good     G 4123.482
## 18 Very Good     G 3872.754
## 19   Premium     G 4500.742
## 20     Ideal     G 3720.706
## 21      Fair     H 5135.683
## 22      Good     H 4276.255
## 23 Very Good     H 4535.390
## 24   Premium     H 5216.707
## 25     Ideal     H 3889.335
## 26      Fair     I 4685.446
## 27      Good     I 5078.533
## 28 Very Good     I 5255.880
## 29   Premium     I 5946.181
## 30     Ideal     I 4451.970
## 31      Fair     J 4975.655
## 32      Good     J 4574.173
## 33 Very Good     J 5103.513
## 34   Premium     J 6294.592
## 35     Ideal     J 4918.186

aggregate(formula = cbind(price, carat) ~ cut, data = diamonds, FUN = mean, na.rm = TRUE)

##         cut    price     carat
## 1      Fair 4358.758 1.0461366
## 2      Good 3928.864 0.8491847
## 3 Very Good 3981.760 0.8063814
## 4   Premium 4584.258 0.8919549
## 5     Ideal 3457.542 0.7028370

aggregate(cbind(price, carat) ~ cut + color, diamonds, mean, na.rm = TRUE)

##          cut color    price     carat
## 1       Fair     D 4291.061 0.9201227
## 2       Good     D 3405.382 0.7445166
## 3  Very Good     D 3470.467 0.6964243
## 4    Premium     D 3631.293 0.7215471
## 5      Ideal     D 2629.095 0.5657657
## 6       Fair     E 3682.312 0.8566071
## 7       Good     E 3423.644 0.7451340
## 8  Very Good     E 3214.652 0.6763167
## 9    Premium     E 3538.914 0.7177450
## 10     Ideal     E 2597.550 0.5784012
## 11      Fair     F 3827.003 0.9047115
## 12      Good     F 3495.750 0.7759296
## 13 Very Good     F 3778.820 0.7409612
## 14   Premium     F 4324.890 0.8270356
## 15     Ideal     F 3374.939 0.6558285
## 16      Fair     G 4239.255 1.0238217
## 17      Good     G 4123.482 0.8508955
## 18 Very Good     G 3872.754 0.7667986
## 19   Premium     G 4500.742 0.8414877
## 20     Ideal     G 3720.706 0.7007146
## 21      Fair     H 5135.683 1.2191749
## 22      Good     H 4276.255 0.9147293
## 23 Very Good     H 4535.390 0.9159485
## 24   Premium     H 5216.707 1.0164492
## 25     Ideal     H 3889.335 0.7995249
## 26      Fair     I 4685.446 1.1980571
## 27      Good     I 5078.533 1.0572222
## 28 Very Good     I 5255.880 1.0469518
## 29   Premium     I 5946.181 1.1449370
## 30     Ideal     I 4451.970 0.9130291
## 31      Fair     J 4975.655 1.3411765
## 32      Good     J 4574.173 1.0995440
## 33 Very Good     J 5103.513 1.1332153
## 34   Premium     J 6294.592 1.2930941
## 35     Ideal     J 4918.186 1.0635937

rm(list = ls())

2.6 `plyr` package

library(plyr)

data("baseball")

head(baseball)

##            id year stint team lg  g  ab  r  h X2b X3b hr rbi sb cs bb so
## 4   ansonca01 1871     1  RC1    25 120 29 39  11   3  0  16  6  2  2  1
## 44  forceda01 1871     1  WS3    32 162 45 45   9   4  0  29  8  0  4  0
## 68  mathebo01 1871     1  FW1    19  89 15 24   3   1  0  10  2  1  2  0
## 99  startjo01 1871     1  NY2    33 161 35 58   5   1  1  34  4  2  3  0
## 102 suttoez01 1871     1  CL1    29 128 35 45   3   7  3  23  3  1  1  0
## 106 whitede01 1871     1  CL1    29 146 40 47   6   5  1  21  2  2  4  1
##     ibb hbp sh sf gidp
## 4    NA  NA NA NA   NA
## 44   NA  NA NA NA   NA
## 68   NA  NA NA NA   NA
## 99   NA  NA NA NA   NA
## 102  NA  NA NA NA   NA
## 106  NA  NA NA NA   NA

baseball$sf[baseball$year < 1954] <- 0

any(is.na(baseball$sf))

## [1] FALSE

baseball$hbp[is.na(baseball$hbp)] <- 0

any(is.na(baseball$hbp))

## [1] FALSE

baseball <- baseball[baseball$ab >= 50, ]

head(baseball)

##            id year stint team lg  g  ab  r  h X2b X3b hr rbi sb cs bb so
## 4   ansonca01 1871     1  RC1    25 120 29 39  11   3  0  16  6  2  2  1
## 44  forceda01 1871     1  WS3    32 162 45 45   9   4  0  29  8  0  4  0
## 68  mathebo01 1871     1  FW1    19  89 15 24   3   1  0  10  2  1  2  0
## 99  startjo01 1871     1  NY2    33 161 35 58   5   1  1  34  4  2  3  0
## 102 suttoez01 1871     1  CL1    29 128 35 45   3   7  3  23  3  1  1  0
## 106 whitede01 1871     1  CL1    29 146 40 47   6   5  1  21  2  2  4  1
##     ibb hbp sh sf gidp
## 4    NA   0 NA  0   NA
## 44   NA   0 NA  0   NA
## 68   NA   0 NA  0   NA
## 99   NA   0 NA  0   NA
## 102  NA   0 NA  0   NA
## 106  NA   0 NA  0   NA

baseball$OBP <- with(data = baseball, expr = (h + bb + hbp) / (ab + bb + hbp + sf))

head(baseball)

##            id year stint team lg  g  ab  r  h X2b X3b hr rbi sb cs bb so
## 4   ansonca01 1871     1  RC1    25 120 29 39  11   3  0  16  6  2  2  1
## 44  forceda01 1871     1  WS3    32 162 45 45   9   4  0  29  8  0  4  0
## 68  mathebo01 1871     1  FW1    19  89 15 24   3   1  0  10  2  1  2  0
## 99  startjo01 1871     1  NY2    33 161 35 58   5   1  1  34  4  2  3  0
## 102 suttoez01 1871     1  CL1    29 128 35 45   3   7  3  23  3  1  1  0
## 106 whitede01 1871     1  CL1    29 146 40 47   6   5  1  21  2  2  4  1
##     ibb hbp sh sf gidp       OBP
## 4    NA   0 NA  0   NA 0.3360656
## 44   NA   0 NA  0   NA 0.2951807
## 68   NA   0 NA  0   NA 0.2857143
## 99   NA   0 NA  0   NA 0.3719512
## 102  NA   0 NA  0   NA 0.3565891
## 106  NA   0 NA  0   NA 0.3400000

tail(baseball)

##              id year stint team lg   g  ab  r   h X2b X3b hr rbi sb cs  bb
## 89499 claytro01 2007     1  TOR AL  69 189 23  48  14   0  1  12  2  1  14
## 89502 cirilje01 2007     1  MIN AL  50 153 18  40   9   2  2  21  2  0  15
## 89521 bondsba01 2007     1  SFN NL 126 340 75  94  14   0 28  66  5  0 132
## 89523 biggicr01 2007     1  HOU NL 141 517 68 130  31   3 10  50  4  3  23
## 89530 ausmubr01 2007     1  HOU NL 117 349 38  82  16   3  3  25  6  1  37
## 89533  aloumo01 2007     1  NYN NL  87 328 51 112  19   1 13  49  3  0  27
##        so ibb hbp sh sf gidp       OBP
## 89499  50   0   1  3  3    8 0.3043478
## 89502  13   0   1  3  2    9 0.3274854
## 89521  54  43   3  0  2   13 0.4800839
## 89523 112   0   3  7  5    5 0.2846715
## 89530  74   3   6  4  1   11 0.3180662
## 89533  30   5   2  0  3   13 0.3916667

obp <- function(data) {
    c(OBP = with(data, sum(h, bb, hbp) / sum(ab, bb, hbp, sf)))
}

# Split-apply-combine praradigm
careerOBP <- ddply(.data = baseball, .variables = "id", .fun = obp)

head(careerOBP)

##          id       OBP
## 1 aaronha01 0.3739493
## 2 adairje01 0.2922746
## 3 adamsba01 0.2550694
## 4 adamsbo03 0.3403342
## 5 adcocjo01 0.3368726
## 6 aguilri01 0.2037037

careerOBP <- careerOBP[order(careerOBP$OBP, decreasing = TRUE), ]

head(careerOBP)

##             id       OBP
## 1089 willite01 0.4816861
## 875   ruthba01 0.4742209
## 658  mcgrajo01 0.4657478
## 356  gehrilo01 0.4477848
## 85   bondsba01 0.4444622
## 476  hornsro01 0.4339068

tail(careerOBP)

##            id        OBP
## 7   aguirha01 0.10600707
## 204 cormirh01 0.10169492
## 681 millebo04 0.10000000
## 55  bedrost01 0.09230769
## 75  bielemi01 0.08219178
## 478 houghch01 0.07575758

lis <- list(A = matrix(1:9, 3), B = 1:5, C = matrix(1:4, 2), D = 2)

lis

## $A
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
## 
## $B
## [1] 1 2 3 4 5
## 
## $C
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
## 
## $D
## [1] 2

lapply(X = lis, FUN = sum)

## $A
## [1] 45
## 
## $B
## [1] 15
## 
## $C
## [1] 10
## 
## $D
## [1] 2

llply(.data = lis, .fun = sum)

## $A
## [1] 45
## 
## $B
## [1] 15
## 
## $C
## [1] 10
## 
## $D
## [1] 2

identical(lapply(X = lis, FUN = sum), llply(.data = lis, .fun = sum))

## [1] TRUE

sapply(X = lis, FUN = sum)

##  A  B  C  D 
## 45 15 10  2

laply(.data = lis, .fun = sum)

## [1] 45 15 10  2

class(laply(.data = lis, .fun = sum))

## [1] "numeric"

library(ggplot2)

data(diamonds)

aggregate(price ~ cut, diamonds, each(mean, median))

##         cut price.mean price.median
## 1      Fair   4358.758     3282.000
## 2      Good   3928.864     3050.500
## 3 Very Good   3981.760     2648.000
## 4   Premium   4584.258     3185.000
## 5     Ideal   3457.542     1810.000

numcolwise(.fun = sum, na.rm = TRUE)(diamonds)

##      carat   depth   table     price        x        y        z
## 1 43040.87 3330763 3099241 212135217 309138.6 309320.3 190879.3

head(diamonds[, sapply(diamonds, is.numeric)])

##   carat depth table price    x    y    z
## 1  0.23  61.5    55   326 3.95 3.98 2.43
## 2  0.21  59.8    61   326 3.89 3.84 2.31
## 3  0.23  56.9    65   327 4.05 4.07 2.31
## 4  0.29  62.4    58   334 4.20 4.23 2.63
## 5  0.31  63.3    58   335 4.34 4.35 2.75
## 6  0.24  62.8    57   336 3.94 3.96 2.48

sapply(diamonds[, sapply(diamonds, is.numeric)], mean)

##        carat        depth        table        price            x 
##    0.7979397   61.7494049   57.4571839 3932.7997219    5.7311572 
##            y            z 
##    5.7345260    3.5387338

2.7 Combining Datasets

sport <- c("Hockey", "Baseball", "Football")

league <- c("NHL", "MLB", "NFL")

trophy <- c("Stanley Cup", "Commisioner's Cup", "Vinci Lombardi Trophy")

sports1 <- cbind(sport, league, trophy)

sports1

##      sport      league trophy                 
## [1,] "Hockey"   "NHL"  "Stanley Cup"          
## [2,] "Baseball" "MLB"  "Commisioner's Cup"    
## [3,] "Football" "NFL"  "Vinci Lombardi Trophy"

class(sports1)

## [1] "matrix"

sports2 <- data.frame(sport = c("Basketball", "Golf"), league = c("NBA", "PGA"), trophy = c("T1", "T2"))

sports2

##        sport league trophy
## 1 Basketball    NBA     T1
## 2       Golf    PGA     T2

sports <- rbind(sports1, sports2)

sports

##        sport league                trophy
## 1     Hockey    NHL           Stanley Cup
## 2   Baseball    MLB     Commisioner's Cup
## 3   Football    NFL Vinci Lombardi Trophy
## 4 Basketball    NBA                    T1
## 5       Golf    PGA                    T2

2.8 Join Datasets

fileURL <- "http://www.jaredlander.com/data/countryCodes.csv"

codes <- read.csv(file = fileURL, stringsAsFactors = FALSE)

countries <- read.csv(file = "http://www.jaredlander.com/data/GovType.csv", stringsAsFactors = FALSE)

head(codes)

##   Code         Country.name Year ccTLD    ISO.3166.2
## 1   AD              Andorra 1974   .ad ISO 3166-2:AD
## 2   AE United Arab Emirates 1974   .ae ISO 3166-2:AE
## 3   AF          Afghanistan 1974   .af ISO 3166-2:AF
## 4   AG  Antigua and Barbuda 1974   .ag ISO 3166-2:AG
## 5   AI             Anguilla 1983   .ai ISO 3166-2:AI
## 6   AL              Albania 1974   .al ISO 3166-2:AL
##                                             Notes
## 1                                                
## 2                                                
## 3                                                
## 4                                                
## 5 AI previously represented French Afar and Issas
## 6

head(countries)

##               Country
## 1         Afghanistan
## 2             Albania
## 3             Algeria
## 4             Andorra
## 5              Angola
## 6 Antigua and Barbuda
##                                                                               GovernmentType
## 1                                                                           Islamic republic
## 2                                                                    parliamentary democracy
## 3                                                                                   republic
## 4                                                                    parliamentary democracy
## 5                                                   republic; multiparty presidential regime
## 6 constitutional monarchy with a parliamentary system of government and a Commonwealth realm

# View(codes)

countryMerged <- merge(codes, countries, by.x = "Country.name", by.y = "Country")

head(countryMerged)

##          Country.name Code Year ccTLD    ISO.3166.2
## 1         Afghanistan   AF 1974   .af ISO 3166-2:AF
## 2             Albania   AL 1974   .al ISO 3166-2:AL
## 3             Algeria   DZ 1974   .dz ISO 3166-2:DZ
## 4             Andorra   AD 1974   .ad ISO 3166-2:AD
## 5              Angola   AO 1974   .ao ISO 3166-2:AO
## 6 Antigua and Barbuda   AG 1974   .ag ISO 3166-2:AG
##                                    Notes
## 1                                       
## 2                                       
## 3 Code taken from name in Kabyle: Dzayer
## 4                                       
## 5                                       
## 6                                       
##                                                                               GovernmentType
## 1                                                                           Islamic republic
## 2                                                                    parliamentary democracy
## 3                                                                                   republic
## 4                                                                    parliamentary democracy
## 5                                                   republic; multiparty presidential regime
## 6 constitutional monarchy with a parliamentary system of government and a Commonwealth realm

library(plyr)

codes <- rename(x = codes, replace = c(Country.name = "Country"))

countryJoined <- join(x = codes, y = countries, by = "Country")

head(countryJoined)

##   Code              Country Year ccTLD    ISO.3166.2
## 1   AD              Andorra 1974   .ad ISO 3166-2:AD
## 2   AE United Arab Emirates 1974   .ae ISO 3166-2:AE
## 3   AF          Afghanistan 1974   .af ISO 3166-2:AF
## 4   AG  Antigua and Barbuda 1974   .ag ISO 3166-2:AG
## 5   AI             Anguilla 1983   .ai ISO 3166-2:AI
## 6   AL              Albania 1974   .al ISO 3166-2:AL
##                                             Notes
## 1                                                
## 2                                                
## 3                                                
## 4                                                
## 5 AI previously represented French Afar and Issas
## 6                                                
##                                                                                                          GovernmentType
## 1                                                                                               parliamentary democracy
## 2 federation with specified powers delegated to the UAE federal government and other powers reserved to member emirates
## 3                                                                                                      Islamic republic
## 4                            constitutional monarchy with a parliamentary system of government and a Commonwealth realm
## 5                                                                                                                  <NA>
## 6                                                                                               parliamentary democracy

# View(countryJoined)

2.9 `reshape2` package

library(reshape2)

data("airquality")

head(airquality)

##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

airmelt <- melt(data = airquality, id.vars = c("Month", "Day"), value.name = "Value", variable.name = "Metric")

head(airmelt)

##   Month Day Metric Value
## 1     5   1  Ozone    41
## 2     5   2  Ozone    36
## 3     5   3  Ozone    12
## 4     5   4  Ozone    18
## 5     5   5  Ozone    NA
## 6     5   6  Ozone    28

dim(airquality)

## [1] 153   6

dim(airmelt)

## [1] 612   4

airCast <- dcast(data = airmelt, formula = Month + Day ~  Metric, value.var = "Value")

head(airCast)

##   Month Day Ozone Solar.R Wind Temp
## 1     5   1    41     190  7.4   67
## 2     5   2    36     118  8.0   72
## 3     5   3    12     149 12.6   74
## 4     5   4    18     313 11.5   62
## 5     5   5    NA      NA 14.3   56
## 6     5   6    28      NA 14.9   66

airCast <- airCast[, c("Ozone", "Solar.R", "Wind", "Temp", "Month", "Day")]

head(airCast)

##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

R Programming: Basics to Advanced (Part 2), First Edition

Naimish Agarwal

1 Programming Constructs

1.1 Hello World

1.2 Function Arguments

1.3 Return values from functions

1.4 do.call function

1.5 if-else statement

1.6 switch statement

1.7 Checks on Vectors

1.8 Compound Statement Checks

1.9 for loop

1.10 while loop

1.11 break and next

2 Data Wrangling

2.1 apply function

2.2 lapply function

2.3 sapply function

2.4 mapply function

2.5 aggregate function

2.6 plyr package

2.7 Combining Datasets

2.8 Join Datasets

2.9 reshape2 package

1.4 `do.call` function

1.5 `if`-`else` statement

1.6 `switch` statement

1.9 `for` loop

1.10 `while` loop

1.11 `break` and `next`

2.1 `apply` function

2.2 `lapply` function

2.3 `sapply` function

2.4 `mapply` function

2.5 `aggregate` function

2.6 `plyr` package

2.9 `reshape2` package