library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)

Videos

Conditionals, controls, and functions

if(){} else

# CAUTION: at top level, `else` must sit on the same line as the closing brace
# of the preceding branch; otherwise R treats the `if` statement as finished.
if (TRUE) {
  print("true")
} else {
  print("false")
}
## [1] "true"

Professor’s example

price <- 15.99

# Thresholds are checked from cheapest upward; the first matching branch
# supplies the message, which is then printed once.
print(
  if (price < 10) {
    "This is an excellent deal!"
  } else if (price < 11) {
    "This is a fair price"
  } else if (price < 12) {
    "This product is slightly overpriced"
  } else {
    "This product is overpriced"
  }
)
## [1] "This product is overpriced"

ifelse()

Returns a vector of the same length as the test condition.

a <- c(NA, 7, 2, 9)
# Label every element of `a` according to whether it is missing.
ifelse(test = is.na(a), yes = "NA", no = "Not NA")
## [1] "NA"     "Not NA" "Not NA" "Not NA"

Creating loops

while

x <- 10
# Count down from 10, stopping once x is no longer positive.
repeat {
  if (x <= 0) {
    break
  }
  print(x)
  x <- x - 1
}
## [1] 10
## [1] 9
## [1] 8
## [1] 7
## [1] 6
## [1] 5
## [1] 4
## [1] 3
## [1] 2
## [1] 1
counter <- 0
# Print 0 through 8; the loop exits as soon as counter reaches 9.
repeat {
  if (counter >= 9) {
    break
  }
  print(counter)
  counter <- counter + 1
}
## [1] 0
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8

for

Used to iterate over the elements of a vector or list, such as a column of a data frame.

# Visit the values 1 to 4 in order, printing each one.
# as.numeric() keeps them as doubles, matching a literal c(1, 2, 3, 4).
for (i in as.numeric(1:4)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
prices <- c(12.43, 9.99, 18.22, 7.25, 0.50)
numCheap <- 0
# Tally the prices below 10. The comparison yields TRUE/FALSE, which R adds
# as 1/0. (The vectorized equivalent of this loop is sum(prices < 10).)
for (p in prices) {
  numCheap <- numCheap + (p < 10)
}
print(numCheap)
## [1] 3

User-defined functions

#' Add two numbers, printing a friendly message for bad input
#'
#' @param x,y Values to add. Both must be supplied and numeric.
#' @return `x + y` when both arguments are numeric. Otherwise a message is
#'   printed and that message string is returned (it is the value of
#'   `print()`).
addTogether <- function(x, y) {
  # missing() is the base-R test for "argument not supplied". `&&` and `||`
  # are the scalar, short-circuiting operators that belong in an `if`
  # condition; `&` and `|` are meant for elementwise vector logic.
  if (missing(x) && missing(y)) {
    print("You didn't enter any values. Please enter two numbers")
  } else if (missing(x) || missing(y)) {
    print("You only entered one value. Please enter two values.")
  } else if (is.numeric(x) && is.numeric(y)) {
    x + y
  } else {
    print("Sorry, please enter two numbers.")
  }
}

#call function
addTogether("3",3)
## [1] "Sorry, please enter two numbers."

Apply to all elements of the input

sapply(x, FUN)
The output is a vector or a matrix when the results can be simplified; otherwise it is a list.

# as.data.frame() on a bare vector names the single column after the
# expression itself, which is why the result below is labelled
# "c(1, 2, 3, 4, 5, 6, 7)".
df1 <- as.data.frame(c(1, 2,3, 4, 5, 6, 7))
# sapply() applies max() to each column of the data frame.
sapply(df1,max)
## c(1, 2, 3, 4, 5, 6, 7) 
##                      7

dplyr

  • select() choose columns
  • filter() filter rows
  • mutate() create new columns
  • summarize() aggregate data
  • arrange() sort data
  • group_by() group data
# The first column of the CSV has an empty header, so readr renames it to
# `...1` (see the "New names" message below).
sales_data <- read_csv("salesdata.csv")
## New names:
## Rows: 10 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): Region, Product dbl (3): ...1, Sales, Profit date (1): Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
knitr::kable(sales_data)
...1 Date Region Product Sales Profit
1 2021-01-01 North America A 267.7419 38.53529
2 2021-01-02 Europe B 100.6701 69.09701
3 2021-01-03 North America C 357.8518 79.79671
4 2021-01-04 Europe A 121.2791 41.44539
5 2021-01-05 North America B 105.2566 93.81673
6 2021-01-06 Europe C 399.9681 34.36973
7 2021-01-07 North America A 412.3462 78.54400
8 2021-01-08 Europe B 369.8670 65.02684
9 2021-01-09 North America C 241.6270 29.62530
10 2021-01-10 Europe A 286.5879 33.74372

Pipe operator %>%

  • allows chaining operations
  • passes result of one function to the next
# North American rows only, three columns, largest sale first.
sales_data %>%
  filter(Region == "North America") %>%
  select(Date, Product, Sales) %>%
  arrange(desc(Sales)) %>%
  kable()
Date Product Sales
2021-01-07 A 412.3462
2021-01-03 C 357.8518
2021-01-01 A 267.7419
2021-01-09 C 241.6270
2021-01-05 B 105.2566

select()

choose specific columns

# Keep only the Date, Product and Sales columns.
sales_data %>%
  select(Date, Product, Sales) %>%
  kable()
Date Product Sales
2021-01-01 A 267.7419
2021-01-02 B 100.6701
2021-01-03 C 357.8518
2021-01-04 A 121.2791
2021-01-05 B 105.2566
2021-01-06 C 399.9681
2021-01-07 A 412.3462
2021-01-08 B 369.8670
2021-01-09 C 241.6270
2021-01-10 A 286.5879

filter()

filter rows by a conditional

# Keep only the rows whose Region is "North America".
sales_data %>%
  filter(Region == "North America") %>%
  kable()
...1 Date Region Product Sales Profit
1 2021-01-01 North America A 267.7419 38.53529
3 2021-01-03 North America C 357.8518 79.79671
5 2021-01-05 North America B 105.2566 93.81673
7 2021-01-07 North America A 412.3462 78.54400
9 2021-01-09 North America C 241.6270 29.62530

mutate()

create new columns

# Add a Profit_Margin column: profit as a fraction of sales.
sales_data %>%
  mutate(Profit_Margin = Profit / Sales) %>%
  kable()
...1 Date Region Product Sales Profit Profit_Margin
1 2021-01-01 North America A 267.7419 38.53529 0.1439270
2 2021-01-02 Europe B 100.6701 69.09701 0.6863707
3 2021-01-03 North America C 357.8518 79.79671 0.2229881
4 2021-01-04 Europe A 121.2791 41.44539 0.3417356
5 2021-01-05 North America B 105.2566 93.81673 0.8913146
6 2021-01-06 Europe C 399.9681 34.36973 0.0859312
7 2021-01-07 North America A 412.3462 78.54400 0.1904807
8 2021-01-08 Europe B 369.8670 65.02684 0.1758114
9 2021-01-09 North America C 241.6270 29.62530 0.1226076
10 2021-01-10 Europe A 286.5879 33.74372 0.1177430

summarize() and group_by()

aggregate data

# One row per Product, holding the sum of its Sales.
sales_data %>%
  group_by(Product) %>%
  summarize(Total_Sales = sum(Sales)) %>%
  kable()
Product Total_Sales
A 1087.9552
B 575.7937
C 999.4469

arrange()

sorts data

# desc() reverses the sort order, so the largest Sales value comes first.
sales_data %>%
  arrange(desc(Sales)) %>%
  kable()
...1 Date Region Product Sales Profit
7 2021-01-07 North America A 412.3462 78.54400
6 2021-01-06 Europe C 399.9681 34.36973
8 2021-01-08 Europe B 369.8670 65.02684
3 2021-01-03 North America C 357.8518 79.79671
10 2021-01-10 Europe A 286.5879 33.74372
1 2021-01-01 North America A 267.7419 38.53529
9 2021-01-09 North America C 241.6270 29.62530
4 2021-01-04 Europe A 121.2791 41.44539
5 2021-01-05 North America B 105.2566 93.81673
2 2021-01-02 Europe B 100.6701 69.09701

Readings

tryCatch() and stop()

Handle errors that might occur during execution: tryCatch() runs code and passes any error or warning to a handler, while stop() signals an error.

#' Add two numbers, reporting any error instead of failing
#'
#' @param x,y Values to add; both are expected to be numeric.
#' @return `x + y` on success. If an error is raised (non-numeric input or a
#'   missing argument) its message is written with `cat()` and the result of
#'   that `cat()` call, `NULL`, is returned.
addTogether <- function(x, y) {
  # Handler: print the condition's message after a fixed prefix.
  report_failure <- function(cond) {
    cat("An error occurred:", conditionMessage(cond), "\n")
  }

  tryCatch(
    {
      both_numeric <- is.numeric(x) && is.numeric(y)
      if (!both_numeric) {
        stop("Sorry, please enter two numeric values.")
      }
      x + y
    },
    error = report_failure
  )
}

# Call the function
addTogether(3)
## An error occurred: argument "y" is missing, with no default

Example

#' Compute log(x) / y, reporting errors and warnings instead of failing
#'
#' @param x Value passed to `log()`.
#' @param y Divisor.
#' @return `log(x) / y` on success. If a warning is raised, the warning is
#'   printed and `NA` is returned. If an error is raised, the error is printed
#'   and the condition object is returned, because it is the value of
#'   `print(e)`.
log_and_divide <- function(x, y) {
  tryCatch(
    # The value of this expression is the value of tryCatch(), so neither a
    # temporary variable nor an explicit return() is needed.
    log(x) / y,
    error = function(e) {
      message("An Error Occurred")
      print(e)
    },
    warning = function(w) {
      message("A Warning Occurred")
      print(w)
      NA
    }
  )
}

log_and_divide(10)
## An Error Occurred
## <simpleError in log_and_divide(10): argument "y" is missing, with no default>
log_and_divide(-10, 2)
## A Warning Occurred
## <simpleWarning in log(x): NaNs produced>
## [1] NA

sapply()

library(tibble)

# With an explicitly named column, the sapply() result is labelled "column1".
df1 <- tibble(column1 = c(1, 2, 3, 4, 5, 6, 7))
sapply(X = df1, FUN = max)
## column1 
##       7
# NOTE: despite its name, df2 is a plain numeric vector, not a data frame.
df2 <-c(1,2,3,4,5,6,7)
# sapply() visits each element on its own, so max() of a single number just
# returns that number and the vector comes back unchanged.
sapply(df2, max)
## [1] 1 2 3 4 5 6 7
#' Mean of `x`, ignoring missing values
#'
#' @param x Values to average; passed straight to `mean()`.
#' @return The result of `mean(x, na.rm = TRUE)`.
takemean <- function(x) mean(x, na.rm = TRUE)

sapply(attitude,takemean)
##     rating complaints privileges   learning     raises   critical    advance 
##   64.63333   66.60000   53.13333   56.36667   64.63333   74.76667   42.93333
summary(attitude)
##      rating        complaints     privileges       learning         raises     
##  Min.   :40.00   Min.   :37.0   Min.   :30.00   Min.   :34.00   Min.   :43.00  
##  1st Qu.:58.75   1st Qu.:58.5   1st Qu.:45.00   1st Qu.:47.00   1st Qu.:58.25  
##  Median :65.50   Median :65.0   Median :51.50   Median :56.50   Median :63.50  
##  Mean   :64.63   Mean   :66.6   Mean   :53.13   Mean   :56.37   Mean   :64.63  
##  3rd Qu.:71.75   3rd Qu.:77.0   3rd Qu.:62.50   3rd Qu.:66.75   3rd Qu.:71.00  
##  Max.   :85.00   Max.   :90.0   Max.   :83.00   Max.   :75.00   Max.   :88.00  
##     critical        advance     
##  Min.   :49.00   Min.   :25.00  
##  1st Qu.:69.25   1st Qu.:35.00  
##  Median :77.50   Median :41.00  
##  Mean   :74.77   Mean   :42.93  
##  3rd Qu.:80.00   3rd Qu.:47.75  
##  Max.   :92.00   Max.   :72.00