library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(knitr)

Videos

Conditionals, controls, and functions

if(){} else

# Smallest possible if/else: the condition is the literal TRUE, so only the
# first branch runs.
if(TRUE) {
  print("true")
} else { # CAUTION: `else` must sit on the same line as the closing brace of the `if` block
  print("false")
}

## [1] "true"

Professor’s example

price <- 15.99

# Walk the price tiers from cheapest to most expensive and print the verdict
# of the first tier that matches.
print(
  if (price < 10) {
    "This is an excellent deal!"
  } else if (price < 11) {
    "This is a fair price"
  } else if (price < 12) {
    "This product is slightly overpriced"
  } else {
    "This product is overpriced"
  }
)

## [1] "This product is overpriced"

ifelse()

Returns a vector of the same length as the test condition

# Label every element of `a` according to whether it is missing.
a <- c(NA, 7, 2, 9)
ifelse(test = is.na(a), yes = "NA", no = "Not NA")

## [1] "NA"     "Not NA" "Not NA" "Not NA"

Creating loops

while

# Count down from 10, printing each value; the loop ends once x reaches 0.
x <- 10
while (x > 0) {
  print(x)
  x <- x - 1
}

## [1] 10
## [1] 9
## [1] 8
## [1] 7
## [1] 6
## [1] 5
## [1] 4
## [1] 3
## [1] 2
## [1] 1

# Count up from 0, printing each value; the loop ends once counter reaches 9.
counter <- 0
while (counter < 9) {
  print(counter)
  counter <- counter + 1
}

## [1] 0
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8

for

Used to iterate over the elements of a vector, such as a column of a data frame

# Visit each element of the vector in order; `i` holds the current element.
for (i in c(1, 2, 3, 4)) {
  print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4

# Count how many prices are strictly below 10.
prices <- c(12.43, 9.99, 18.22, 7.25, 0.50)
numCheap <- 0
for (p in prices) {
  if (p < 10) numCheap <- numCheap + 1
}
print(numCheap)

## [1] 3

User-defined functions

# Add two numbers, printing a friendly message when the input is unusable.
#
# Arguments:
#   x, y  The two values to add.
#
# Returns:
#   x + y when both arguments are supplied and numeric. Otherwise one of the
#   explanatory messages is printed (print() also returns it invisibly).
addTogether <- function(x, y) {
  # `if` needs a single TRUE/FALSE, so the scalar, short-circuiting operators
  # && and || are used instead of the elementwise & and |. missing() is the
  # base-R test for an argument that was not supplied.
  if (missing(x) && missing(y)) {
    print("You didn't enter any values. Please enter two numbers")
  } else if (missing(x) || missing(y)) {
    print("You only entered one value. Please enter two values.")
  } else if (is.numeric(x) && is.numeric(y)) {
    x + y
  } else {
    print("Sorry, please enter two numbers.")
  }
}

# Call the function with a character value as the first argument
addTogether(x = "3", y = 3)

## [1] "Sorry, please enter two numbers."

Apply to all elements of the input

sapply(x, FUN)
The output is either a vector or a matrix

# Build a one-column data frame and take the maximum of each column.
df1 <- as.data.frame(c(1, 2, 3, 4, 5, 6, 7))
sapply(X = df1, FUN = max)

## c(1, 2, 3, 4, 5, 6, 7) 
##                      7

`dplyr`

select() choose columns
filter() filter rows
mutate() create new columns
summarize() aggregate data
arrange() sort data
group_by() group data

# Load the sales data; the file's first column has no header, so readr
# names it `...1`.
sales_data <- read_csv(file = "salesdata.csv")

## New names:
## Rows: 10 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): Region, Product dbl (3): ...1, Sales, Profit date (1): Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Render the full data set as a table.
kable(sales_data)

…1	Date	Region	Product	Sales	Profit
1	2021-01-01	North America	A	267.7419	38.53529
2	2021-01-02	Europe	B	100.6701	69.09701
3	2021-01-03	North America	C	357.8518	79.79671
4	2021-01-04	Europe	A	121.2791	41.44539
5	2021-01-05	North America	B	105.2566	93.81673
6	2021-01-06	Europe	C	399.9681	34.36973
7	2021-01-07	North America	A	412.3462	78.54400
8	2021-01-08	Europe	B	369.8670	65.02684
9	2021-01-09	North America	C	241.6270	29.62530
10	2021-01-10	Europe	A	286.5879	33.74372

Pipe operator %>%

Allows chaining operations together.
Passes the result of one function as the first argument of the next.

# North American sales only, keeping three columns, largest sale first.
sales_data %>%
  filter(Region == "North America") %>%
  select(Date, Product, Sales) %>%
  arrange(desc(Sales)) %>%
  kable()

Date	Product	Sales
2021-01-07	A	412.3462
2021-01-03	C	357.8518
2021-01-01	A	267.7419
2021-01-09	C	241.6270
2021-01-05	B	105.2566

`select()`

choose specific columns

# Keep only the Date, Product and Sales columns.
sales_data %>%
  select(Date, Product, Sales) %>%
  kable()

Date	Product	Sales
2021-01-01	A	267.7419
2021-01-02	B	100.6701
2021-01-03	C	357.8518
2021-01-04	A	121.2791
2021-01-05	B	105.2566
2021-01-06	C	399.9681
2021-01-07	A	412.3462
2021-01-08	B	369.8670
2021-01-09	C	241.6270
2021-01-10	A	286.5879

`filter()`

filter rows by a conditional

# Keep only the rows whose Region is "North America".
sales_data %>%
  filter(Region == "North America") %>%
  kable()

…1	Date	Region	Product	Sales	Profit
1	2021-01-01	North America	A	267.7419	38.53529
3	2021-01-03	North America	C	357.8518	79.79671
5	2021-01-05	North America	B	105.2566	93.81673
7	2021-01-07	North America	A	412.3462	78.54400
9	2021-01-09	North America	C	241.6270	29.62530

`mutate()`

create new columns

# Add a Profit_Margin column: profit as a fraction of sales.
sales_data %>%
  mutate(Profit_Margin = Profit / Sales) %>%
  kable()

…1	Date	Region	Product	Sales	Profit	Profit_Margin
1	2021-01-01	North America	A	267.7419	38.53529	0.1439270
2	2021-01-02	Europe	B	100.6701	69.09701	0.6863707
3	2021-01-03	North America	C	357.8518	79.79671	0.2229881
4	2021-01-04	Europe	A	121.2791	41.44539	0.3417356
5	2021-01-05	North America	B	105.2566	93.81673	0.8913146
6	2021-01-06	Europe	C	399.9681	34.36973	0.0859312
7	2021-01-07	North America	A	412.3462	78.54400	0.1904807
8	2021-01-08	Europe	B	369.8670	65.02684	0.1758114
9	2021-01-09	North America	C	241.6270	29.62530	0.1226076
10	2021-01-10	Europe	A	286.5879	33.74372	0.1177430

`summarize()` and `group_by()`

aggregate data

# Total sales per product.
sales_data %>%
  group_by(Product) %>%
  summarize(Total_Sales = sum(Sales)) %>%
  kable()

Product	Total_Sales
A	1087.9552
B	575.7937
C	999.4469

`arrange()`

sorts data

# Sort rows by Sales, largest first (desc() means descending order).
sales_data %>%
  arrange(desc(Sales)) %>%
  kable()

…1	Date	Region	Product	Sales	Profit
7	2021-01-07	North America	A	412.3462	78.54400
6	2021-01-06	Europe	C	399.9681	34.36973
8	2021-01-08	Europe	B	369.8670	65.02684
3	2021-01-03	North America	C	357.8518	79.79671
10	2021-01-10	Europe	A	286.5879	33.74372
1	2021-01-01	North America	A	267.7419	38.53529
9	2021-01-09	North America	C	241.6270	29.62530
4	2021-01-04	Europe	A	121.2791	41.44539
5	2021-01-05	North America	B	105.2566	93.81673
2	2021-01-02	Europe	B	100.6701	69.09701

Readings

`tryCatch()` and `stop()`

Handle errors that might occur during execution, instead of letting them stop the script

# Add two numbers, reporting any failure instead of stopping the script.
#
# Arguments:
#   x, y  The two values to add.
#
# Returns:
#   x + y when both are numeric. On any error (including a missing argument)
#   the error message is written with cat() and the handler's value is
#   returned instead.
addTogether <- function(x, y) {
  report_failure <- function(e) {
    cat("An error occurred:", conditionMessage(e), "\n")
  }

  tryCatch(
    expr = {
      both_numeric <- is.numeric(x) && is.numeric(y)
      if (!both_numeric) {
        stop("Sorry, please enter two numeric values.")
      }
      x + y
    },
    error = report_failure
  )
}

# Call the function with only one argument: the missing `y` raises an error,
# which the error handler reports
addTogether(3)

## An error occurred: argument "y" is missing, with no default

Example

# Compute log(x) / y, reporting errors and warnings instead of failing.
#
# Arguments:
#   x  Value passed to log().
#   y  Divisor.
#
# Returns:
#   log(x) / y on success. If a warning is raised, a message and the warning
#   are shown and NA is returned. If an error is raised, a message and the
#   error are shown and the value of print(e) is returned.
log_and_divide <- function(x, y) {
  on_error <- function(e) {
    message("An Error Occurred")
    print(e)
  }

  on_warning <- function(w) {
    message("A Warning Occurred")
    print(w)
    NA
  }

  tryCatch(log(x) / y, error = on_error, warning = on_warning)
}

# `y` is not supplied, so evaluating it raises an error that the error
# handler reports.
log_and_divide(10)

## An Error Occurred

## <simpleError in log_and_divide(10): argument "y" is missing, with no default>

# log() of a negative number produces NaN with a warning, so the warning
# handler runs and NA is returned.
log_and_divide(-10, 2)

## A Warning Occurred

## <simpleWarning in log(x): NaNs produced>

## [1] NA

`sapply()`

library(tibble)
# On a tibble, sapply() applies max() to each column, giving one named
# value per column.
df1 <- tibble(column1 = c(1, 2, 3, 4, 5, 6, 7))
sapply(X = df1, FUN = max)

## column1 
##       7

# On a plain vector, sapply() visits each element separately, so max() of
# each single value simply returns that value.
df2 <- c(1, 2, 3, 4, 5, 6, 7)
sapply(X = df2, FUN = max)

## [1] 1 2 3 4 5 6 7

# Mean of `x`, ignoring missing values.
takemean <- function(x) mean(x, na.rm = TRUE)

# Column-wise means of the built-in `attitude` data set.
sapply(X = attitude, FUN = takemean)

##     rating complaints privileges   learning     raises   critical    advance 
##   64.63333   66.60000   53.13333   56.36667   64.63333   74.76667   42.93333

# Per-column summary statistics; the Mean rows match the sapply() means above
# after rounding.
summary(attitude)

##      rating        complaints     privileges       learning         raises     
##  Min.   :40.00   Min.   :37.0   Min.   :30.00   Min.   :34.00   Min.   :43.00  
##  1st Qu.:58.75   1st Qu.:58.5   1st Qu.:45.00   1st Qu.:47.00   1st Qu.:58.25  
##  Median :65.50   Median :65.0   Median :51.50   Median :56.50   Median :63.50  
##  Mean   :64.63   Mean   :66.6   Mean   :53.13   Mean   :56.37   Mean   :64.63  
##  3rd Qu.:71.75   3rd Qu.:77.0   3rd Qu.:62.50   3rd Qu.:66.75   3rd Qu.:71.00  
##  Max.   :85.00   Max.   :90.0   Max.   :83.00   Max.   :75.00   Max.   :88.00  
##     critical        advance     
##  Min.   :49.00   Min.   :25.00  
##  1st Qu.:69.25   1st Qu.:35.00  
##  Median :77.50   Median :41.00  
##  Mean   :74.77   Mean   :42.93  
##  3rd Qu.:80.00   3rd Qu.:47.75  
##  Max.   :92.00   Max.   :72.00

Lesson 6

Sammy Amos

2024-09-15

Videos

Conditionals, controls, and functions

if(){} else

ifelse()

Creating loops

while

for

User-defined functions

Apply to all elements of the input

`dplyr`

`select()`

`filter()`

`mutate()`

`summarize()` and `group_by()`

`arrange()`

Readings

`tryCatch()` and `stop()`

`sapply()`

Lesson 6

Sammy Amos

2024-09-15

Videos

Conditionals, controls, and functions

if(){} else

ifelse()

Creating loops

while

for

User-defined functions

Apply to all elements of the input

dplyr

select()

filter()

mutate()

summarize() and group_by()

arrange()

Readings

tryCatch() and stop()

sapply()

`dplyr`

`select()`

`filter()`

`mutate()`

`summarize()` and `group_by()`

`arrange()`

`tryCatch()` and `stop()`

`sapply()`