library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
# Minimal if/else. At top level, `else` must follow the closing brace of the
# `if` branch on the same line; otherwise R treats the `if` as complete.
if (TRUE) {
  print("true")
} else {
  print("false")
}
## [1] "true"
Professor’s example
# Classify a price into one of four bands and print the matching verdict.
# Branches are tested in ascending order, so the first threshold the price
# falls below decides the message.
price <- 15.99
print(
  if (price < 10) {
    "This is an excellent deal!"
  } else if (price < 11) {
    "This is a fair price"
  } else if (price < 12) {
    "This product is slightly overpriced"
  } else {
    "This product is overpriced"
  }
)
## [1] "This product is overpriced"
ifelse() returns a vector of the same length as its test
# ifelse() tests every element and yields one result per element of the test.
a <- c(NA, 7, 2, 9)
ifelse(test = is.na(a), yes = "NA", no = "Not NA")
## [1] "NA" "Not NA" "Not NA" "Not NA"
# Count down from 10 to 1, printing each value; the loop ends once x hits 0.
x <- 10
while (x > 0) {
  print(x)
  x <- x - 1
}
## [1] 10
## [1] 9
## [1] 8
## [1] 7
## [1] 6
## [1] 5
## [1] 4
## [1] 3
## [1] 2
## [1] 1
# Count up from 0, printing each value; the loop stops before printing 9.
counter <- 0
while (counter < 9) {
  print(counter)
  counter <- counter + 1
}
## [1] 0
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
A for loop is used to iterate over the elements of a vector or list, such as a column of a data frame
# Visit each element of the vector in order and print it.
for (i in c(1, 2, 3, 4)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
# Count how many prices are below 10 by visiting each price in turn.
prices <- c(12.43, 9.99, 18.22, 7.25, 0.50)
numCheap <- 0
for (p in prices) {
  if (p < 10) {
    numCheap <- numCheap + 1
  }
}
print(numCheap)
## [1] 3
# Add two numbers, with friendly messages for missing or non-numeric input.
#
# Args:
#   x, y: The values to add. Both must be supplied and numeric.
#
# Returns:
#   x + y when both arguments are numeric. Otherwise the explanatory message
#   is printed and returned (invisibly) instead of a sum.
addTogether <- function(x, y) {
  # missing() is the base-R test for an unsupplied argument, and the scalar
  # operators && / || are the appropriate ones inside if() conditions.
  if (missing(x) && missing(y)) {
    print("You didn't enter any values. Please enter two numbers")
  } else if (missing(x) || missing(y)) {
    print("You only entered one value. Please enter two values.")
  } else if (is.numeric(x) && is.numeric(y)) {
    x + y
  } else {
    print("Sorry, please enter two numbers.")
  }
}
#call function
addTogether("3",3)
## [1] "Sorry, please enter two numbers."
sapply(x, FUN)
Output is simplified to a vector or a matrix when possible (otherwise a list)
# Wrap an unnamed vector in a data frame; the single column takes the
# deparsed expression as its name. sapply() then applies max() per column.
df1 <- as.data.frame(c(1, 2, 3, 4, 5, 6, 7))
sapply(df1, max)
## c(1, 2, 3, 4, 5, 6, 7)
## 7
dplyr
select(): choose columns
filter(): filter rows
mutate(): create new columns
summarize(): aggregate data
arrange(): sort data
group_by(): group data
sales_data <- read_csv("salesdata.csv")
## New names:
## Rows: 10 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): Region, Product dbl (3): ...1, Sales, Profit date (1): Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
knitr::kable(sales_data)
| …1 | Date | Region | Product | Sales | Profit |
|---|---|---|---|---|---|
| 1 | 2021-01-01 | North America | A | 267.7419 | 38.53529 |
| 2 | 2021-01-02 | Europe | B | 100.6701 | 69.09701 |
| 3 | 2021-01-03 | North America | C | 357.8518 | 79.79671 |
| 4 | 2021-01-04 | Europe | A | 121.2791 | 41.44539 |
| 5 | 2021-01-05 | North America | B | 105.2566 | 93.81673 |
| 6 | 2021-01-06 | Europe | C | 399.9681 | 34.36973 |
| 7 | 2021-01-07 | North America | A | 412.3462 | 78.54400 |
| 8 | 2021-01-08 | Europe | B | 369.8670 | 65.02684 |
| 9 | 2021-01-09 | North America | C | 241.6270 | 29.62530 |
| 10 | 2021-01-10 | Europe | A | 286.5879 | 33.74372 |
Pipe operator %>%
# North America rows only, keeping three columns, largest sale first.
sales_data %>%
  filter(Region == "North America") %>%
  select(Date, Product, Sales) %>%
  arrange(desc(Sales)) %>%
  kable()
| Date | Product | Sales |
|---|---|---|
| 2021-01-07 | A | 412.3462 |
| 2021-01-03 | C | 357.8518 |
| 2021-01-01 | A | 267.7419 |
| 2021-01-09 | C | 241.6270 |
| 2021-01-05 | B | 105.2566 |
select(): choose specific columns
# Keep only the Date, Product and Sales columns.
sales_data %>%
  select(Date, Product, Sales) %>%
  kable()
| Date | Product | Sales |
|---|---|---|
| 2021-01-01 | A | 267.7419 |
| 2021-01-02 | B | 100.6701 |
| 2021-01-03 | C | 357.8518 |
| 2021-01-04 | A | 121.2791 |
| 2021-01-05 | B | 105.2566 |
| 2021-01-06 | C | 399.9681 |
| 2021-01-07 | A | 412.3462 |
| 2021-01-08 | B | 369.8670 |
| 2021-01-09 | C | 241.6270 |
| 2021-01-10 | A | 286.5879 |
filter(): filter rows by a condition
# Keep only the rows whose Region is "North America".
sales_data %>%
  filter(Region == "North America") %>%
  kable()
| …1 | Date | Region | Product | Sales | Profit |
|---|---|---|---|---|---|
| 1 | 2021-01-01 | North America | A | 267.7419 | 38.53529 |
| 3 | 2021-01-03 | North America | C | 357.8518 | 79.79671 |
| 5 | 2021-01-05 | North America | B | 105.2566 | 93.81673 |
| 7 | 2021-01-07 | North America | A | 412.3462 | 78.54400 |
| 9 | 2021-01-09 | North America | C | 241.6270 | 29.62530 |
mutate(): create new columns
# Add a Profit_Margin column computed row by row as Profit / Sales.
sales_data %>%
  mutate(Profit_Margin = Profit / Sales) %>%
  kable()
| …1 | Date | Region | Product | Sales | Profit | Profit_Margin |
|---|---|---|---|---|---|---|
| 1 | 2021-01-01 | North America | A | 267.7419 | 38.53529 | 0.1439270 |
| 2 | 2021-01-02 | Europe | B | 100.6701 | 69.09701 | 0.6863707 |
| 3 | 2021-01-03 | North America | C | 357.8518 | 79.79671 | 0.2229881 |
| 4 | 2021-01-04 | Europe | A | 121.2791 | 41.44539 | 0.3417356 |
| 5 | 2021-01-05 | North America | B | 105.2566 | 93.81673 | 0.8913146 |
| 6 | 2021-01-06 | Europe | C | 399.9681 | 34.36973 | 0.0859312 |
| 7 | 2021-01-07 | North America | A | 412.3462 | 78.54400 | 0.1904807 |
| 8 | 2021-01-08 | Europe | B | 369.8670 | 65.02684 | 0.1758114 |
| 9 | 2021-01-09 | North America | C | 241.6270 | 29.62530 | 0.1226076 |
| 10 | 2021-01-10 | Europe | A | 286.5879 | 33.74372 | 0.1177430 |
summarize() and group_by(): aggregate data
# Total the Sales column separately for each Product.
sales_data %>%
  group_by(Product) %>%
  summarize(Total_Sales = sum(Sales)) %>%
  kable()
| Product | Total_Sales |
|---|---|
| A | 1087.9552 |
| B | 575.7937 |
| C | 999.4469 |
arrange(): sort data
# Sort rows by Sales; desc() requests descending order.
sales_data %>%
  arrange(desc(Sales)) %>%
  kable()
| …1 | Date | Region | Product | Sales | Profit |
|---|---|---|---|---|---|
| 7 | 2021-01-07 | North America | A | 412.3462 | 78.54400 |
| 6 | 2021-01-06 | Europe | C | 399.9681 | 34.36973 |
| 8 | 2021-01-08 | Europe | B | 369.8670 | 65.02684 |
| 3 | 2021-01-03 | North America | C | 357.8518 | 79.79671 |
| 10 | 2021-01-10 | Europe | A | 286.5879 | 33.74372 |
| 1 | 2021-01-01 | North America | A | 267.7419 | 38.53529 |
| 9 | 2021-01-09 | North America | C | 241.6270 | 29.62530 |
| 4 | 2021-01-04 | Europe | A | 121.2791 | 41.44539 |
| 5 | 2021-01-05 | North America | B | 105.2566 | 93.81673 |
| 2 | 2021-01-02 | Europe | B | 100.6701 | 69.09701 |
tryCatch() and stop(): handle potential errors that might occur during execution
# Add two numbers, reporting any failure as console text instead of an error.
# Non-numeric input raises an error via stop(); tryCatch() intercepts that
# error (and any other raised while the arguments are evaluated), writes its
# message with cat(), and the function then returns NULL.
addTogether <- function(x, y) {
  report_error <- function(e) {
    cat("An error occurred:", conditionMessage(e), "\n")
  }
  tryCatch(
    {
      if (is.numeric(x) && is.numeric(y)) {
        x + y
      } else {
        stop("Sorry, please enter two numeric values.")
      }
    },
    error = report_error
  )
}
# Call the function
addTogether(3)
## An error occurred: argument "y" is missing, with no default
# Compute log(x) / y, reporting problems instead of failing.
#   - On an error: emits a message, prints the condition, and returns the
#     condition object (the value of print()).
#   - On a warning (e.g. log() of a negative number): emits a message, prints
#     the condition, and returns NA.
log_and_divide <- function(x, y) {
  on_error <- function(e) {
    message("An Error Occurred")
    print(e)
  }
  on_warning <- function(w) {
    message("A Warning Occurred")
    print(w)
    NA
  }
  tryCatch(log(x) / y, error = on_error, warning = on_warning)
}
log_and_divide(10)
## An Error Occurred
## <simpleError in log_and_divide(10): argument "y" is missing, with no default>
log_and_divide(-10, 2)
## A Warning Occurred
## <simpleWarning in log(x): NaNs produced>
## [1] NA
sapply()
library(tibble)
# With a named tibble column, sapply() labels the result with that name.
df1 <- tibble(column1 = c(1, 2, 3, 4, 5, 6, 7))
sapply(df1, max)
## column1
## 7
# On a plain vector, sapply() calls max() on each element separately, so the
# result is the vector itself rather than a single maximum.
df2 <- c(1, 2, 3, 4, 5, 6, 7)
sapply(df2, max)
## [1] 1 2 3 4 5 6 7
# Mean of x with missing values ignored.
takemean <- function(x) mean(x, na.rm = TRUE)
sapply(attitude,takemean)
## rating complaints privileges learning raises critical advance
## 64.63333 66.60000 53.13333 56.36667 64.63333 74.76667 42.93333
summary(attitude)
## rating complaints privileges learning raises
## Min. :40.00 Min. :37.0 Min. :30.00 Min. :34.00 Min. :43.00
## 1st Qu.:58.75 1st Qu.:58.5 1st Qu.:45.00 1st Qu.:47.00 1st Qu.:58.25
## Median :65.50 Median :65.0 Median :51.50 Median :56.50 Median :63.50
## Mean :64.63 Mean :66.6 Mean :53.13 Mean :56.37 Mean :64.63
## 3rd Qu.:71.75 3rd Qu.:77.0 3rd Qu.:62.50 3rd Qu.:66.75 3rd Qu.:71.00
## Max. :85.00 Max. :90.0 Max. :83.00 Max. :75.00 Max. :88.00
## critical advance
## Min. :49.00 Min. :25.00
## 1st Qu.:69.25 1st Qu.:35.00
## Median :77.50 Median :41.00
## Mean :74.77 Mean :42.93
## 3rd Qu.:80.00 3rd Qu.:47.75
## Max. :92.00 Max. :72.00