This file contains links and notes to YouTube videos describing R functions and concepts. Datasets used in this document are either preloaded in R or custom; custom datasets are posted below each header. Loading (and, if necessary, installing) tidyverse is recommended because pipes (%>%) are used frequently and many packages covered in this document are part of tidyverse.
#load tidyverse
library(tidyverse)
Package: Base R, Dataset: iris
Video Link: 3:13
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
Video Link: 7:15
prop.table(table(iris$Species))
##
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
Video Link: 5:45
#multiply prop.table formula by 100
prop.table(table(iris$Species))*100
##
## setosa versicolor virginica
## 33.33333 33.33333 33.33333
Video Link: 8:32
#Shows how many flowers of each of the 3 species have each petal width.
xtabs(~Petal.Width +Species, iris)
## Species
## Petal.Width setosa versicolor virginica
## 0.1 5 0 0
## 0.2 29 0 0
## 0.3 7 0 0
## 0.4 7 0 0
## 0.5 1 0 0
## 0.6 1 0 0
## 1 0 7 0
## 1.1 0 3 0
## 1.2 0 5 0
## 1.3 0 13 0
## 1.4 0 7 1
## 1.5 0 10 2
## 1.6 0 3 1
## 1.7 0 1 1
## 1.8 0 1 11
## 1.9 0 0 5
## 2 0 0 6
## 2.1 0 0 6
## 2.2 0 0 3
## 2.3 0 0 8
## 2.4 0 0 3
## 2.5 0 0 3
Package: gtsummary, Dataset: CO2
Video Link: 0:44
# Load gtsummary, which provides tbl_summary(); it is not attached by tidyverse
library(gtsummary)
# Summarize every remaining CO2 column after dropping 'Plant' and 'conc'
CO2 %>%
  select(!c(Plant, conc)) %>% # removes the 'Plant' and 'conc' variables
  tbl_summary() # creates the summary table
| Characteristic | N = 84¹ |
|---|---|
| Type | |
| Quebec | 42 (50%) |
| Mississippi | 42 (50%) |
| Treatment | |
| nonchilled | 42 (50%) |
| chilled | 42 (50%) |
| uptake | 28 (18, 37) |
| ¹ n (%); Median (IQR) | |
Video Link: 1:42
CO2 %>% select(!c(Plant,conc)) %>%
tbl_summary(by = Type) #organizes the data by the categorical variable 'Type'
| Characteristic | Quebec, N = 42¹ | Mississippi, N = 42¹ |
|---|---|---|
| Treatment | | |
| nonchilled | 21 (50%) | 21 (50%) |
| chilled | 21 (50%) | 21 (50%) |
| uptake | 37 (30, 40) | 19 (14, 28) |
| ¹ n (%); Median (IQR) | | |
Video Link: 2:13
CO2 %>% select(!c(Plant,conc)) %>%
tbl_summary(by = Type) %>%
add_p() # adds p-values to the table
| Characteristic | Quebec, N = 42¹ | Mississippi, N = 42¹ | p-value² |
|---|---|---|---|
| Treatment | | | >0.9 |
| nonchilled | 21 (50%) | 21 (50%) | |
| chilled | 21 (50%) | 21 (50%) | |
| uptake | 37 (30, 40) | 19 (14, 28) | <0.001 |
| ¹ n (%); Median (IQR) | | | |
| ² Pearson’s Chi-squared test; Wilcoxon rank sum test | | | |
Video Link: 3:02
CO2 %>% select(!c(Plant,conc)) %>%
tbl_summary(by = Type,
statistic = list(all_continuous() ~ "{mean} ({sd})", #adds mean and standard deviation
all_categorical() ~ "{n} / {N} ({p}%)" ), #add percentage
digits = all_continuous() ~ 2) %>% #rounds decimal to 2 digits
add_overall() %>% #adds total column
modify_spanning_header(c("stat_1", "stat_2") ~ "**Location**") #adds title 'Location'
| Characteristic | Overall, N = 84¹ | Location: Quebec, N = 42¹ | Location: Mississippi, N = 42¹ |
|---|---|---|---|
| Treatment | | | |
| nonchilled | 42 / 84 (50%) | 21 / 42 (50%) | 21 / 42 (50%) |
| chilled | 42 / 84 (50%) | 21 / 42 (50%) | 21 / 42 (50%) |
| uptake | 27.21 (10.81) | 33.54 (9.67) | 20.88 (7.82) |
| ¹ n / N (%); Mean (SD) | | | |
Video Link: 4:27
#Crosstab shows the relationship between two categorical variables
CO2 %>% #dataset
tbl_cross(row = Type, col = Treatment, percent = "cell") %>% #creates cross tables w/ Type and Treatment
add_p() #adds p-values
| | Treatment: nonchilled | Treatment: chilled | Total | p-value¹ |
|---|---|---|---|---|
| Type | | | | >0.9 |
| Quebec | 21 (25%) | 21 (25%) | 42 (50%) | |
| Mississippi | 21 (25%) | 21 (25%) | 42 (50%) | |
| Total | 42 (50%) | 42 (50%) | 84 (100%) | |
| ¹ Pearson’s Chi-squared test | | | | |
Package: Base R, Dataset: custom
Video Link: 1:23
#Add color by using a hex color code ('values' is the custom numeric vector defined further below as c(.4, .75, 0.2, 0.6, 0.5))
barplot(values, col = "#1b98e0")
#Add color by using color name
barplot(values, col = "darkgreen")
### Change bar orientation to horizontal Video Link: 2:00
barplot(values, horiz = TRUE)
Video Link: 2:27
#Create a vector called group that contains the labels for the barplot
group <- LETTERS[1:5]
#Assign the new vector to ‘names.arg’
barplot(values, names.arg = group)
Video Link: 3:33
# Build a 2 x 5 numeric matrix named data: one column per bar label (A-E) and
# one row per group, so every bar is made of two stacked segments.
# matrix() fills column by column, so each pair of values below is one label.
data <- matrix(
  c(0.2, 0.4, 0.3, 0.1, 0.7, 0.1, 0.1, 0.2, 0.3, 0.3),
  nrow = 2,
  dimnames = list(c("Group 1", "Group 2"), c("A", "B", "C", "D", "E"))
)
# Draw the stacked bar plot, using one fill color per row (group)
barplot(data, col = c("#1b98e0", "#353436"))
Video Link: 5:04
#legend must be executed with barplot in the same code chunk
barplot(data, col = c("#1b98e0", "#353436"))
legend("topright", #location of legend
legend = c("Group 1", "Group 2"), #names of legend
fill = c("#1b98e0", "#353436")) #fill colors for legend
Video Link: 6:25
#The groups should be some type of categorical data
barplot(data,
col = c ("#1b98e0", "#353436"),
beside = TRUE) #places columns beside each other
Video Link: 24:28
#Using diamonds pre-loaded dataset
#Create object containing mean price by subgroup clarity
diamonds_m_cl_co <- aggregate(diamonds, price ~ clarity + color, mean)
#Plot grouped barplot with object containing subgroups
ggplot(diamonds_m_cl_co, aes(x=clarity, y = price, fill = color)) +
geom_bar(stat = "identity", position = "dodge") #dodge makes barplot grouped.
Video Link: 7:48
#Custom data
values <- c(.4, .75, 0.2, 0.6, 0.5)
#load ggplot2
library(ggplot2) #automatically loaded with tidyverse
Video Link: 8:09
#A data frame stores data slightly differently than a matrix: each column can have its own type.
data_ggp <- data.frame(group, values)
#If unsure what kind of object you have, run ‘class(name_of_data)’
#(typeof() reports the internal storage type instead, which is "list" for every data frame)
class(data_ggp) #should now say "data.frame"
## [1] "data.frame"
Video Link: 8:47
ggplot(data_ggp, aes(x = group, y = values)) +
geom_bar(stat = "identity") #specifies a bar chart based on our data
Video Link: 0:15
data <- data.frame(
x = c("A", "B", "C", "D", "E"),
y = c(0.5, 2, 1.2, -0.2, 0.7))
Video Link: 1:55
# Create duplicate of data
data1 <- data
# Change factor levels of data
#changes the factor ordering to ‘B’, ‘D’, ‘E’, ‘C’, ‘A’
data1$x <- factor(data1$x, levels = c("B", "D", "E", "C", "A"))
#Default factor ordering is ‘A’, ‘B’, ‘C’, ‘D’, ‘E’
Video Link: 3:30
# Step 1: Duplicate data
data2 <- data
#Step 2: Change factor levels to increasing order of y
#Replace x and y with variable names used in your plot
data2$x <- factor(data2$x, levels = data2$x[order(data2$y)])
#Step 3: Create plot with new data
ggplot(data2, aes(x, y)) +
geom_bar(stat = "identity")
Video Link: 4:30
#Step 1: Duplicate data
data3 <- data
#Step 2: Change factor levels to decreasing order
data3$x <- factor(data3$x, levels = data3$x[order(data3$y, decreasing = TRUE)])
#Replace ‘TRUE’ with ‘FALSE’ to organize in increasing order
#Step 3: Create plot with new data
ggplot(data3, aes(x, y)) +
geom_bar(stat = "identity")
Package: ggplot2, Dataset: custom
Video Link:2:22
data <- data.frame(
x = 1:9,
y = c(3, 1, 4, 3, 5, 2, 1, 2, 3),
group = rep(LETTERS[1:3], each = 3))
Video Link: 3:22
#shows just x and y axes, and tick marks
ggplot(data, aes(x = x, y = y))
Video Link: 3:55
ggplot(data, aes(x = x, y = y)) +
geom_point() #specifies a scatter plot
Video Link: 4:45
ggplot(data, aes(x = x, y = y)) +
geom_point(size =3) #changes point size
Video Link: 5:10
ggplot(data, aes(x = x, y = y, col = group)) + #col = group specifies the color grouping
geom_point(size =3) +
theme(legend.position = "none") #removes default legend
Video Link: 6:08
#Build the colored scatter plot and store it in the object 'ggp_simple' instead of drawing it right away
ggp_simple <- ggplot(data, aes(x = x, y = y, col = group)) +
geom_point(size =3 ) #this saves the plot to ggp_simple
ggp_simple #the plot won't appear unless the object 'ggp_simple' is called
Video Link: 7:01
ggp_simple +
scale_x_continuous(limits = c(-3, 15)) #changes the numeric range of x-axis
Video Link: 7:52
ggp_simple +
scale_color_manual(
breaks = c("A", "B", "C"),
values = c("#1b98e0", #selects three different colors. One for each group A, B, C.
"#353436",
"#e32f08"))
Video Link: 8:58
ggp_simple +
scale_x_continuous(limits = c(-3, 15)) + # the '+' allows multiple layers to be added
scale_color_manual(
breaks = c("A", "B", "C"),
values = c("#1b98e0",
"#353436",
"#e32f08"))
Video Link: 9:45
#Facet layers are helpful when your original plot has too much data
ggp_simple +
scale_x_continuous(limits = c(-3, 15)) +
scale_color_manual(
breaks = c("A", "B", "C"),
values = c("#1b98e0",
"#353436",
"#e32f08")) +
facet_wrap(group ~ .) + #adds facet layer
theme_bw() # changes graph background from gray to white
Package: ggplot2, Dataset: diamonds
Video Link: 18:04
ggplot(diamonds, aes(x = price, y = carat)) +
geom_point () +
facet_wrap(clarity ~ . ) +
geom_smooth(method = "lm", formula = y ~ x) #adds regression line
Video Link: 19:44
#For density plots only input one column/variable since density will be graphed on the y-axis
ggplot(diamonds, aes(x = depth)) + #variable is depth
geom_density()
Video Link: 20:36
#‘fill’ colors the area between the density line and the x-axis, as opposed to coloring just the line
ggplot(diamonds, aes(x = depth, fill =cut)) + # adds one density graph per 'cut' level to the plot
geom_density(alpha = .3) #makes the fill translucent. alpha ranges from 0 (fully transparent) to 1 (fully opaque)
Package: ggplot2, Dataset: diamonds
Video Link: 0:33
#create vector called 'colors' that has 7 different colors
colors <- c("#FFFFFF","#F5FCC2","#E0ED87","#CCDE57",
"#B3C732","#94A813","#718200")
Video Link: 0:37
#Summarize diamonds by color: one row per color with its row count and its share of all rows
data <- diamonds %>%
group_by(color) %>% #categorical column
summarize(
counts = n(), #count column
percentage = n()/nrow(diamonds)) #share of all diamonds, stored as a proportion (0-1); multiply by 100 for a percent
Video Link: 0:56
#Build the pie chart: a single stacked column (x = "") that coord_polar() wraps into a circle
pie <- ggplot(data = data, aes(x="", y = percentage, fill = color)) +
geom_col(color = "black") + #outline color of pie chart
coord_polar("y", start = 0) + #creates pie chart. Following code is formatting.
geom_text(aes(label = paste0(round(percentage*100), "%")), #adds % labels to pie slices
position = position_stack(vjust = 0.5)) + #centers each label within its slice
theme(panel.background = element_blank(), #removes default background
axis.line = element_blank(), #removes axis line
axis.text= element_blank(), #removes axis text
axis.ticks = element_blank(), #removes axis ticks
axis.title=element_blank(), #removes axis titles
plot.title = element_text(hjust = 0.5, size = 18)) + #centers the plot title and sets its font size
ggtitle("Pie chart of Diamond Color") + #title
scale_fill_manual(values = colors) #fills the slices using the custom 'colors' vector
pie #call pie chart
Package: ggplot2, Datasets: mtcars
Note: A bubble plot is a scatter plot except it has a 3rd numeric variable mapped to a size aesthetic Video Link: 0:05
Video Link: 0:25
data <- mtcars %>% mutate(
cyl = factor(cyl), #creates factor levels 4,6, and 8 for 'cyl' var. Needed for color grouping for code chunks below
Model = rownames(mtcars)) #creates 'Model' var that contains all the names of cars
Video Link: 0:30
plot1 <- data %>%
ggplot(aes(x = wt, y = mpg, size = hp)) + #‘size = hp’ is the 3rd variable that makes this a bubble plot
geom_point(alpha = 0.5) #'alpha .5' makes bubbles more transparent
plot1
Video Link: 1:09
#‘color = cyl’ adds color by grouping
plot2 <- data %>%
ggplot(aes(x = wt, y = mpg, size = hp, color = cyl, label = Model)) +
geom_point(alpha = 0.5) +
scale_size(range = c(.1, 15))
plot2
Video Link: 1:40
#Plotly plots are interactive. You can filter data by clicking on the legend and obtain point-specific data by hovering the cursor over a bubble.
#Step 1: Load/install plotly
library(plotly)
#Step 2: Convert ggplot to plotly plot
p <- ggplotly(plot2, width=500, height=500) %>% #converts ggplot to plotly
layout(xaxis = list(range = c(1, 6)), #formatting
yaxis = list(range = c(8, 35)),
legend = list(x = 0.825, y = .975))
p
Package: Base R, Dataset: iris
Video Link: 0:23
#Returns the entire row where the variable ‘Sepal.Length’ has its highest value
iris[which.max(iris$Sepal.Length), ] #Be sure to add a comma before the last bracket
Video Link: 1:22
#Returns the entire row where the variable ‘Sepal.Length’ has its lowest value
iris[which.min(iris$Sepal.Length), ]
Video Link: 1:40
#Returns the entire row holding the 11th lowest ‘Sepal.Length’, because order() sorts in increasing order by default
#To get the 11th highest instead, use order(iris$Sepal.Length, decreasing = TRUE) [11]
iris[order(iris$Sepal.Length) [11], ]
(Package: Base R)
Video Link: 0:38
#change the words in quotation marks to a directory path on your computer
my_directory <- file.path("C:", "Users", "Joach", "Desktop")
#my_directory can be called or used in code to refer to the directory path
Video Link: 1:44
#If ‘my_file.csv’ wasn’t added, this would specify a directory path
My_file <- file.path("C:", "Users", "Joach", "Desktop", "my_file.csv")
Package: Base R, Dataset: airquality
Video Link: 1:05
#Returns a logical matrix where TRUE marks a missing value (NA) and FALSE marks a non-missing value
is.na(airquality)
## Ozone Solar.R Wind Temp Month Day
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] TRUE TRUE FALSE FALSE FALSE FALSE
## [6,] FALSE TRUE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] TRUE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE TRUE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] TRUE FALSE FALSE FALSE FALSE FALSE
## [26,] TRUE FALSE FALSE FALSE FALSE FALSE
## [27,] TRUE TRUE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] TRUE FALSE FALSE FALSE FALSE FALSE
## [33,] TRUE FALSE FALSE FALSE FALSE FALSE
## [34,] TRUE FALSE FALSE FALSE FALSE FALSE
## [35,] TRUE FALSE FALSE FALSE FALSE FALSE
## [36,] TRUE FALSE FALSE FALSE FALSE FALSE
## [37,] TRUE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] TRUE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] TRUE FALSE FALSE FALSE FALSE FALSE
## [43,] TRUE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] TRUE FALSE FALSE FALSE FALSE FALSE
## [46,] TRUE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] TRUE FALSE FALSE FALSE FALSE FALSE
## [53,] TRUE FALSE FALSE FALSE FALSE FALSE
## [54,] TRUE FALSE FALSE FALSE FALSE FALSE
## [55,] TRUE FALSE FALSE FALSE FALSE FALSE
## [56,] TRUE FALSE FALSE FALSE FALSE FALSE
## [57,] TRUE FALSE FALSE FALSE FALSE FALSE
## [58,] TRUE FALSE FALSE FALSE FALSE FALSE
## [59,] TRUE FALSE FALSE FALSE FALSE FALSE
## [60,] TRUE FALSE FALSE FALSE FALSE FALSE
## [61,] TRUE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] TRUE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] TRUE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] TRUE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] TRUE FALSE FALSE FALSE FALSE FALSE
## [84,] TRUE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE TRUE FALSE FALSE FALSE FALSE
## [97,] FALSE TRUE FALSE FALSE FALSE FALSE
## [98,] FALSE TRUE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] TRUE FALSE FALSE FALSE FALSE FALSE
## [103,] TRUE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] TRUE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] TRUE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] TRUE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE
## [141,] FALSE FALSE FALSE FALSE FALSE FALSE
## [142,] FALSE FALSE FALSE FALSE FALSE FALSE
## [143,] FALSE FALSE FALSE FALSE FALSE FALSE
## [144,] FALSE FALSE FALSE FALSE FALSE FALSE
## [145,] FALSE FALSE FALSE FALSE FALSE FALSE
## [146,] FALSE FALSE FALSE FALSE FALSE FALSE
## [147,] FALSE FALSE FALSE FALSE FALSE FALSE
## [148,] FALSE FALSE FALSE FALSE FALSE FALSE
## [149,] FALSE FALSE FALSE FALSE FALSE FALSE
## [150,] TRUE FALSE FALSE FALSE FALSE FALSE
## [151,] FALSE FALSE FALSE FALSE FALSE FALSE
## [152,] FALSE FALSE FALSE FALSE FALSE FALSE
## [153,] FALSE FALSE FALSE FALSE FALSE FALSE
Video Link: 2:00
#Returns the total amount of missing values
sum(is.na(airquality))
## [1] 44
Video Link: 3:10
#Deletes entire row where the missing value is present.
na.omit(airquality)
Video Link: 4:15
mean(airquality$Ozone, na.rm = TRUE)
## [1] 42.12931
Package: Base R, Dataset: mtcars
Note 1: If the data input is a column, sapply and lapply will apply the function to each value in the specified column.
Note 2: If the data input is a dataset, sapply and lapply will apply the function to each column.
Note 3: Using these functions is often quicker than using a for loop.
Video Link: 0:38
#Step 1: Load data (0:45)
data <- mtcars
#Step 2: Create demo function (0:50)
# Classify a single fuel-economy value into a labelled band.
#
# mpg: one numeric value (miles per gallon). The function works on scalars,
#      so apply it element-wise with lapply()/sapply().
# Returns "High" when mpg > 30, "Medium" when mpg is above 20 and at most 30,
# and "Low" when mpg <= 20. A missing value returns NA_character_.
mpg_category <- function(mpg) { # creates function called mpg_category
  if (is.na(mpg)) {
    # if (NA > 30) would stop with an error, so handle missing values first
    return(NA_character_)
  }
  if (mpg > 30) { # greater than 30: "High"
    "High"
  } else if (mpg > 20) { # greater than 20 and at most 30: "Medium"
    "Medium"
  } else {
    # Everything left is 20 or below; a plain else guarantees a label
    "Low"
  }
}
#Step 3: Use lapply (1:10)
#applies function created above to every value of data$mpg and returns a list.
lapply(X = data$mpg, FUN = mpg_category)
## [[1]]
## [1] "Medium"
##
## [[2]]
## [1] "Medium"
##
## [[3]]
## [1] "Medium"
##
## [[4]]
## [1] "Medium"
##
## [[5]]
## [1] "Low"
##
## [[6]]
## [1] "Low"
##
## [[7]]
## [1] "Low"
##
## [[8]]
## [1] "Medium"
##
## [[9]]
## [1] "Medium"
##
## [[10]]
## [1] "Low"
##
## [[11]]
## [1] "Low"
##
## [[12]]
## [1] "Low"
##
## [[13]]
## [1] "Low"
##
## [[14]]
## [1] "Low"
##
## [[15]]
## [1] "Low"
##
## [[16]]
## [1] "Low"
##
## [[17]]
## [1] "Low"
##
## [[18]]
## [1] "High"
##
## [[19]]
## [1] "High"
##
## [[20]]
## [1] "High"
##
## [[21]]
## [1] "Medium"
##
## [[22]]
## [1] "Low"
##
## [[23]]
## [1] "Low"
##
## [[24]]
## [1] "Low"
##
## [[25]]
## [1] "Low"
##
## [[26]]
## [1] "Medium"
##
## [[27]]
## [1] "Medium"
##
## [[28]]
## [1] "High"
##
## [[29]]
## [1] "Low"
##
## [[30]]
## [1] "Low"
##
## [[31]]
## [1] "Low"
##
## [[32]]
## [1] "Medium"
Video Link: 1:57
#applies function to each value of mpg
sapply(X = data$mpg, FUN = mpg_category) #returns a vector
## [1] "Medium" "Medium" "Medium" "Medium" "Low" "Low" "Low" "Medium"
## [9] "Medium" "Low" "Low" "Low" "Low" "Low" "Low" "Low"
## [17] "Low" "High" "High" "High" "Medium" "Low" "Low" "Low"
## [25] "Low" "Medium" "Medium" "High" "Low" "Low" "Low" "Medium"