### 1. Entering data ###############################################################
## Arithmetic
2 + 2
## [1] 4
## Printing a range of numbers
1:100 # The integers 1 through 100, wrapped over several console lines
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## [18] 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
## [35] 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## [52] 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## [69] 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
## [86] 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## Printing text
print("Hello World") # Writes the string to the console
## [1] "Hello World"
print("Hello World")
## [1] "Hello World"
## Assigning values
# Single values
a <- 1 # `<-` is the assignment operator
a
## [1] 1
2 -> b # Right-hand assignment also works, but it is rarely used
c <- d <- e <- 3 # Chained assignment: c, d and e all receive 3
# An assignment prints nothing; type the name on its own to see the value
a
## [1] 1
b
## [1] 2
# Several values at once
x <- c(1, 2, 5, 9) # c() combines/concatenates its arguments into a vector
x # Show the contents of x in the console
## [1] 1 2 5 9
## Sequences
# Counting up from 0 to 10 and down from 10 to 0
0:10
## [1] 0 1 2 3 4 5 6 7 8 9 10
10:0
## [1] 10 9 8 7 6 5 4 3 2 1 0
# seq() is a second way to build a sequence
seq(10) # 1 to 10
## [1] 1 2 3 4 5 6 7 8 9 10
seq(30, 0, by = -3) # From 30 down to 0 in steps of 3
## [1] 30 27 24 21 18 15 12 9 6 3 0
## Math
(y <- c(5, 1, 0, 10)) # Wrapping an assignment in parentheses also prints it
## [1] 5 1 0 10
x + y # Element-wise sum of x and y
## [1] 6 3 5 19
x * 2 # Every element of x multiplied by 2
## [1] 2 4 10 18
2^6 # Exponentiation
## [1] 64
sqrt(64) # Square root
## [1] 8
log(100) # Natural logarithm, base e (2.71828...)
## [1] 4.60517
log10(100) # Base-10 logarithm
## [1] 2
### Data types
## 1) Numeric
n1 <- 15
n1
## [1] 15
typeof(n1) # A plain number is stored as a double-precision value
## [1] "double"
n2 <- 1.5
n2
## [1] 1.5
typeof(n2)
## [1] "double"
# Create numeric vectors
dbl_var <- c(1, 2.5, 4.5) # a vector of double-precision values
dbl_var
## [1] 1.0 2.5 4.5
## 2) Integer
# Create integer vectors
int_var <- c(1L, 6L, 10L) # an L suffix makes each value an integer
int_var
## [1] 1 6 10
## 3) Logical
# TRUE and FALSE are the logical constants of the R language.
# T and F are only ordinary variables that default to TRUE and FALSE and can
# be reassigned, so always spell the constants out.
# Example 1
l1 <- TRUE
l1
## [1] TRUE
typeof(l1)
## [1] "logical"
# Example 2
# Sample values (note that x is given a new value here)
x <- 1
y <- 2
z <- x > y # is x larger than y?
z # print the logical value
## [1] FALSE
class(z) # print the class name of z
## [1] "logical"
# Example 3
x <- 1:10
x
## [1] 1 2 3 4 5 6 7 8 9 10
x > 8
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
x < 5
## [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
x > 8 | x < 5 # | is the element-wise logical operator meaning 'or'
## [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
## 4) Character
c1 <- "c"
c1
## [1] "c"
typeof(c1)
## [1] "character"
c2 <- "a string of text"
c2
## [1] "a string of text"
typeof(c2)
## [1] "character"
### Data structures
## 1) Vector
v1 <- c(1, 2, 3, 4, 5) # numeric vector
v1
## [1] 1 2 3 4 5
is.vector(v1)
## [1] TRUE
v2 <- c("a", "b", "c") # character (string) vector
v2
## [1] "a" "b" "c"
is.vector(v2)
## [1] TRUE
v3 <- c(TRUE, TRUE, FALSE, FALSE, TRUE) # logical vector
v3
## [1] TRUE TRUE FALSE FALSE TRUE
is.vector(v3)
## [1] TRUE
## 2) Matrix
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables
m1 <- matrix(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE), nrow = 2)
m1
## [,1] [,2] [,3]
## [1,] TRUE FALSE TRUE
## [2,] TRUE FALSE FALSE
m2 <- matrix(
  c("a", "b",
    "c", "d"),
  nrow = 2,
  byrow = TRUE
)
m2
## [,1] [,2]
## [1,] "a" "b"
## [2,] "c" "d"
# byrow = TRUE fills the matrix row by row; byrow = FALSE (the default) fills it column by column
## 3) Array
# Arrays are R data objects that can store data in more than two dimensions
a1 <- array(1:24, dim = c(4, 3, 2)) # 4 rows, 3 columns, 2 layers
a1 # print three dimensional data
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 13 17 21
## [2,] 14 18 22
## [3,] 15 19 23
## [4,] 16 20 24
# All elements must share one type (numeric, logical, or character)
## 4) Data frame
# Can store different data types in different columns
# Every item within a particular column has to be of the same type
vNumeric <- c(1, 2, 3)
vCharacter <- c("a", "b", "c")
vLogical <- c(TRUE, FALSE, TRUE)
# cbind() on plain vectors returns a matrix, and a matrix holds a single type
df1 <- cbind(vNumeric, vCharacter, vLogical)
df1 # Every value is coerced to the most flexible type (character)
## vNumeric vCharacter vLogical
## [1,] "1" "a" "TRUE"
## [2,] "2" "b" "FALSE"
## [3,] "3" "c" "TRUE"
# Build the data frame directly from the vectors: going through cbind() first
# would turn every column into character before the data frame is created.
# stringsAsFactors = FALSE keeps vCharacter as character on R versions before 4.0.
df2 <- data.frame(vNumeric, vCharacter, vLogical, stringsAsFactors = FALSE)
df2 # A data frame whose three columns keep three different data types
## vNumeric vCharacter vLogical
## 1 1 a TRUE
## 2 2 b FALSE
## 3 3 c TRUE
## 5) List
# A list can hold elements of different types and different lengths
o1 <- c(1, 2, 3)
o2 <- c("a", "b", "c", "d")
o3 <- c(TRUE, FALSE, TRUE, TRUE, FALSE) # spelled out: T and F can be reassigned
list1 <- list(o1, o2, o3)
list1
## [[1]]
## [1] 1 2 3
##
## [[2]]
## [1] "a" "b" "c" "d"
##
## [[3]]
## [1] TRUE FALSE TRUE TRUE FALSE
list2 <- list(o1, o2, o3, list1) # list1 is nested inside list2 as its 4th element
list2
## [[1]]
## [1] 1 2 3
##
## [[2]]
## [1] "a" "b" "c" "d"
##
## [[3]]
## [1] TRUE FALSE TRUE TRUE FALSE
##
## [[4]]
## [[4]][[1]]
## [1] 1 2 3
##
## [[4]][[2]]
## [1] "a" "b" "c" "d"
##
## [[4]][[3]]
## [1] TRUE FALSE TRUE TRUE FALSE
### Coercing types
## Automatic coercion
# When different types are combined, they are coerced to the most flexible type.
# Types from least to most flexible are: logical, integer, double, and character.
(coerce1 <- c(1, "b", TRUE))
## [1] "1" "b" "TRUE"
typeof(coerce1)
## [1] "character"
## Coerce numeric to integer
(coerce2 <- 5)
## [1] 5
typeof(coerce2)
## [1] "double"
(coerce3 <- as.integer(5))
## [1] 5
typeof(coerce3)
## [1] "integer"
## Coerce character to numeric
(coerce4 <- c("1", "2", "3"))
## [1] "1" "2" "3"
typeof(coerce4)
## [1] "character"
# as.numeric() converts only its first argument, so pass the whole vector;
# as.numeric("1", "2", "3") would silently return just 1.
(coerce5 <- as.numeric(coerce4))
## [1] 1 2 3
typeof(coerce5)
## [1] "double"
## Coerce matrix to data frame
(coerce6 <- matrix(1:9, nrow = 3))
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
is.matrix(coerce6)
## [1] TRUE
# Columns of an unnamed matrix are named V1, V2, ... in the resulting data frame
(coerce7 <- as.data.frame(coerce6))
## V1 V2 V3
## 1 1 4 7
## 2 2 5 8
## 3 3 6 9
is.data.frame(coerce7)
## [1] TRUE
### 2. Header and comment #############################################################
## Format
## ---------------------------
##
## Script name:
##
## Purpose of script:
##
## Author:
##
## Date Created:
##
## Email:
##
## ---------------------------
##
## Notes:
##
##
## ---------------------------
### 3. Packages for R ################################################################
## Install packages: Method 1
# Packages used in this tutorial
pkgs <- c("haven", "stringr", "ggplot2", "dplyr", "foreign", "MASS")
# Install only the packages that are not available yet. Re-installing a package
# that is already installed (and possibly loaded) on every run is slow and, on
# Windows, typically ends in "cannot remove prior installation" /
# "Permission denied" warnings.
is_installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
if (any(!is_installed)) {
  # Download over https rather than plain http
  install.packages(pkgs[!is_installed], repos = "https://cloud.r-project.org")
}
## Install packages: Method 2
# Install manually from the Packages tab of the Files/Plots/Packages pane
## Load a package
# library() stops with an error when the package is missing. require() also
# loads a package, but it only warns and returns FALSE on failure, so the
# script would carry on and fail later; prefer library().
library(ggplot2)
library(haven)
### 4. Importing and writing data from a spreadsheet
# Folder that holds the tutorial files; change this one line to run elsewhere
data_dir <- "C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R"
## Tab-delimited text file (*.txt) with a header row.
## By default read.table() splits fields on any whitespace (sep = ""), which
## includes tabs, so there is no need to specify "sep = " here.
df1 <- read.table(file.path(data_dir, "data1.txt"), header = TRUE)
## File that uses another delimiter (here "/") and has no header row
df2 <- read.table(file.path(data_dir, "data2.txt"), sep = "/", header = FALSE)
## Comma-separated file (*.csv) with a header row; "," is read.csv()'s default delimiter
df3 <- read.csv(file.path(data_dir, "data3.csv"), header = TRUE)
## Write csv file (row.names = TRUE adds the row names as the first column)
write.csv(df1, file = file.path(data_dir, "write_df1.csv"), row.names = TRUE)
## Export data set to a tab-separated file
# NOTE(review): the content is tab-separated but the file name ends in .csv;
# consider a .tsv or .txt extension. The name is kept unchanged here.
write.table(df2, file = file.path(data_dir, "write_df2.csv"), sep = "\t",
            row.names = FALSE)
### 5. Simple visualization
# Read Titanic dataset ("," is read.csv()'s default delimiter)
titanic <- read.csv(
  "C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/titanic.csv",
  header = TRUE
)
# Simple Bar Plot: number of passengers in each class
counts <- table(titanic$Pclass)
barplot(counts, main = "Passenger Distribution", xlab = "Class Level")

# Grouped Bar Plot: one bar per Survived value within each class
counts <- table(titanic$Survived, titanic$Pclass)
barplot(
  counts,
  main = "Passenger Distribution by Survived and\nClass", # \n = two-line title
  xlab = "Class Level",
  col = c("darkblue", "red"),
  legend.text = rownames(counts), # full argument name, not the partial `legend`
  beside = TRUE # bars side by side instead of stacked
)

# Boxplot of Fare by Pclass
boxplot(Fare ~ Pclass, data = titanic, main = "Boxplot",
        xlab = "Class Level", ylab = "Fare of Tickets")

# Simple Pie Chart
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie(slices, labels = lbls, main = "Pie Chart of Countries")

# Simple Scatterplot
# Columns are referenced through titanic$ instead of attach(titanic): attach()
# puts the columns on the search path, where they can be masked by (or mask)
# other objects of the same name.
plot(titanic$Age, titanic$Fare, main = "Scatterplot Example",
     xlab = "Age", ylab = "Fare ", pch = 19)

### 6. Subset
## Select variables (i.e., columns)
# Select certain variables by name
myvars <- c("Survived", "Pclass", "Name")
col_titanic <- titanic[myvars]
# Select the 1st and the 5th through 8th variables
col_titanic2 <- titanic[c(1, 5:8)]
## Exclude the 3rd and 5th variables
col_titanic3 <- titanic[-c(3, 5)]
## Select observations (i.e., rows)
# Select the first 5 observations
row_titanic1 <- titanic[1:5, ]
# Select observations based on variable values.
# which() keeps only the rows where the condition is TRUE, so rows where the
# comparison yields NA are dropped.
sub_titanic <- titanic[which(titanic$Sex == "female" & titanic$Age > 35), ]
# Use subset function: rows with Age >= 20 or Age < 10, two columns
# NOTE(review): this result is overwritten by the next assignment to the same
# name; give one of the two a different name if both are needed.
sub_titanic2 <- subset(titanic, Age >= 20 | Age < 10,
                       select = c(Survived, Pclass))
# Use subset function: males older than 25, columns Survived through Age
sub_titanic2 <- subset(titanic, Sex == "male" & Age > 25,
                       select = Survived:Age)
# Random samples
# Take a random sample of 50 rows from the titanic data set, without replacement.
# seq_len() is used instead of 1:nrow() because 1:0 would yield c(1, 0) for an
# empty data frame.
random_titanic <- titanic[sample(seq_len(nrow(titanic)), 50, replace = FALSE), ]