# RTutorial

### 1. Entering data ###############################################################
## Basic math
2 + 2

## [1] 4

## Print numbers
1:100 # Print the integers 1 to 100; the console wraps them across several lines

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
##  [18]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34
##  [35]  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
##  [52]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68
##  [69]  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
##  [86]  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100

## Print characters
print("Hello World") # Print the character string Hello World

## [1] "Hello World"

print("Hello World")

## [1] "Hello World"

## Assign value
# Individual values
a <- 1            # Use <- to assign
a

## [1] 1

2 -> b            # Assignment can also point to the right, but this is unusual
c <- d <- e <- 3  # Multiple assignments (calls such as c(1, 2) still find the function c)

# An assignment does not print anything; type the object's name to print its value
a

## [1] 1

b

## [1] 2

# Multiple values
x <- c(1, 2, 5, 9)  # c = combine/concatenate
x                   # Print contents of x in the console

## [1] 1 2 5 9

## Create sequential data
# The colon operator counts up (0 to 10) or down (10 to 0) in steps of one
0:10

##  [1]  0  1  2  3  4  5  6  7  8  9 10

10:0

##  [1] 10  9  8  7  6  5  4  3  2  1  0

# Sequences can also be generated with function calls
seq_len(10)                      # 1 to 10

##  [1]  1  2  3  4  5  6  7  8  9 10

seq(from = 30, to = 0, by = -3)  # Count down from 30 to 0 in steps of 3

##  [1] 30 27 24 21 18 15 12  9  6  3  0

## Math
(y <- c(5, 1, 0, 10))  # Wrapping an assignment in parentheses also prints the value

## [1]  5  1  0 10

x + y         # Element-wise sum of x and y

## [1]  6  3  5 19

2 * x         # Every element of x doubled

## [1]  2  4 10 18

2^6           # Exponentiation

## [1] 64

sqrt(64)      # Square root

## [1] 8

log(100)      # Natural logarithm, i.e. base e (2.71828...)

## [1] 4.60517

log10(100)    # Logarithm to base 10

## [1] 2

### Data types
## 1) Numeric
n1 <- 15
n1

## [1] 15

typeof(n1) # Numbers are stored as double-precision values unless told otherwise

## [1] "double"

n2 <- 1.5
n2

## [1] 1.5

typeof(n2)

## [1] "double"

# Create numeric vectors
dbl_var <- c(1, 2.5, 4.5) # create a vector of double-precision values
dbl_var

## [1] 1.0 2.5 4.5

## 2) Integer
# Create integer vectors
int_var <- c(1L, 6L, 10L) # the L suffix makes each value an integer, giving an integer vector
int_var

## [1]  1  6 10

## 3) Logical
# What is logical? TRUE and FALSE are reserved words denoting logical constants in the R language.
# T and F are ordinary variables that merely start out as TRUE and FALSE and can be
# reassigned, so spell the constants out.

# Example 1
l1 <- TRUE
l1

## [1] TRUE

typeof(l1)

## [1] "logical"

# Example 2
x <- 1         # sample values (notice that x is given a new value here)
y <- 2
z <- x > y     # is x larger than y?
z              # print the logical value

## [1] FALSE

class(z)       # print the class name of z

## [1] "logical"

# Example 3
x <- 1:10
x

##  [1]  1  2  3  4  5  6  7  8  9 10

x > 8

##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE

x < 5

##  [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE

x > 8 | x < 5 # | is the element-wise logical operator meaning 'or'

##  [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE

## 4) Character
c1 <- "c"
c1

## [1] "c"

typeof(c1)

## [1] "character"

c2 <- "a string of text"
c2

## [1] "a string of text"

typeof(c2)

## [1] "character"

### Data structures
## 1) Vector
v1 <- c(1, 2, 3, 4, 5)
v1

## [1] 1 2 3 4 5

is.vector(v1)

## [1] TRUE

v2 <- c("a", "b", "c") # character (string) vector
v2

## [1] "a" "b" "c"

is.vector(v2)

## [1] TRUE

v3 <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
v3

## [1]  TRUE  TRUE FALSE FALSE  TRUE

is.vector(v3)

## [1] TRUE

## 2) Matrix
# Values are filled column by column unless byrow = TRUE is given
m1 <- matrix(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE), nrow = 2)
m1

##      [,1]  [,2]  [,3]
## [1,] TRUE FALSE  TRUE
## [2,] TRUE FALSE FALSE

m2 <- matrix(c("a", "b",
               "c", "d"),
             nrow = 2,
             byrow = TRUE)
m2

##      [,1] [,2]
## [1,] "a"  "b" 
## [2,] "c"  "d"

# byrow = TRUE indicates that the matrix should be filled by rows.
# byrow = FALSE indicates that the matrix should be filled by columns (the default).

## 3) Array
# Arrays are the R data objects which can store data in more than two dimensions
a1 <- array(1:24, dim = c(4, 3, 2)) # 4 rows, 3 columns, 2 layers
a1  # print three dimensional data

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24

# An array can store only one type of value at a time (numeric, logical, or character)

## 4) Data frame
# Can store different data types in different columns
# Every item within a particular column has to be of the same type
vNumeric   <- c(1, 2, 3)
vCharacter <- c("a", "b", "c")
vLogical   <- c(TRUE, FALSE, TRUE)

df1 <- cbind(vNumeric, vCharacter, vLogical)
df1 # cbind() builds a matrix, so all values are coerced to one type (character here)

##      vNumeric vCharacter vLogical
## [1,] "1"      "a"        "TRUE"  
## [2,] "2"      "b"        "FALSE" 
## [3,] "3"      "c"        "TRUE"

# Build the data frame directly from the vectors. Passing them through cbind()
# first would turn every column into character before the data frame is made.
# stringsAsFactors = FALSE keeps vCharacter a character column on R < 4.0 as well.
df2 <- data.frame(vNumeric, vCharacter, vLogical, stringsAsFactors = FALSE)
df2 # A data frame with three different data types

##   vNumeric vCharacter vLogical
## 1        1          a     TRUE
## 2        2          b    FALSE
## 3        3          c     TRUE

vapply(df2, class, character(1)) # Each column keeps its own type

##    vNumeric  vCharacter    vLogical 
##   "numeric" "character"   "logical" 

## 5) List
# Can hold different data types, and elements of different lengths
o1 <- c(1, 2, 3)
o2 <- c("a", "b", "c", "d")
o3 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)

list1 <- list(o1, o2, o3)
list1

## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "a" "b" "c" "d"
## 
## [[3]]
## [1]  TRUE FALSE  TRUE  TRUE FALSE

list2 <- list(o1, o2, o3, list1) # list1 is nested in list2 as its fourth element
list2

## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "a" "b" "c" "d"
## 
## [[3]]
## [1]  TRUE FALSE  TRUE  TRUE FALSE
## 
## [[4]]
## [[4]][[1]]
## [1] 1 2 3
## 
## [[4]][[2]]
## [1] "a" "b" "c" "d"
## 
## [[4]][[3]]
## [1]  TRUE FALSE  TRUE  TRUE FALSE

### Coercing types
## Automatic coercion
# When you attempt to combine different types they will be coerced to the most flexible type.
# Types from least to most flexible are: logical, integer, double, and character.

(coerce1 <- c(1, "b", TRUE))

## [1] "1"    "b"    "TRUE"

typeof(coerce1)

## [1] "character"

## Coerce numeric to integer

(coerce2 <- 5)

## [1] 5

typeof(coerce2)

## [1] "double"

(coerce3 <- as.integer(5))

## [1] 5

typeof(coerce3)

## [1] "integer"

## Coerce character to numeric

(coerce4 <- c("1", "2", "3"))

## [1] "1" "2" "3"

typeof(coerce4)

## [1] "character"

# Pass the whole vector: as.numeric("1", "2", "3") would convert only its first
# argument and return a single 1.
(coerce5 <- as.numeric(coerce4))

## [1] 1 2 3

typeof(coerce5)

## [1] "double"

## Coerce matrix to data frame

(coerce6 <- matrix(1:9, nrow = 3))

##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

is.matrix(coerce6)

## [1] TRUE

(coerce7 <- as.data.frame(coerce6)) # columns without names are labelled V1, V2, V3

##   V1 V2 V3
## 1  1  4  7
## 2  2  5  8
## 3  3  6  9

is.data.frame(coerce7)

## [1] TRUE

### 2. Header and comment #############################################################

## Format 

## ---------------------------
##
## Script name: 
##
## Purpose of script:
##
## Author:  
##
## Date Created:  
##
## Email:  
##
## ---------------------------
##
## Notes:
##   
##
## ---------------------------

### 3. Packages for R ################################################################
## Install packages: Method 1
# Each call installs one package from the CRAN mirror given in `repos`.
# The lines starting with "##" are the console output recorded when the call was run.
install.packages(
  pkgs  = "haven",
  repos = "http://cran.us.r-project.org"
)

## package 'haven' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ehk994\AppData\Local\Temp\RtmpcnPZq6\downloaded_packages

install.packages(
  pkgs  = "stringr",
  repos = "http://cran.us.r-project.org"
)

## package 'stringr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ehk994\AppData\Local\Temp\RtmpcnPZq6\downloaded_packages

install.packages(
  pkgs  = "ggplot2",
  repos = "http://cran.us.r-project.org"
)

## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ehk994\AppData\Local\Temp\RtmpcnPZq6\downloaded_packages

# NOTE(review): the "cannot remove prior installation" warnings recorded below look
# like the package was in use while it was being reinstalled -- confirm.
install.packages(
  pkgs  = "dplyr",
  repos = "http://cran.us.r-project.org"
)

## package 'dplyr' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'dplyr'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying C:
## \Users\ehk994\Documents\R\R-3.6.1\library\00LOCK\dplyr\libs\x64\dplyr.dll
## to C:\Users\ehk994\Documents\R\R-3.6.1\library\dplyr\libs\x64\dplyr.dll:
## Permission denied

## Warning: restored 'dplyr'

## 
## The downloaded binary packages are in
##  C:\Users\ehk994\AppData\Local\Temp\RtmpcnPZq6\downloaded_packages

install.packages(
  pkgs  = "foreign",
  repos = "http://cran.us.r-project.org"
)

## package 'foreign' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'foreign'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying C:
## \Users\ehk994\Documents\R\R-3.6.1\library\00LOCK\foreign\libs\x64\foreign.dll
## to C:
## \Users\ehk994\Documents\R\R-3.6.1\library\foreign\libs\x64\foreign.dll:
## Permission denied

## Warning: restored 'foreign'

## 
## The downloaded binary packages are in
##  C:\Users\ehk994\AppData\Local\Temp\RtmpcnPZq6\downloaded_packages

install.packages(
  pkgs  = "MASS",
  repos = "http://cran.us.r-project.org"
)

## package 'MASS' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'MASS'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying C:
## \Users\ehk994\Documents\R\R-3.6.1\library\00LOCK\MASS\libs\x64\MASS.dll
## to C:\Users\ehk994\Documents\R\R-3.6.1\library\MASS\libs\x64\MASS.dll:
## Permission denied

## Warning: restored 'MASS'

## 
## The downloaded binary packages are in
##  C:\Users\ehk994\AppData\Local\Temp\RtmpcnPZq6\downloaded_packages

## Install packages: Method 2
# Install manually from the Packages tab of the packages/plots/files pane

## Load a package
# library() stops with an error when the package is not installed, so a missing
# dependency is reported here rather than at the first call that needs it.
library(ggplot2)

# require() would also attach the package, but it only warns and returns FALSE
# when the package is missing, so library() is used for loading here as well.
library(haven)

### 4. Importing and writing data from a spreadsheet
## If your data is saved as a tab-delimited text file (*.txt) with a header row
## (the default sep = "" splits on any whitespace, tabs included, so there is no need to specify "sep = ")
df1 <- read.table("C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/data1.txt", header = TRUE)

## If your data is saved using another delimiter (e.g. /) without a header
df2 <- read.table("C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/data2.txt", sep = "/", header = FALSE)

## If your data is saved as a csv file (delimiter = ,) with a header
df3 <- read.csv("C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/data3.csv", sep = ",", header = TRUE)

## Write csv file (row.names = TRUE adds the row names as the first column)
write.csv(df1, file = "C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/write_df1.csv", row.names = TRUE)

## Export data set to a tab-separated file
# NOTE(review): the output is tab-separated but the file name ends in .csv;
# consider .txt or .tsv -- confirm nothing depends on the current name.
write.table(df2, file = "C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/write_df2.csv", sep = "\t", row.names = FALSE)

### 5. Simple visualization
# Read Titanic dataset
titanic <- read.csv("C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/titanic.csv", sep = ",", header = TRUE)

# Simple Bar Plot: number of passengers in each class
counts <- table(titanic$Pclass)
barplot(counts, main = "Passenger Distribution",
        xlab = "Class Level")

# Grouped Bar Plot: passengers per class, split by survival
counts <- table(titanic$Survived, titanic$Pclass)
# The title is kept on one source line: a line break inside the quotes would put
# the newline and the indentation into the plot title itself.
barplot(counts,
        main = "Passenger Distribution by Survived and Class",
        xlab = "Class Level", col = c("darkblue", "red"),
        legend.text = rownames(counts), beside = TRUE)

# Boxplot of Fare by Pclass
boxplot(Fare ~ Pclass, data = titanic, main = "Boxplot",
        xlab = "Class Level", ylab = "Fare of Tickets")

# Simple Pie Chart
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie(slices, labels = lbls, main = "Pie Chart of Countries")

# Simple Scatterplot
# Columns are referenced through titanic$ rather than attach(titanic), so no
# other object named Age or Fare can be picked up by mistake.
plot(titanic$Age, titanic$Fare, main = "Scatterplot Example",
     xlab = "Age", ylab = "Fare ", pch = 19)

### 6. Subset
## Select variables (i.e., columns)
# Select certain variables with their variable names
myvars <- c("Survived", "Pclass", "Name")
col_titanic <- titanic[myvars]

# Select 1st and 5th through 8th variables
col_titanic2 <- titanic[c(1, 5:8)]

## Exclude 3rd and 5th variable
col_titanic3 <- titanic[c(-3, -5)]

## Select observations (i.e., rows)
# Select first 5 observations
row_titanic1 <- titanic[1:5, ]

# Select observations based on variable values
# which() keeps only the rows where the condition is TRUE, so rows where it is NA are dropped
sub_titanic <- titanic[which(titanic$Sex == "female" &
                               titanic$Age > 35), ]

# Use subset function: rows with Age >= 20 or Age < 10, columns Survived and Pclass
sub_titanic2 <- subset(titanic, Age >= 20 | Age < 10,
                       select = c(Survived, Pclass))

# Use subset function: males older than 25, columns Survived through Age
# Stored under its own name so the previous subset is not overwritten
sub_titanic3 <- subset(titanic, Sex == "male" & Age > 25,
                       select = Survived:Age)

# Random samples
# take a random sample of size 50 from the dataset titanic
# sample without replacement
random_titanic <- titanic[sample(seq_len(nrow(titanic)), 50,
                                 replace = FALSE), ]

# RTutorial
#
# Emily Ko
#
# 10/10/2019