### 1. Entering data ###############################################################
## Basic math
2 + 2
## [1] 4
## Print numbers
1:100 # Print the numbers 1 to 100; the console wraps them across several lines
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
##  [18]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34
##  [35]  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
##  [52]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68
##  [69]  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
##  [86]  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100
## Print characters
print("Hello World") # Print Hello World
## [1] "Hello World"
print("Hello World")
## [1] "Hello World"
## Assign value
# Individual values
a <- 1            # Use <- to assign
a
## [1] 1
2 -> b            # Rightward assignment also works, but it is not the usual way
# Multiple assignments in one statement (all three names get the value 3).
# The names deliberately avoid `c`, which would shadow the c() function used below.
d <- e <- f <- 3

# An assignment does not print anything; type the name to print its value
a
## [1] 1
b
## [1] 2
# Multiple values
x <- c(1, 2, 5, 9)  # c = Combine/concatenate
x                   # Print contents of x in the Console
## [1] 1 2 5 9
## Create sequential data
# 0 through 10 and 10 through 0
0:10
##  [1]  0  1  2  3  4  5  6  7  8  9 10
10:0
##  [1] 10  9  8  7  6  5  4  3  2  1  0
# Another way to create sequential data
seq(10)               # 1 to 10
##  [1]  1  2  3  4  5  6  7  8  9 10
seq(30, 0, by = -3)   # Count down from 30 to 0 in steps of 3
##  [1] 30 27 24 21 18 15 12  9  6  3  0
## Math
(y <- c(5, 1, 0, 10))  # Surround an assignment with parentheses to also print it
## [1]  5  1  0 10
x + y         # Adds corresponding elements in x and y
## [1]  6  3  5 19
x * 2         # Multiplies each element in x by 2
## [1]  2  4 10 18
2^6           # Powers/exponents
## [1] 64
sqrt(64)      # Square root
## [1] 8
log(100)      # Natural log: base e (2.71828...)
## [1] 4.60517
log10(100)    # Base 10 log
## [1] 2
### Data types
## 1) Numeric
n1 <- 15
n1
## [1] 15
typeof(n1) # Plain numbers are stored as double-precision ("double") values
## [1] "double"
n2 <- 1.5
n2
## [1] 1.5
typeof(n2)
## [1] "double"
# Create numeric vectors
dbl_var <- c(1, 2.5, 4.5) # a vector of double-precision values
dbl_var
## [1] 1.0 2.5 4.5
## 2) Integer
# Create integer vectors
int_var <- c(1L, 6L, 10L) # an L suffix on each value makes the vector integer
int_var
## [1]  1  6 10
## 3) Logical
# TRUE and FALSE are reserved words denoting the logical constants in R
# (T and F are commonly used as shorthand for them)

# Example 1: a single logical value
l1 <- TRUE
l1
## [1] TRUE
typeof(l1)
## [1] "logical"
# Example 2: a comparison produces a logical value
x <- 1         # sample values (note that x is given a new value here)
y <- 2
z <- x > y     # is x larger than y?
z              # print the logical value
## [1] FALSE
class(z)       # print the class name of z
## [1] "logical"
# Example 3: comparisons are applied to every element of a vector
x <- 1:10
x
##  [1]  1  2  3  4  5  6  7  8  9 10
x > 8
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
x < 5
##  [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
x > 8 | x < 5 # | is the element-wise logical 'or' operator
##  [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE
## 4) Character
c1 <- "c"
c1
## [1] "c"
typeof(c1)
## [1] "character"
c2 <- "a string of text"
c2
## [1] "a string of text"
typeof(c2)
## [1] "character"
### Data structures
## 1) Vector
# Numeric vector
v1 <- c(1, 2, 3, 4, 5)
v1
## [1] 1 2 3 4 5
is.vector(v1)
## [1] TRUE
# Character (string) vector
v2 <- c("a", "b", "c")
v2
## [1] "a" "b" "c"
is.vector(v2)
## [1] TRUE
# Logical vector
v3 <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
v3
## [1]  TRUE  TRUE FALSE FALSE  TRUE
is.vector(v3)
## [1] TRUE
## 2) Matrix
# Six logical values arranged in two rows (filled column by column)
m1 <- matrix(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE), nrow = 2)
m1
##      [,1]  [,2]  [,3]
## [1,] TRUE FALSE  TRUE
## [2,] TRUE FALSE FALSE
# byrow = TRUE fills the matrix row by row;
# byrow = FALSE (the default) fills it column by column
m2 <- matrix(
  c("a", "b",
    "c", "d"),
  nrow = 2,
  byrow = TRUE
)
m2
##      [,1] [,2]
## [1,] "a"  "b" 
## [2,] "c"  "d"
## 3) Array
# Arrays are R data objects that can store data in more than two dimensions
# All elements must be of the same type (numeric, logical, or character)
a1 <- array(1:24, dim = c(4, 3, 2))
a1  # print the three-dimensional data, one 4 x 3 slice at a time
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24
## 4) Data frame
# Can store different data types in different columns
# Every item within a particular column has to be of the same type
vNumeric   <- c(1, 2, 3)
vCharacter <- c("a", "b", "c")
vLogical   <- c(TRUE, FALSE, TRUE)

df1 <- cbind(vNumeric, vCharacter, vLogical)
df1 # cbind() builds a matrix, so all values are coerced to one type (character)
##      vNumeric vCharacter vLogical
## [1,] "1"      "a"        "TRUE"  
## [2,] "2"      "b"        "FALSE" 
## [3,] "3"      "c"        "TRUE"
# Build the data frame directly from the vectors. Converting the cbind() matrix
# with as.data.frame() would keep the coercion, leaving every column character.
# stringsAsFactors = FALSE keeps vCharacter as character in R versions before 4.0.
df2 <- data.frame(vNumeric, vCharacter, vLogical, stringsAsFactors = FALSE)
df2 # A data frame whose columns keep three different data types
##   vNumeric vCharacter vLogical
## 1        1          a     TRUE
## 2        2          b    FALSE
## 3        3          c     TRUE
## 5) List
# A list can hold elements of different data types (and different lengths)
o1 <- c(1, 2, 3)
o2 <- c("a", "b", "c", "d")
o3 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)

# Three vectors of different types stored in one list
list1 <- list(o1, o2, o3)
list1
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "a" "b" "c" "d"
## 
## [[3]]
## [1]  TRUE FALSE  TRUE  TRUE FALSE
# A list can contain another list: list1 is nested as the 4th element of list2
list2 <- list(o1, o2, o3, list1)
list2
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "a" "b" "c" "d"
## 
## [[3]]
## [1]  TRUE FALSE  TRUE  TRUE FALSE
## 
## [[4]]
## [[4]][[1]]
## [1] 1 2 3
## 
## [[4]][[2]]
## [1] "a" "b" "c" "d"
## 
## [[4]][[3]]
## [1]  TRUE FALSE  TRUE  TRUE FALSE
### Coercing types
## Automatic coercion
# When you combine different types they are coerced to the most flexible type.
# Types from least to most flexible are: logical, integer, double, and character.

(coerce1 <- c(1, "b", TRUE))
## [1] "1"    "b"    "TRUE"
typeof(coerce1)
## [1] "character"
## Coerce numeric to integer

(coerce2 <- 5)
## [1] 5
typeof(coerce2)
## [1] "double"
(coerce3 <- as.integer(coerce2))
## [1] 5
typeof(coerce3)
## [1] "integer"
## Coerce character to numeric

(coerce4 <- c("1", "2", "3"))
## [1] "1" "2" "3"
typeof(coerce4)
## [1] "character"
# Pass the whole vector as one argument: as.numeric("1", "2", "3") would
# convert only its first argument and return just 1.
(coerce5 <- as.numeric(coerce4))
## [1] 1 2 3
typeof(coerce5)
## [1] "double"
## Coerce matrix to data frame

(coerce6 <- matrix(1:9, nrow = 3))
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
is.matrix(coerce6)
## [1] TRUE
(coerce7 <- as.data.frame(coerce6)) # columns get the default names V1, V2, V3
##   V1 V2 V3
## 1  1  4  7
## 2  2  5  8
## 3  3  6  9
is.data.frame(coerce7)
## [1] TRUE
### 2. Header and comment #############################################################

## Format 

## ---------------------------
##
## Script name: 
##
## Purpose of script:
##
## Author:  
##
## Date Created:  
##
## Email:  
##
## ---------------------------
##
## Notes:
##   
##
## ---------------------------
### 3. Packages for R ################################################################
## Install packages: Method 1
# Install only the packages that are not installed yet. Re-installing a package
# that is already present can fail on Windows with warnings such as
# "cannot remove prior installation of package 'dplyr'" and "Permission denied"
# (presumably because the package files are in use by a running R session).
pkgs <- c("haven", "stringr", "ggplot2", "dplyr", "foreign", "MASS")
new_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(new_pkgs) > 0) {
  # Use an https mirror so the packages are downloaded over a secure connection
  install.packages(new_pkgs, repos = "https://cloud.r-project.org")
}
## Install packages: Method 2
# Install manually from the Packages tab of the packages/plots/files pane
## Load a package: Method 1
library(ggplot2)

## Load a package: Method 2
# require() also loads a package, but when the package is missing it only
# returns FALSE with a warning, so the script carries on and fails later.
# library() stops with an error right away, which is safer in a script.
library(haven)
### 4. Importing and writing data from a spreadsheet
# Folder that holds the tutorial data files; change this one line to run the
# section on another machine.
data_dir <- "C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R"

## If your data is saved as a tab-delimited text file (*.txt) with a header
# By default read.table() splits fields on any whitespace (spaces or tabs), so
# "sep = " is not needed here. Specify sep = "\t" if values can contain spaces.
df1 <- read.table(file.path(data_dir, "data1.txt"), header = TRUE)
## If your data is saved using another delimiter (e.g. /) without a header
df2 <- read.table(file.path(data_dir, "data2.txt"), sep = "/", header = FALSE)
## If your data is saved as a csv file (delimiter = ,) with a header
df3 <- read.csv(file.path(data_dir, "data3.csv"), sep = ",", header = TRUE)
## Write csv file (row.names = TRUE adds the row names as the first column)
write.csv(df1, file = file.path(data_dir, "write_df1.csv"), row.names = TRUE)

## Export data set to a tab-separated file
# NOTE(review): the content is tab-separated although the file name ends in
# .csv; consider a .txt or .tsv extension if nothing depends on this name.
write.table(df2, file = file.path(data_dir, "write_df2.csv"), sep = "\t",
            row.names = FALSE)
### 5. Simple visualization
# Read Titanic dataset
titanic <- read.csv("C:/Users/ehk994/Desktop/Teaching/Basic tutorial for R/titanic.csv", sep = ",", header = TRUE)
# Simple Bar Plot: number of passengers in each class
counts <- table(titanic$Pclass)
barplot(counts, main = "Passenger Distribution",
        xlab = "Class Level")

# Grouped Bar Plot: passengers by survival status within each class
counts <- table(titanic$Survived, titanic$Pclass)
# Keep the title on one line: a string literal broken across source lines
# would put the line break and the indentation into the plot title.
barplot(counts,
        main = "Passenger Distribution by Survived and Class",
        xlab = "Class Level", col = c("darkblue", "red"),
        legend.text = rownames(counts), beside = TRUE)

# Boxplot of Fare by Pclass
boxplot(Fare ~ Pclass, data = titanic, main = "Boxplot",
        xlab = "Class Level", ylab = "Fare of Tickets")

# Simple Pie Chart
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie(slices, labels = lbls, main = "Pie Chart of Countries")

# Simple Scatterplot
# with() looks up Age and Fare inside titanic for this one call only, instead
# of attach(), which leaves the columns on the search path for the rest of the
# session where they can be confused with other variables of the same name.
with(titanic, plot(Age, Fare, main = "Scatterplot Example",
                   xlab = "Age", ylab = "Fare ", pch = 19))

### 6. Subset
## Select variables (i.e., columns)
# Select certain variables with their variable names
myvars <- c("Survived", "Pclass", "Name")
col_titanic <- titanic[myvars]
# Select 1st and 5th through 8th variables
col_titanic2 <- titanic[c(1, 5:8)]
## Exclude 3rd and 5th variable
col_titanic3 <- titanic[c(-3, -5)]
## Select observations (i.e., rows)
# Select first 5 observations
row_titanic1 <- titanic[1:5, ]
# Select observations based on variable values
# which() keeps only the rows where the condition is TRUE, so rows where the
# condition is NA (e.g. a missing Age) are dropped rather than returned as NA rows
sub_titanic <- titanic[which(titanic$Sex == "female" &
                               titanic$Age > 35), ]
# Use subset function: rows by condition, columns listed by name
sub_titanic2 <- subset(titanic, Age >= 20 | Age < 10,
                       select = c(Survived, Pclass))
# Use subset function: rows by condition, a range of adjacent columns
# Stored under its own name so it does not overwrite sub_titanic2
sub_titanic3 <- subset(titanic, Sex == "male" & Age > 25,
                       select = Survived:Age)
# Random samples
# take a random sample of size 50 from the dataset titanic
# sample without replacement
# seq_len() is used instead of 1:nrow() because 1:0 would count down (1, 0)
# if the data set had no rows
random_titanic <- titanic[sample(seq_len(nrow(titanic)), 50,
                                 replace = FALSE), ]