R 101

Hamze Dokoohaki

September 9, 2017

Introduction

Basics

# Anything after a hash sign is a comment; use it to leave notes for yourself.
myString <- "Hello, World!"

# Show the stored value on the console.
print(myString)
## [1] "Hello, World!"

Data Types

Basic data types

# Each assignment below replaces the previous value of v, so only the
# last one (the complex number) is still stored when typeof() is called.

# Logical
v <- TRUE
# Character (the quotes make this text, not a logical value)
v <- "TRUE"
# Numeric (double)
v <- 23.5
# Integer (the L suffix requests an integer)
v <- 2L
# Complex
v <- 2 + 5i


print(typeof(v))
## [1] "complex"

Complex data types

  • Vectors 1D - Collection of objects of the same type
    • Matrices 2D
    • Arrays nD
# Vector: one-dimensional collection of values of the same type
VectorE <- c(1, 2, 3, 4)
# Matrix: two-dimensional; byrow = TRUE fills the values row by row.
# TRUE is spelled out because the shorthand T is an ordinary variable
# that can be reassigned.
matrixE <- matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
# Array: n-dimensional; dim gives the extent of each dimension
ArrayE <- array(c(1, 2, 3, 4), dim = c(2, 2))

Complex data types

  • Lists
  • Data Frames - 2D - All types
# Data frame: a table with one numeric column and one text column
dfE <- data.frame(
  numbers = c(10, 20, 30, 40),
  text = c("a", "b", "c", "a")
)
# List: elements may each have a different type; here a numeric vector,
# a number, a function, a string and a logical value
listE <- list(c(2, 5, 3), 21.3, mean, "MyName", TRUE)

Vectors

v1 <- c(1, 4, 2, 9)
v2 <- 6:9 # the : operator builds a sequence in steps of 1
v3 <- seq(5, 9, by = 0.4) # seq() builds a sequence with a chosen step
# Tricky one: a vector holds a single type, so c() converts every
# element here to character.
v4 <- c("apple", "red", 5, TRUE)
# Operators
v5 <- v1[c(1, 2)] # access vector elements by position
v6 <- v1 + v2 # element-wise addition
# Tools
v7 <- sort(v1, decreasing = FALSE)
V8 <- diff(v1) # differences between consecutive elements
# Statistical functions
v9 <- mean(v1)

Lists

Most of the outputs produced by functions are lists, because they need to carry different types of data.

# A named list can mix strings, numbers and vectors of different lengths.
MyFarm <- list(
  Name = "AmesFarm",
  ConatcInfo = "5151234",
  Helpers = c("Name1", "Name2"),
  Year = 2013,
  Yield = c(150, 130, 140)
)

# Printing a list shows each element under its $name.
print(MyFarm)
## $Name
## [1] "AmesFarm"
## 
## $ConatcInfo
## [1] "5151234"
## 
## $Helpers
## [1] "Name1" "Name2"
## 
## $Year
## [1] 2013
## 
## $Yield
## [1] 150 130 140

Lists

# Extract a single element by position with [[ ]]
MyFarm[[1]]
## [1] "AmesFarm"
# Extract the same element by name with $
MyFarm$Name
## [1] "AmesFarm"
# List the names of all elements
names(MyFarm)
## [1] "Name"       "ConatcInfo" "Helpers"    "Year"       "Yield"
# Manipulating list elements: overwrite the first element
MyFarm[[1]] <- "BooneFarm"

Example

\(y=a\times x+b\)

# Draw 10 random values for the predictor x and 10 for the response y.
x <- rnorm(10, mean = 0, sd = 1)
y <- rnorm(10, mean = 0, sd = 2)

# Fit the linear model y = a * x + b (assigned with <-, like the rest
# of the file).
lm.obj <- lm(y ~ x)

# The fitted model is stored as a list ...
print(typeof(lm.obj))
## [1] "list"
# ... so its components can be listed by name.
names(lm.obj)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"

DataFrames

They are basically tables whose columns can have different types.

# Build a 15-row table: a constant Year column and a Yield column drawn
# from a normal distribution (mean 150, sd 25). The draws are random, so
# the numbers shown below change from run to run unless a seed is set.
Yieldf <- data.frame(
  Year = rep(2011, 15),
  Yield = rnorm(15, mean = 150, sd = 25)
)

str(Yieldf) # get the structure of the data frame
## 'data.frame':    15 obs. of  2 variables:
##  $ Year : num  2011 2011 2011 2011 2011 ...
##  $ Yield: num  154.1 185.4 170.2 176.9 65.2 ...
head(Yieldf) # show the first six rows
##   Year     Yield
## 1 2011 154.13517
## 2 2011 185.42099
## 3 2011 170.18541
## 4 2011 176.90200
## 5 2011  65.15987
## 6 2011 192.69567

DataFrames

dataframe[ row , col ]

print(Yieldf[, 1]) # select column(s): leave the row index empty
##  [1] 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011
## [15] 2011
print(Yieldf[c(1, 2), ]) # select row(s): leave the column index empty
##   Year    Yield
## 1 2011 154.1352
## 2 2011 185.4210
MeanY <- mean(Yieldf$Yield) # mean of the Yield column
print(MeanY)
## [1] 151.8047

DataFrames

# Add a column by assigning to a new name with $
Yieldf$Avglai <- rnorm(15, mean = 2.5, sd = 0.5)

# Add a column with cbind() (column binding)
Yieldf <- cbind(Yieldf, Maxlai = rnorm(15, mean = 4, sd = 0.5))

head(Yieldf)
##   Year     Yield   Avglai   Maxlai
## 1 2011 154.13517 2.235628 3.962685
## 2 2011 185.42099 2.265439 4.045057
## 3 2011 170.18541 2.426891 4.089151
## 4 2011 176.90200 2.329425 3.707618
## 5 2011  65.15987 3.027811 3.943564
## 6 2011 192.69567 2.563609 4.871428

DataFrames

## How to remove columns: assign NULL to them.
## They are selected by name, not by position, so the right columns are
## dropped even if the column order changes.
Yieldf[, c("Avglai", "Maxlai")] <- NULL
## Row binding: append ten rows for 2012.
## The new rows are built with data.frame() so each column keeps its own
## type; cbind() would build a matrix, which holds a single type.
Yieldf <- rbind(
  Yieldf,
  data.frame(Year = rep(2012, 10), Yield = rnorm(10, mean = 200, sd = 50))
)
tail(Yieldf)
##    Year    Yield
## 20 2012 151.6045
## 21 2012 254.0704
## 22 2012 219.5583
## 23 2012 228.4704
## 24 2012 204.9643
## 25 2012 185.2883

DataFrames

print(nrow(Yieldf)) # number of rows
## [1] 25
print(ncol(Yieldf)) # number of columns
## [1] 2
print(dim(Yieldf)) # dimensions: rows, then columns
## [1] 25  2
print(summary(Yieldf)) # summary statistics for each column
##       Year          Yield       
##  Min.   :2011   Min.   : 65.16  
##  1st Qu.:2011   1st Qu.:143.49  
##  Median :2011   Median :173.83  
##  Mean   :2011   Mean   :172.36  
##  3rd Qu.:2012   3rd Qu.:204.96  
##  Max.   :2012   Max.   :254.07

DataFrames

#Filter year 2012
# Filter year 2012: the logical test picks the rows; the empty column
# index keeps every column.
Yieldf2012 <- Yieldf[Yieldf$Year == 2012, ]

head(Yieldf2012)
##    Year    Yield
## 16 2012 233.3773
## 17 2012 203.0118
## 18 2012 130.4722
## 19 2012 221.1151
## 20 2012 151.6045
## 21 2012 254.0704

Other

  • Factors
  • Strings
  • Dates

Decision making

CSV Files

# Print summary statistics for each column of `cars`.
# NOTE(review): `cars` is not defined in the visible code; presumably it is
# R's built-in example dataset (columns speed and dist, per the output) - confirm.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Functions

# Draw a plot of `pressure`.
# NOTE(review): `pressure` is not defined in the visible code; presumably it is
# R's built-in example dataset - confirm.
plot(pressure)

Loops