This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Plot the `cars` data frame with the default plot method
plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

A friendly Introduction to R

# Small example data frame: a name column and a running number
df <- data.frame(a = c("Ha", "Van", "Tuyen", "Ha", "Van"), num = 1:5)

# Keep only the rows whose name is "Ha" or "Tuyen".
# The column must be on the left of %in% so the mask has one entry per row;
# with the operands swapped the mask is c(TRUE, TRUE) and every row is kept.
ha <- df[df$a %in% c("Ha", "Tuyen"), ]

ha
##       a num
## 1    Ha   1
## 3 Tuyen   3
## 4    Ha   4
# Draw 100 uniform random numbers (no seed is set, so they differ on every run)
rd <- runif(n = 100)

# Histogram of the sample; rainbow() generates one colour per observation
hist(x = rd, col = rainbow(n = length(rd)))

Matrix operations

# Matrix multiplication

# 3 x 4 matrix filled row by row; the six values 1:6 are recycled over 12 cells
a <- matrix(1:6, nrow = 3, ncol = 4, byrow = TRUE)

# 3 x 4 matrix filled column by column; 4:9 is recycled in the same way
b <- matrix(4:9, nrow = 3, ncol = 4, byrow = FALSE)

# a (3 x 4) times the transpose of b (4 x 3) gives a 3 x 3 matrix
mt <- a %*% t(b)

# paste() separates its arguments with a space, hence labels such as "row_ 1"
rownames(mt) <- paste("row_", seq_len(nrow(mt))) # assign row names

colnames(mt) <- paste("col_", seq_len(ncol(mt))) # assign column names

mt
##        col_ 1 col_ 2 col_ 3
## row_ 1     58     68     78
## row_ 2     80     94    108
## row_ 3    102    120    138
# Check files

# List every entry in the current working directory
list.files()
##  [1] "ad.html"             "ad.nb.html"          "ad.pdf"             
##  [4] "ad.Rmd"              "as.nb.html"          "as.Rmd"             
##  [7] "AS.tif"              "das.Rmd"             "das_files"          
## [10] "dsa.html"            "dsa.nb.html"         "dsa.Rmd"            
## [13] "dsad.html"           "dsad.nb.html"        "dsad.Rmd"           
## [16] "ew.html"             "ew.Rmd"              "Joint_dplyr.html"   
## [19] "Joint_dplyr.nb.html" "Joint_dplyr.Rmd"     "my_iris.csv"        
## [22] "QGIS Saving.osm"     "Rmark.nb.html"       "Rmark.Rmd"          
## [25] "rsconnect"           "tu1.html"            "tu1.pdf"            
## [28] "tu1.Rmd"             "tuyemap.html"        "tuyemap.nb.html"    
## [31] "tuyemap.Rmd"         "tuyen1_files"        "tuyen3_files"       
## [34] "tuyen5.Rmd"          "tuyen6.Rmd"          "tuyen6_files"       
## [37] "tuyen7.nb.html"      "tuyen7.Rmd"          "Yen Data"
# List only the CSV files, with their paths.
# `pattern` is a regular expression, so the dot is escaped to match a literal
# "."; an unescaped dot would also match names such as "notcsv".
list.files(pattern = "\\.csv$", full.names = TRUE)
## [1] "./my_iris.csv"
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Work on a copy named `mt`
mt <- mtcars
# Set every value of 4 in `gear` to NA

# %in% never returns NA, so the mask stays valid even when `gear` holds NAs
i <- mt$gear %in% 4

mt$gear[i] <- NA

# Another way, in one step. By now `gear` already contains NA, so `== 4`
# would produce an NA-laden index; %in% keeps the index strictly TRUE/FALSE.

mt$gear[mt$gear %in% 4] <- NA

head(mt)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1   NA    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1   NA    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1   NA    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# get a unique set of values 

library(caret)
## Warning: package 'caret' was built under R version 3.2.5
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
# Distinct species labels present in the dataset
unique(iris[["Species"]])
## [1] setosa     versicolor virginica 
## Levels: setosa versicolor virginica
# Number of observations for each species
table(iris[["Species"]])
## 
##     setosa versicolor  virginica 
##         50         50         50
# Rename the species labels

# Convert the column to plain character strings first
iris[["Species"]] <- as.character(iris[["Species"]])

# Direct replacement of a value like this works on a character column
iris$Species <- replace(iris$Species, iris$Species == "setosa", "Setosa")

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  Setosa
## 2          4.9         3.0          1.4         0.2  Setosa
## 3          4.7         3.2          1.3         0.2  Setosa
## 4          4.6         3.1          1.5         0.2  Setosa
## 5          5.0         3.6          1.4         0.2  Setosa
## 6          5.4         3.9          1.7         0.4  Setosa
# Replace a given character inside each label: every "v" becomes "V"

iris$Species <- gsub(pattern = "v", replacement = "V", x = iris$Species)

tail(iris)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 Virginica
## 146          6.7         3.0          5.2         2.3 Virginica
## 147          6.3         2.5          5.0         1.9 Virginica
## 148          6.5         3.0          5.2         2.0 Virginica
## 149          6.2         3.4          5.4         2.3 Virginica
## 150          5.9         3.0          5.1         1.8 Virginica
# First function

# Build a greeting: "Hello", the given name and "!" joined by single spaces.
f <- function(name) {
  paste("Hello", name, "!")
}

f("Tuyen")
## [1] "Hello Tuyen !"
# Calculating the area of a rectangle

# Multiply the two side lengths `x` and `y` and return the result as a
# character string followed by the unit label "square meter".
f1 <- function(x, y) {
  area <- x * y

  paste(area, "square meter")
}

f1(20, 30)
## [1] "600 square meter"
# Calculate the area of a circle

# Area of a circle with radius `r`, rounded to two decimal places.
f2 <- function(r) {
  # Use the argument; a hard-coded radius of 4 would ignore `r` entirely
  area <- pi * r^2

  round(area, 2)
}

f2(8)
## [1] 201.06
# Create a 5 x 5 matrix, filled row by row.
# 1:10 only covers two rows, so it is repeated explicitly up to 25 values;
# passing the short vector directly relies on implicit recycling, which can
# raise a "data length differs from size of matrix" warning in recent R.
my_matrix <- matrix(rep_len(1:10, 25), nrow = 5, ncol = 5, byrow = TRUE)

# apply(): `1` means by row (calculating the mean of each row)

apply(my_matrix, 1, mean)
## [1] 3 8 3 8 3
# Another way to take the mean by row

rowMeans(my_matrix) # other useful functions include sum, sd, min, max, range, median and quantile
## [1] 3 8 3 8 3
# `2` means by column

apply(my_matrix, 2, mean)
## [1] 3 4 5 6 7
colMeans(my_matrix)
## [1] 3 4 5 6 7
library(caret)

# Mean sepal length within each species
tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN = mean, na.rm = TRUE)
##     Setosa Versicolor  Virginica 
##      5.006      5.936      6.588
# Mean sepal width within each species
tapply(X = iris$Sepal.Width, INDEX = iris$Species, FUN = mean, na.rm = TRUE)
##     Setosa Versicolor  Virginica 
##      3.428      2.770      2.974
# First way: group by the Species column, kept as a one-column data frame

aggregate(x = iris[, -5], by = iris[, 5, drop = FALSE], FUN = mean)
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     Setosa        5.006       3.428        1.462       0.246
## 2 Versicolor        5.936       2.770        4.260       1.326
## 3  Virginica        6.588       2.974        5.552       2.026
# Another way: group by an unnamed list, so the key column is called Group.1

aggregate(x = iris[, 1:4], by = list(iris$Species), FUN = mean, na.rm = TRUE)
##      Group.1 Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     Setosa        5.006       3.428        1.462       0.246
## 2 Versicolor        5.936       2.770        4.260       1.326
## 3  Virginica        6.588       2.974        5.552       2.026
# Print the same greeting ten times.
# A `for` loop evaluates to NULL, so `fp` ends up holding NULL.
fp <- for (i in seq_len(10)) {
  print("Hello my friend")
}
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
# break and next

for (i in seq_len(10)) {
  # Leave the loop entirely once the counter goes past 9
  if (i > 9) {
    break
  }
  # Skip 1, 2, 3 and 7 without printing them
  if (any(i == c(1, 2, 3, 7))) {
    next
  }
  print(i)
}
## [1] 4
## [1] 5
## [1] 6
## [1] 8
## [1] 9
# Scatter plot of speed (y) against distance (x), drawn without default axes
plot(speed ~ dist, data = cars, axes = FALSE,
     xlab = "Distance", ylab = "Speed", main = "Scatter Plot")

# Add the bottom (1) and left (2) axes by hand
axis(side = 1)

axis(side = 2)

# Put a text label inside the plot at x = 20, y = 25
text(x = 20, y = 25, labels = "Cars")

# Logistic regression

# Binary outcome: TRUE when the distance is greater than 30
cars$above30 <- cars$dist > 30

head(cars)
##   speed dist above30
## 1     4    2   FALSE
## 2     4   10   FALSE
## 3     7    4   FALSE
## 4     7   22   FALSE
## 5     8   16   FALSE
## 6     9   10   FALSE
# Model the probability of above30 as a function of speed
glm1 <- glm(above30 ~ speed, data = cars, family = "binomial")

# Use predict() to get fitted probabilities for speeds 1 to 30

p <- predict(glm1, newdata = data.frame(speed = 1:30), type = "response")


# Colour each point by its outcome (2 = FALSE, 3 = TRUE). A bare
# col = c(2, 3) is recycled along the rows, so the colours would simply
# alternate instead of marking the two classes.
plot(above30 ~ speed, data = cars, main = "Logistic Regression Plot",
     col = ifelse(cars$above30, 3, 2))

# Overlay the fitted probability curve
lines(1:30, p, col = 4)

# Set all four plot margins to 2 lines so the plotting region gets more space

par(mar = rep(2, times = 4))
# Create some random numbers
x <- rnorm(n = 10)

y <- rnorm(n = 10)

# The seed is only set here, so `value` is reproducible but `x` and `y` are not
set.seed(seed = 123)

value <- 5 * runif(n = length(x))

# Bubble plot: point size and colour are both driven by `value`
plot(y ~ x, cex = value, pch = 16, col = as.numeric(value))

# Label each point with a letter placed above it (pos = 3)
text(x = x, y = y, labels = LETTERS[seq_len(10)], pos = 3)

# Coordinates of ten example points
longitude <- c(
  -116.7, -120.4, -116.7, -113.5, -115.5,
  -120.8, -119.5, -113.7, -113.7, -110.7
)
latitude <- c(
  45.3, 42.6, 38.9, 42.1, 35.7,
  38.9, 36.2, 39, 41.6, 36.9
)

# One random attribute value per point
value <- runif(n = 10)

# Bind the two vectors column-wise into a two-column matrix
lonlat <- cbind(longitude = longitude, latitude = latitude)

class(lonlat)
## [1] "matrix"
# We already created a matrix 'lonlat'

library(sp)
## Warning: package 'sp' was built under R version 3.2.5
# A matrix of coordinates can be turned directly into a SpatialPoints object

pt <- SpatialPoints(coords = lonlat)

class(pt)
## [1] "SpatialPoints"
## attr(,"package")
## [1] "sp"
# Inspect the slots of the object

showDefault(pt)
## An object of class "SpatialPoints"
## Slot "coords":
##       longitude latitude
##  [1,]    -116.7     45.3
##  [2,]    -120.4     42.6
##  [3,]    -116.7     38.9
##  [4,]    -113.5     42.1
##  [5,]    -115.5     35.7
##  [6,]    -120.8     38.9
##  [7,]    -119.5     36.2
##  [8,]    -113.7     39.0
##  [9,]    -113.7     41.6
## [10,]    -110.7     36.9
## 
## Slot "bbox":
##              min    max
## longitude -120.8 -110.7
## latitude    35.7   45.3
## 
## Slot "proj4string":
## CRS arguments: NA
# If the coordinates are stored in a data.frame, use `coordinates(df) <- ~x+y`
# Assign a coordinate system to the SpatialPoints

pt <- SpatialPoints(
  coords = lonlat,
  proj4string = CRS("+proj=longlat +datum=WGS84")
)

pt
## SpatialPoints:
##       longitude latitude
##  [1,]    -116.7     45.3
##  [2,]    -120.4     42.6
##  [3,]    -116.7     38.9
##  [4,]    -113.5     42.1
##  [5,]    -115.5     35.7
##  [6,]    -120.8     38.9
##  [7,]    -119.5     36.2
##  [8,]    -113.7     39.0
##  [9,]    -113.7     41.6
## [10,]    -110.7     36.9
## Coordinate Reference System (CRS) arguments: +proj=longlat
## +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
# Create a SpatialPointsDataFrame: the points plus one attribute row per point

df <- data.frame(Id = seq_len(nrow(lonlat)), value = value)

my_pt <- SpatialPointsDataFrame(coords = pt, data = df)

my_pt
##       coordinates Id      value
## 1  (-116.7, 45.3)  1 0.95683335
## 2  (-120.4, 42.6)  2 0.45333416
## 3  (-116.7, 38.9)  3 0.67757064
## 4  (-113.5, 42.1)  4 0.57263340
## 5  (-115.5, 35.7)  5 0.10292468
## 6  (-120.8, 38.9)  6 0.89982497
## 7  (-119.5, 36.2)  7 0.24608773
## 8    (-113.7, 39)  8 0.04205953
## 9  (-113.7, 41.6)  9 0.32792072
## 10 (-110.7, 36.9) 10 0.95450365
# we can also produce lines and polygons with the raster package

library(raster)
## Warning: package 'raster' was built under R version 3.2.5
# Build a SpatialLines object through the points. raster::spLines() is the
# line counterpart of spPolygons(); stats::spline() would only interpolate
# the x/y values and return a plain list, not a spatial object.
sl <- spLines(lonlat, crs = "+proj=longlat +datum=WGS84")

# Build a SpatialPolygons object with the points as vertices
sp <- spPolygons(lonlat, crs = "+proj=longlat +datum=WGS84")

# Draw the polygon with axes first, then overlay it filled with a coloured border
plot(sp, axes = TRUE)

plot(sp, col = 3, border = 4, add = TRUE)

# If a polygon shapefile has no coordinate system, we can assign one
library(sp)

# example   crs(my_shapefile)<-CRS("+proj=longlat +datum=WGS84")

# remove it

#   crs(my_shapefile)<-NA
# Another way to attach coordinates to an sp object, starting from x/y data

x <- runif(n = 10)

y <- runif(n = 10)

df <- data.frame(x = x, y = y)


# Promote the data.frame to a spatial object, using x and y as coordinates
coordinates(df) <- ~ x + y

# Declare the coordinate reference system of those coordinates
proj4string(df) <- CRS(projargs = "+proj=longlat +datum=WGS84")

df
## class       : SpatialPoints 
## features    : 10 
## extent      : 0.1471136, 0.9942698, 0.02461368, 0.9630242  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0