This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
plot(cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
A friendly Introduction to R
# Build a small example data frame: a name column and a running number
df <- data.frame(a = c("Ha", "Van", "Tuyen", "Ha", "Van"), num = 1:5)
# Keep only the rows whose name is "Ha" or "Tuyen". The column being filtered
# goes on the left of %in% so the mask holds one TRUE/FALSE per row of df;
# with the operands swapped the mask is c(TRUE, TRUE) and keeps every row.
ha <- df[df$a %in% c("Ha", "Tuyen"), ]
ha
## a num
## 1 Ha 1
## 3 Tuyen 3
## 4 Ha 4
# Draw 100 uniform random numbers and plot their histogram, taking the bar
# colours from a rainbow palette that has one entry per draw
rd <- runif(100)
bar_colours <- rainbow(length(rd))
hist(rd, col = bar_colours)
Matrix operations
# Matrix multiplication
# `a` is 3 x 4 filled row by row, `b` is 3 x 4 filled column by column. Each
# is given six values, which R recycles to fill all twelve cells.
a <- matrix(1:6, nrow = 3, ncol = 4, byrow = TRUE)
b <- matrix(4:9, nrow = 3, ncol = 4, byrow = FALSE)
# a (3 x 4) times the transpose of b (4 x 3) gives a 3 x 3 result
mt <- a %*% t(b)
# Label rows and columns; paste() joins its pieces with a space ("row_ 1")
rownames(mt) <- paste("row_", seq_len(nrow(mt)))
colnames(mt) <- paste("col_", seq_len(ncol(mt)))
mt
## col_ 1 col_ 2 col_ 3
## row_ 1 58 68 78
## row_ 2 80 94 108
## row_ 3 102 120 138
# check files
list.files()
## [1] "ad.html" "ad.nb.html" "ad.pdf"
## [4] "ad.Rmd" "as.nb.html" "as.Rmd"
## [7] "AS.tif" "das.Rmd" "das_files"
## [10] "dsa.html" "dsa.nb.html" "dsa.Rmd"
## [13] "dsad.html" "dsad.nb.html" "dsad.Rmd"
## [16] "ew.html" "ew.Rmd" "Joint_dplyr.html"
## [19] "Joint_dplyr.nb.html" "Joint_dplyr.Rmd" "my_iris.csv"
## [22] "QGIS Saving.osm" "Rmark.nb.html" "Rmark.Rmd"
## [25] "rsconnect" "tu1.html" "tu1.pdf"
## [28] "tu1.Rmd" "tuyemap.html" "tuyemap.nb.html"
## [31] "tuyemap.Rmd" "tuyen1_files" "tuyen3_files"
## [34] "tuyen5.Rmd" "tuyen6.Rmd" "tuyen6_files"
## [37] "tuyen7.nb.html" "tuyen7.Rmd" "Yen Data"
# List only CSV files, with their relative path. `pattern` is a regular
# expression, so the dot is escaped to match a literal "." -- an unescaped
# ".csv$" would also match names such as "datacsv".
list.files(pattern = "\\.csv$", full.names = TRUE)
## [1] "./my_iris.csv"
NA value
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
mt <- mtcars
# Replace every `gear` value equal to 4 with NA.
# Way 1: build the logical mask first, then assign through it.
is_four <- mt$gear == 4
mt$gear[is_four] <- NA
# Way 2: the one-liner. `gear` already contains NA at this point, and
# `NA == 4` is NA, so match with %in% (which never returns NA) to keep the
# index free of missing values.
mt$gear[mt$gear %in% 4] <- NA
head(mt)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 NA 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 NA 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 NA 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# get a unique set of values
library(caret)
## Warning: package 'caret' was built under R version 3.2.5
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
unique(iris$Species) # provide a unique label of species in the dataset
## [1] setosa versicolor virginica
## Levels: setosa versicolor virginica
table(iris$Species) # calculate the number of each species
##
## setosa versicolor virginica
## 50 50 50
# Rename one species label. Work on a character copy of the column, overwrite
# the matching entries, then store the result back in the data frame.
species_chr <- as.character(iris$Species)
species_chr[species_chr %in% "setosa"] <- "Setosa"
iris$Species <- species_chr
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 Setosa
## 2 4.9 3.0 1.4 0.2 Setosa
## 3 4.7 3.2 1.3 0.2 Setosa
## 4 4.6 3.1 1.5 0.2 Setosa
## 5 5.0 3.6 1.4 0.2 Setosa
## 6 5.4 3.9 1.7 0.4 Setosa
# Replace every lower-case "v" in the species labels with an upper-case "V"
# (the pattern is a plain character, so it is matched literally)
iris$Species <- gsub("v", "V", iris$Species, fixed = TRUE)
tail(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 Virginica
## 146 6.7 3.0 5.2 2.3 Virginica
## 147 6.3 2.5 5.0 1.9 Virginica
## 148 6.5 3.0 5.2 2.0 Virginica
## 149 6.2 3.4 5.4 2.3 Virginica
## 150 5.9 3.0 5.1 1.8 Virginica
function
# First function
# Build a greeting for `name`.
# paste() joins its pieces with single spaces, so the result has the form
# "Hello <name> !" (a space precedes the exclamation mark).
f <- function(name) {
  paste("Hello", name, "!")
}
f("Tuyen")
## [1] "Hello Tuyen !"
# Area of a rectangle with side lengths `x` and `y`.
# Returns the area as text followed by its unit label.
f1 <- function(x, y) {
  paste(x * y, "square metter")
}
f1(20,30)
## [1] "600 square metter"
# Area of a circle of radius `r`, rounded to two decimal places.
# The radius argument is used in the formula (pi * r^2); a hard-coded radius
# would return the same area for every input.
f2 <- function(r) {
  round(pi * r^2, 2)
}
f2(8)
## [1] 201.06
apply family function
# create a matrix
# 5 x 5 matrix filled row by row. Only ten values are supplied, so they are
# recycled explicitly with rep_len() to fill all 25 cells. Leaving the
# recycling to matrix() gives the same values, but some R versions warn that
# the data length differs from the size of the matrix.
my_matrix <- matrix(rep_len(1:10, 25), nrow = 5, ncol = 5, byrow = TRUE)
# apply(): margin `1` means "by row" (here: the mean of each row)
apply(my_matrix, 1, mean)
## [1] 3 8 3 8 3
# Shortcut for row means. apply() also works with other summaries such as
# sum, sd, min, max, range, median and quantile.
rowMeans(my_matrix)
## [1] 3 8 3 8 3
# margin `2` means "by column"
apply(my_matrix, 2, mean)
## [1] 3 4 5 6 7
colMeans(my_matrix)
## [1] 3 4 5 6 7
tapply can be used to calculate statistics by group. It needs one categorical (grouping) variable.
library(caret)
# Mean sepal length per species; na.rm = TRUE is passed on to mean()
tapply(iris$Sepal.Length, iris$Species, mean, na.rm = TRUE)
## Setosa Versicolor Virginica
## 5.006 5.936 6.588
# Mean sepal width per species
tapply(iris$Sepal.Width, iris$Species, mean, na.rm = TRUE)
## Setosa Versicolor Virginica
## 3.428 2.770 2.974
The aggregate function is similar to tapply, but calculates statistics for several variables at once.
# first way
# Group by the Species column itself: drop = FALSE keeps it a one-column data
# frame, so the grouping column in the result is named "Species"
aggregate(iris[, -5], iris[, 5, drop = FALSE], mean)
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 Setosa 5.006 3.428 1.462 0.246
## 2 Versicolor 5.936 2.770 4.260 1.326
## 3 Virginica 6.588 2.974 5.552 2.026
# Another way: pass the grouping vector in an unnamed list (the grouping
# column is then called "Group.1"); na.rm = TRUE is passed on to mean()
aggregate(iris[, 1:4], list(iris$Species), mean, na.rm = TRUE)
## Group.1 Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 Setosa 5.006 3.428 1.462 0.246
## 2 Versicolor 5.936 2.770 4.260 1.326
## 3 Virginica 6.588 2.974 5.552 2.026
lapply and sapply functions. While lapply always returns a list, sapply simplifies the result to a vector or matrix when possible.
for loop. A for loop repeats its body a predefined number of times.
# Print the same greeting ten times.
# A `for` loop itself evaluates to NULL, so `fp` ends up holding NULL.
greeting <- "Hello my friend"
fp <- for (i in seq_len(10)) {
  print(greeting)
}
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
## [1] "Hello my friend"
# `next` jumps straight to the following iteration; `break` leaves the loop.
# Prints 4, 5, 6, 8 and 9: the values in `skip_these` are skipped and the
# loop stops as soon as the counter exceeds 9.
skip_these <- c(1, 2, 3, 7)
for (i in seq_len(10)) {
  if (is.element(i, skip_these)) {
    next
  }
  if (i > 9) {
    break
  }
  print(i)
}
## [1] 4
## [1] 5
## [1] 6
## [1] 8
## [1] 9
# Scatter plot of speed against stopping distance. The axes are suppressed in
# plot() and then drawn one by one with axis() (1 = bottom, 2 = left).
plot(speed ~ dist, data = cars, axes = FALSE,
     xlab = "Distance", ylab = "Speed", main = "Scatter Plot")
axis(1)
axis(2)
# Put a text label inside the plot at x = 20, y = 25
text(20, 25, "Cars")
# Logistic regression
# Binary outcome: TRUE when the stopping distance is greater than 30
cars$above30 <- cars$dist > 30
head(cars)
## speed dist above30
## 1 4 2 FALSE
## 2 4 10 FALSE
## 3 7 4 FALSE
## 4 7 22 FALSE
## 5 8 16 FALSE
## 6 9 10 FALSE
# Model the probability of `above30` as a function of speed
glm1 <- glm(above30 ~ speed, data = cars, family = "binomial")
# Predicted probabilities (type = "response") for speeds 1 to 30
p <- predict(glm1, newdata = data.frame(speed = 1:30), type = "response")
# Colour each point by its outcome (2 for FALSE, 3 for TRUE). A bare
# col = c(2, 3) is recycled along the rows and would alternate the two
# colours regardless of the outcome.
plot(above30 ~ speed, data = cars, main = "Logistic Regression Plot",
     col = ifelse(cars$above30, 3, 2))
# Overlay the fitted probability curve
lines(1:30, p, col = 4)
# Enlarge the plot area by shrinking all four margins to 2 lines
par(mar = rep(2, 4))
# Seed the generator BEFORE any random draw so that x, y and value are all
# reproducible; seeding after rnorm() would leave x and y different each run
set.seed(123)
# Create some random numbers
x <- rnorm(10)
y <- rnorm(10)
value <- runif(length(x)) * 5
# Bubble plot: point size and colour index both follow `value`
plot(y ~ x, cex = value, pch = 16, col = value)
# Label every point with a letter placed above it (pos = 3)
text(x, y, LETTERS[seq_along(x)], pos = 3)
# Ten sample longitude/latitude pairs plus one random value per location
longitude <- c(
  -116.7, -120.4, -116.7, -113.5, -115.5,
  -120.8, -119.5, -113.7, -113.7, -110.7
)
latitude <- c(
  45.3, 42.6, 38.9, 42.1, 35.7,
  38.9, 36.2, 39.0, 41.6, 36.9
)
value <- runif(10)
# Bind the two vectors column-wise into a two-column coordinate matrix
lonlat <- cbind(longitude = longitude, latitude = latitude)
class(lonlat)
## [1] "matrix"
# The coordinate matrix `lonlat` was created above
library(sp)
## Warning: package 'sp' was built under R version 3.2.5
# Create a SpatialPoints object directly from the two-column coordinate matrix
pt<- SpatialPoints(lonlat)
class(pt)
## [1] "SpatialPoints"
## attr(,"package")
## [1] "sp"
# Show the object's slots (coords, bbox and proj4string)
showDefault(pt)
## An object of class "SpatialPoints"
## Slot "coords":
## longitude latitude
## [1,] -116.7 45.3
## [2,] -120.4 42.6
## [3,] -116.7 38.9
## [4,] -113.5 42.1
## [5,] -115.5 35.7
## [6,] -120.8 38.9
## [7,] -119.5 36.2
## [8,] -113.7 39.0
## [9,] -113.7 41.6
## [10,] -110.7 36.9
##
## Slot "bbox":
## min max
## longitude -120.8 -110.7
## latitude 35.7 45.3
##
## Slot "proj4string":
## CRS arguments: NA
# If the coordinates are columns of a data frame instead, use
# `coordinates(df) <- ~x+y`
# Assign a coordinate system to the SpatialPoints
wgs84 <- CRS("+proj=longlat +datum=WGS84")
pt <- SpatialPoints(lonlat, proj4string = wgs84)
pt
## SpatialPoints:
## longitude latitude
## [1,] -116.7 45.3
## [2,] -120.4 42.6
## [3,] -116.7 38.9
## [4,] -113.5 42.1
## [5,] -115.5 35.7
## [6,] -120.8 38.9
## [7,] -119.5 36.2
## [8,] -113.7 39.0
## [9,] -113.7 41.6
## [10,] -110.7 36.9
## Coordinate Reference System (CRS) arguments: +proj=longlat
## +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
# Create a SpatialPointsDataFrame: one attribute row (Id, value) per point.
# seq_len() is used for the Id so that an empty matrix yields no rows
# instead of the c(1, 0) that 1:nrow() would produce.
df <- data.frame(Id = seq_len(nrow(lonlat)), value = value)
my_pt <- SpatialPointsDataFrame(pt, data = df)
my_pt
## coordinates Id value
## 1 (-116.7, 45.3) 1 0.95683335
## 2 (-120.4, 42.6) 2 0.45333416
## 3 (-116.7, 38.9) 3 0.67757064
## 4 (-113.5, 42.1) 4 0.57263340
## 5 (-115.5, 35.7) 5 0.10292468
## 6 (-120.8, 38.9) 6 0.89982497
## 7 (-119.5, 36.2) 7 0.24608773
## 8 (-113.7, 39) 8 0.04205953
## 9 (-113.7, 41.6) 9 0.32792072
## 10 (-110.7, 36.9) 10 0.95450365
# The raster package can also build line and polygon objects from a
# coordinate matrix with spLines() and spPolygons()
library(raster)
## Warning: package 'raster' was built under R version 3.2.5
# spLines() is the raster constructor for spatial lines; stats::spline()
# would only interpolate the coordinates and return a plain list
sl <- spLines(lonlat, crs = "+proj=longlat +datum=WGS84")
sp <- spPolygons(lonlat, crs = "+proj=longlat +datum=WGS84")
# Draw the polygon with axes, then redraw it filled (col) with a coloured
# outline (border) on top of the same plot
plot(sp, axes = TRUE)
plot(sp, col = 3, border = 4, add = TRUE)
# If a polygon shapefile has no coordinate system, we can assign one
library(sp)
# Example: crs(my_shapefile) <- CRS("+proj=longlat +datum=WGS84")
# To remove it again:
# crs(my_shapefile) <- NA
# Another way to get an sp object: start from a data frame with x and y
# columns, declare those columns as coordinates, then attach the CRS
x <- runif(10)
y <- runif(10)
df <- data.frame(x, y)
coordinates(df) <- ~ x + y
lonlat_crs <- CRS("+proj=longlat +datum=WGS84")
proj4string(df) <- lonlat_crs
df
## class : SpatialPoints
## features : 10
## extent : 0.1471136, 0.9942698, 0.02461368, 0.9630242 (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0