This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
a <- list(aa=1, bb=2, cc=3)
a
## $aa
## [1] 1
##
## $bb
## [1] 2
##
## $cc
## [1] 3
a$aa + a$cc
## [1] 4
a$bb
## [1] 2
mylist <- list(a=20,b=30,c=40,d=50)
for (value in mylist){
mylist$d <- mylist$d + value
print (value)
print(mylist$d)
}
## [1] 20
## [1] 70
## [1] 30
## [1] 100
## [1] 40
## [1] 140
## [1] 50
## [1] 190
mylist <- c(20, 30,40, 50)
for (value in mylist){
print (value)
}
## [1] 20
## [1] 30
## [1] 40
## [1] 50
example(mean)
##
## mean> x <- c(0:10, 50)
##
## mean> xm <- mean(x)
##
## mean> c(xm, mean(x, trim = 0.10))
## [1] 8.75 5.50
help(mean)
## starting httpd help server ... done
?(mean)
max(4,5,12,8,9)
## [1] 12
max(c(4,5,12,8,9))
## [1] 12
example(sample)
##
## sample> x <- 1:12
##
## sample> # a random permutation
## sample> sample(x)
## [1] 10 5 2 7 9 3 12 8 6 4 11 1
##
## sample> # bootstrap resampling -- only if length(x) > 1 !
## sample> sample(x, replace = TRUE)
## [1] 8 12 8 12 1 5 1 2 5 7 1 6
##
## sample> # 100 Bernoulli trials
## sample> sample(c(0,1), 100, replace = TRUE)
## [1] 0 0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 1 1 1 0 1 0 1 0 0 0 0 0 1 1 1 0 0
## [38] 0 1 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0
## [75] 1 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0
##
## sample> ## More careful bootstrapping -- Consider this when using sample()
## sample> ## programmatically (i.e., in your function or simulation)!
## sample>
## sample> # sample()'s surprise -- example
## sample> x <- 1:10
##
## sample> sample(x[x > 8]) # length 2
## [1] 10 9
##
## sample> sample(x[x > 9]) # oops -- length 10!
## [1] 8 4 2 7 3 5 6 1 10 9
##
## sample> sample(x[x > 10]) # length 0
## integer(0)
##
## sample> ## safer version:
## sample> resample <- function(x, ...) x[sample.int(length(x), ...)]
##
## sample> resample(x[x > 8]) # length 2
## [1] 10 9
##
## sample> resample(x[x > 9]) # length 1
## [1] 10
##
## sample> resample(x[x > 10]) # length 0
## integer(0)
##
## sample> ## R 3.0.0 and later
## sample> sample.int(1e10, 12, replace = TRUE)
## [1] 9924763820 489940550 3289278882 6835350325 9309744136 7828199069
## [7] 8496299045 1280496239 2295404139 3304675824 4519860837 6857869791
##
## sample> sample.int(1e10, 12) # not that there is much chance of duplicates
## [1] 1921215601 1789305775 3142389107 386366575 5684403084 2390372851
## [7] 121274797 9479004110 1005165381 217301856 7831712472 8529189185
sample(1:10, 5)
sample(1:100, replace=TRUE)
#set.seed(200) makes our code to reproduce the same set of numbers
rnorm(10)
## [1] -0.03941122 -0.28337772 1.99217807 2.44597650 0.74793174 -0.41219076
## [7] 0.82421296 2.18279006 -0.45964490 0.47383804
rnorm(10)
## [1] -0.2679571 1.5251823 1.5147949 0.8141427 -1.0449971 -1.2773749
## [7] 1.2563640 0.3155634 -0.9439116 -1.1811224
set.seed(100)
rnorm(10)
## [1] -0.50219235 0.13153117 -0.07891709 0.88678481 0.11697127 0.31863009
## [7] -0.58179068 0.71453271 -0.82525943 -0.35986213
max(sample(1:100, 30))
## [1] 96
library(help="datasets")
x <- 20
x1 <- sqrt(20)
print(x1)
## [1] 4.472136
x <- rnorm(50)
y<- x + rnorm(50, mean=50, sd=.1)
print(x)
## [1] -0.11119350 -0.69001432 -0.22179423 0.18290768 0.41732329 1.06540233
## [7] 0.97020202 -0.10162924 1.40320349 -1.77677563 0.62286739 -0.52228335
## [13] 1.32223096 -0.36344033 1.31906574 0.04377907 -1.87865588 -0.44706218
## [19] -1.73859795 0.17886485 1.89746570 -2.27192549 0.98046414 -1.39882562
## [25] 1.82487242 1.38129873 -0.83885188 -0.26199577 -0.06884403 -0.37888356
## [31] 2.58195893 0.12983414 -0.71302498 0.63799424 0.20169159 -0.06991695
## [37] -0.09248988 0.44890327 -1.06435567 -1.16241932 1.64852175 -2.06209602
## [43] 0.01274972 -1.08752835 0.27053949 1.00845187 -2.07440475 0.89682227
## [49] -0.04999577 -1.34534931
print(y)
## [1] 49.69569 49.38094 49.76242 50.20454 50.49906 51.23812 50.95982 49.84266
## [9] 51.54603 48.13393 50.50711 49.42469 51.56680 49.55331 51.36042 49.92591
## [17] 48.00394 49.51965 48.39771 50.13195 51.98175 47.58228 50.94043 48.52353
## [25] 51.78794 51.50531 49.15040 49.75526 49.95662 49.55966 52.43904 50.09674
## [33] 49.29981 50.73981 50.17613 49.89983 50.06903 50.37153 48.97804 48.77919
## [41] 51.69003 47.78338 49.96087 48.88449 50.37129 50.96149 47.95538 50.85504
## [49] 49.86497 48.72356
cor(x,y)
## [1] 0.9970364
mysum <- function(a,b,c){
sum <- a + b + c
return(sum)
}
print(mysum(5,10,20))
## [1] 35
mysum(10,100,200)
## [1] 310
?var
example (var)
##
## var> var(1:10) # 9.166667
## [1] 9.166667
##
## var> var(1:5, 1:5) # 2.5
## [1] 2.5
##
## var> ## Two simple vectors
## var> cor(1:10, 2:11) # == 1
## [1] 1
##
## var> ## Correlation Matrix of Multivariate sample:
## var> (Cl <- cor(longley))
## GNP.deflator GNP Unemployed Armed.Forces Population
## GNP.deflator 1.0000000 0.9915892 0.6206334 0.4647442 0.9791634
## GNP 0.9915892 1.0000000 0.6042609 0.4464368 0.9910901
## Unemployed 0.6206334 0.6042609 1.0000000 -0.1774206 0.6865515
## Armed.Forces 0.4647442 0.4464368 -0.1774206 1.0000000 0.3644163
## Population 0.9791634 0.9910901 0.6865515 0.3644163 1.0000000
## Year 0.9911492 0.9952735 0.6682566 0.4172451 0.9939528
## Employed 0.9708985 0.9835516 0.5024981 0.4573074 0.9603906
## Year Employed
## GNP.deflator 0.9911492 0.9708985
## GNP 0.9952735 0.9835516
## Unemployed 0.6682566 0.5024981
## Armed.Forces 0.4172451 0.4573074
## Population 0.9939528 0.9603906
## Year 1.0000000 0.9713295
## Employed 0.9713295 1.0000000
##
## var> ## Graphical Correlation Matrix:
## var> symnum(Cl) # highly correlated
## GNP. GNP U A P Y E
## GNP.deflator 1
## GNP B 1
## Unemployed , , 1
## Armed.Forces . . 1
## Population B B , . 1
## Year B B , . B 1
## Employed B B . . B B 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## var> ## Spearman's rho and Kendall's tau
## var> symnum(clS <- cor(longley, method = "spearman"))
## GNP. GNP U A P Y E
## GNP.deflator 1
## GNP B 1
## Unemployed , , 1
## Armed.Forces . 1
## Population B B , 1
## Year B B , 1 1
## Employed B B . B B 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## var> symnum(clK <- cor(longley, method = "kendall"))
## GNP. GNP U A P Y E
## GNP.deflator 1
## GNP B 1
## Unemployed . . 1
## Armed.Forces 1
## Population B B . 1
## Year B B . 1 1
## Employed * * . + + 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## var> ## How much do they differ?
## var> i <- lower.tri(Cl)
##
## var> cor(cbind(P = Cl[i], S = clS[i], K = clK[i]))
## P S K
## P 1.0000000 0.9802390 0.9572562
## S 0.9802390 1.0000000 0.9742171
## K 0.9572562 0.9742171 1.0000000
##
## var> ## cov2cor() scales a covariance matrix by its diagonal
## var> ## to become the correlation matrix.
## var> cov2cor # see the function definition {and learn ..}
## function (V)
## {
## p <- (d <- dim(V))[1L]
## if (!is.numeric(V) || length(d) != 2L || p != d[2L])
## stop("'V' is not a square numeric matrix")
## Is <- sqrt(1/diag(V))
## if (any(!is.finite(Is)))
## warning("diag(.) had 0 or NA entries; non-finite result is doubtful")
## r <- V
## r[] <- Is * V * rep(Is, each = p)
## r[cbind(1L:p, 1L:p)] <- 1
## r
## }
## <bytecode: 0x0000026b59bdbce0>
## <environment: namespace:stats>
##
## var> stopifnot(all.equal(Cl, cov2cor(cov(longley))),
## var+ all.equal(cor(longley, method = "kendall"),
## var+ cov2cor(cov(longley, method = "kendall"))))
##
## var> ##--- Missing value treatment:
## var> C1 <- cov(swiss)
##
## var> range(eigen(C1, only.values = TRUE)$values) # 6.19 1921
## [1] 6.191249 1921.562488
##
## var> ## swM := "swiss" with 3 "missing"s :
## var> swM <- swiss
##
## var> colnames(swM) <- abbreviate(colnames(swiss), minlength=6)
##
## var> swM[1,2] <- swM[7,3] <- swM[25,5] <- NA # create 3 "missing"
##
## var> ## Consider all 5 "use" cases :
## var> (C. <- cov(swM)) # use="everything" quite a few NA's in cov.matrix
## Frtlty Agrclt Exmntn Eductn Cathlc Infn.M
## Frtlty 156.04250 NA NA -79.729510 NA 15.156193
## Agrclt NA NA NA NA NA NA
## Exmntn NA NA NA NA NA NA
## Eductn -79.72951 NA NA 92.456059 NA -2.781684
## Cathlc NA NA NA NA NA NA
## Infn.M 15.15619 NA NA -2.781684 NA 8.483802
##
## var> try(cov(swM, use = "all")) # Error: missing obs...
## Error in cov(swM, use = "all") : missing observations in cov/cor
##
## var> C2 <- cov(swM, use = "complete")
##
## var> stopifnot(identical(C2, cov(swM, use = "na.or.complete")))
##
## var> range(eigen(C2, only.values = TRUE)$values) # 6.46 1930
## [1] 6.462385 1930.505982
##
## var> C3 <- cov(swM, use = "pairwise")
##
## var> range(eigen(C3, only.values = TRUE)$values) # 6.19 1938
## [1] 6.194469 1938.033663
##
## var> ## Kendall's tau doesn't change much:
## var> symnum(Rc <- cor(swM, method = "kendall", use = "complete"))
## F A Ex Ed C I
## Frtlty 1
## Agrclt 1
## Exmntn . . 1
## Eductn . . . 1
## Cathlc . 1
## Infn.M 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## var> symnum(Rp <- cor(swM, method = "kendall", use = "pairwise"))
## F A Ex Ed C I
## Frtlty 1
## Agrclt 1
## Exmntn . . 1
## Eductn . . . 1
## Cathlc . 1
## Infn.M . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## var> symnum(R. <- cor(swiss, method = "kendall"))
## F A Ex Ed C I
## Fertility 1
## Agriculture 1
## Examination . . 1
## Education . . . 1
## Catholic . 1
## Infant.Mortality . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## var> ## "pairwise" is closer componentwise,
## var> summary(abs(c(1 - Rp/R.)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.04481 0.09573 0.15214 0.53941
##
## var> summary(abs(c(1 - Rc/R.)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.02021 0.08482 0.50675 0.16192 7.08509
##
## var> ## but "complete" is closer in Eigen space:
## var> EV <- function(m) eigen(m, only.values=TRUE)$values
##
## var> summary(abs(1 - EV(Rp)/EV(R.)) / abs(1 - EV(Rc)/EV(R.)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8942 1.1464 1.2452 1.3732 1.3722 2.3265
se <- function(x){
v <- var(x)
n <- length(x)
return(sqrt(v/n))
}
mySample <- rnorm(100, mean=20, sd=4)
print(mySample)
## [1] 18.159215 25.392738 21.772286 19.396295 21.822195 19.839381 21.824484
## [8] 18.366300 11.454025 20.627288 22.640196 16.072662 15.545425 18.250609
## [15] 17.935555 21.675984 20.536622 24.138746 26.614013 19.928213 19.903187
## [22] 21.000988 18.651502 19.546585 19.604468 21.056347 20.555935 19.030922
## [29] 20.236126 19.290913 23.178721 20.026951 17.480839 18.990041 17.238311
## [36] 20.810169 23.385526 22.528296 20.805654 19.635717 21.157937 19.781260
## [43] 11.832601 21.433477 18.509597 25.073235 28.674401 15.041109 22.359496
## [50] 20.496077 17.905169 22.480912 22.832886 19.627207 18.819213 15.656739
## [57] 17.500740 19.067974 18.996733 23.815581 18.936110 27.581104 18.280037
## [64] 26.302188 20.647765 15.658188 22.307749 20.112687 18.573186 23.410506
## [71] 22.053461 24.072812 15.914084 17.753327 15.949776 7.916743 21.329401
## [78] 24.962046 22.685398 14.679864 16.597679 12.844677 24.926088 18.662401
## [85] 16.130712 23.517481 18.985685 13.928497 19.904467 20.106638 20.654725
## [92] 21.640032 17.490550 22.547672 15.192261 25.386170 17.604931 21.768818
## [99] 22.454106 18.803615
se(mySample)
## [1] 0.3470977
tempConvert <- function(f){
return (0.5555556 * (f -32))
}
tempConvert((100))
## [1] 37.77778
value <- c(12,67,78,100)
for (val in value) {
print(tempConvert(val))
}
## [1] -11.11111
## [1] 19.44445
## [1] 25.55556
## [1] 37.77778
example("factor")
##
## factor> (ff <- factor(substring("statistics", 1:10, 1:10), levels = letters))
## [1] s t a t i s t i c s
## Levels: a b c d e f g h i j k l m n o p q r s t u v w x y z
##
## factor> as.integer(ff) # the internal codes
## [1] 19 20 1 20 9 19 20 9 3 19
##
## factor> (f. <- factor(ff)) # drops the levels that do not occur
## [1] s t a t i s t i c s
## Levels: a c i s t
##
## factor> ff[, drop = TRUE] # the same, more transparently
## [1] s t a t i s t i c s
## Levels: a c i s t
##
## factor> factor(letters[1:20], labels = "letter")
## [1] letter1 letter2 letter3 letter4 letter5 letter6 letter7 letter8
## [9] letter9 letter10 letter11 letter12 letter13 letter14 letter15 letter16
## [17] letter17 letter18 letter19 letter20
## 20 Levels: letter1 letter2 letter3 letter4 letter5 letter6 letter7 ... letter20
##
## factor> class(ordered(4:1)) # "ordered", inheriting from "factor"
## [1] "ordered" "factor"
##
## factor> z <- factor(LETTERS[3:1], ordered = TRUE)
##
## factor> ## and "relational" methods work:
## factor> stopifnot(sort(z)[c(1,3)] == range(z), min(z) < max(z))
##
## factor> ## Don't show:
## factor> of <- ordered(ff)
##
## factor> stopifnot(identical(range(of, rev(of)), of[3:2]),
## factor+ identical(max(of), of[2]))
##
## factor> ## End(Don't show)
## factor>
## factor> ## suppose you want "NA" as a level, and to allow missing values.
## factor> (x <- factor(c(1, 2, NA), exclude = NULL))
## [1] 1 2 <NA>
## Levels: 1 2 <NA>
##
## factor> is.na(x)[2] <- TRUE
##
## factor> x # [1] 1 <NA> <NA>
## [1] 1 <NA> <NA>
## Levels: 1 2 <NA>
##
## factor> is.na(x)
## [1] FALSE TRUE FALSE
##
## factor> # [1] FALSE TRUE FALSE
## factor>
## factor> ## More rational, since R 3.4.0 :
## factor> factor(c(1:2, NA), exclude = "" ) # keeps <NA> , as
## [1] 1 2 <NA>
## Levels: 1 2 <NA>
##
## factor> factor(c(1:2, NA), exclude = NULL) # always did
## [1] 1 2 <NA>
## Levels: 1 2 <NA>
##
## factor> ## exclude = <character>
## factor> z # ordered levels 'A < B < C'
## [1] C B A
## Levels: A < B < C
##
## factor> factor(z, exclude = "C") # does exclude
## [1] <NA> B A
## Levels: A < B
##
## factor> factor(z, exclude = "B") # ditto
## [1] C <NA> A
## Levels: A < C
##
## factor> ## Now, labels maybe duplicated:
## factor> ## factor() with duplicated labels allowing to "merge levels"
## factor> x <- c("Man", "Male", "Man", "Lady", "Female")
##
## factor> ## Map from 4 different values to only two levels:
## factor> (xf <- factor(x, levels = c("Male", "Man" , "Lady", "Female"),
## factor+ labels = c("Male", "Male", "Female", "Female")))
## [1] Male Male Male Female Female
## Levels: Male Female
##
## factor> #> [1] Male Male Male Female Female
## factor> #> Levels: Male Female
## factor>
## factor> ## Using addNA()
## factor> Month <- airquality$Month
##
## factor> table(addNA(Month))
##
## 5 6 7 8 9 <NA>
## 31 30 31 31 30 0
##
## factor> table(addNA(Month, ifany = TRUE))
##
## 5 6 7 8 9
## 31 30 31 31 30
table(g)
#a <- c(“F”, “F”, “M”, “F”, “M”) #a <- factor(g, levels=c(“F”, “M”)) #a
a <-
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar=c(1,1,1,1))
x <- rnorm(100)
y <- rnorm(100)
plot(x,y, xlab="X axis", ylab="Y axis", main="Main plot")
table(g)
g <- factor(c("adult", "juvenile", "adult", "adult", "adult"))
table(g)
## g
## adult juvenile
## 4 1
a <- factor(c("adult","adult", "juvenile", "juvenile","adult"))
table(a)
## a
## adult juvenile
## 3 2
t <- table(a,g)
t
## g
## a adult juvenile
## adult 2 1
## juvenile 2 0
margin.table(t,1)
## a
## adult juvenile
## 3 2
margin.table(t,2)
## g
## adult juvenile
## 4 1
? data.frame
my.dataset <- data.frame(site= c("A", "B", "A", "A", "B"),
season= c("Winter", "summer", "summer","spring", "fall" ),
pH=c(7.4, 6.3, 8.66,7.2, 8.9))
my.dataset
## site season pH
## 1 A Winter 7.40
## 2 B summer 6.30
## 3 A summer 8.66
## 4 A spring 7.20
## 5 B fall 8.90
my.dataset[3,2]
## [1] "summer"
my.dataset$season <- factor (my.dataset$season , levels=c("Winter", "summer", "spring", "fall") )
my.dataset$site <- factor(my.dataset$site, levels=c("A", "B"), ordered=TRUE)
my.dataset$season
## [1] Winter summer summer spring fall
## Levels: Winter summer spring fall
my.dataset$site
## [1] A B A A B
## Levels: A < B
summary(my.dataset)
## site season pH
## A:3 Winter:1 Min. :6.300
## B:2 summer:2 1st Qu.:7.200
## spring:1 Median :7.400
## fall :1 Mean :7.692
## 3rd Qu.:8.660
## Max. :8.900
my.dataset$pH
## [1] 7.40 6.30 8.66 7.20 8.90
my.dataset$pH>7
## [1] TRUE FALSE TRUE TRUE TRUE
#my.dataset[.(Gender=="M" & Post>90), 1:3]
ph_above7 <- my.dataset[my.dataset$pH >= 7, ]
ph_above7
## site season pH
## 1 A Winter 7.40
## 3 A summer 8.66
## 4 A spring 7.20
## 5 B fall 8.90
ph_all_A <- my.dataset[my.dataset$site == "A", ]
ph_all_A
## site season pH
## 1 A Winter 7.40
## 3 A summer 8.66
## 4 A spring 7.20
ph_siteA <- my.dataset[my.dataset$site == "A", "pH"]
ph_siteA
## [1] 7.40 8.66 7.20
ph_site_summer <- my.dataset[my.dataset$season == "summer", ]
ph_site_summer
## site season pH
## 2 B summer 6.30
## 3 A summer 8.66
my.dataset
## site season pH
## 1 A Winter 7.40
## 2 B summer 6.30
## 3 A summer 8.66
## 4 A spring 7.20
## 5 B fall 8.90
NO3 <- c(234.5, 256.6, 654.1, 356.7, 776.4)
my.dataset$NO3 <- NO3
ds_dim <- dim(my.dataset)
ds_dim
## [1] 5 4
#is.data.frame(my.dataset)