# RStudio session basics: working directory, help, and workspace housekeeping
# show the current working directory
getwd()
## [1] "E:/r"
# set the working directory (machine-specific absolute path)
setwd("E:/r")
# help: `?topic` opens the help page for a topic,
# `??text` searches the installed documentation for a text
#?
#??
# search the R site for "mean"; the results open in a browser
RSiteSearch("mean")
## A search query has been submitted to http://search.r-project.org
## The results page should open in your browser shortly
# help() only looks in the currently attached packages, hence the message below
help(kurtosis)
## No documentation for 'kurtosis' in specified packages and libraries:
## you could try '??kurtosis'
# list the objects existing in the workspace
ls()
## character(0)
# remove objects; called without arguments it removes nothing
rm()
# remove every object in the workspace
rm(list=ls())
# show the command history (left commented out)
#history()
# math
# add
1+3
## [1] 4
2*3+8/2
## [1] 10
x<-2
y<-3
z<-4
x*y+z
## [1] 10
8+9/5^2
## [1] 8.36
# division
1/7
## [1] 0.1428571
options(digits = 4)
1/7
## [1] 0.1429
# integer division
11%/%7
## [1] 1
#remainder
11%%7
## [1] 4
# absolute value
x<-10.44345
abs(x)
## [1] 10.44
#root
sqrt(x)
## [1] 3.232
log10(x)
## [1] 1.019
# base=2
log(x,base=2)
## [1] 3.385
x<-c(4,5,9,10)
y<-c(4,4,9,10)
# exact equality of the two vectors
identical(x,y)
## [1] FALSE
# near equality: returns TRUE when the vectors agree within a tolerance,
# otherwise a message reporting the mean relative difference
all.equal(x,y)
## [1] "Mean relative difference: 0.2"
x+y
## [1] 8 9 18 20
sum(x)
## [1] 28
sum(y)
## [1] 27
sum(x,y)
## [1] 55
which(x==y)
## [1] 1 3 4
# total
sum(x==y)
## [1] 3
x==y
## [1] TRUE FALSE TRUE TRUE
# nearest integer
round(34.67)
## [1] 35
# up
ceiling(34.56)
## [1] 35
# down
floor(12.54)
## [1] 12
# round to a specified number of decimal places
round(23.567,digits = 1)
## [1] 23.6
# a double is a number that may carry a fractional part
# a vector is one-dimensional and holds numbers (double, integer) or characters
x<-c(-2)
# let's check
is.character(x)
## [1] FALSE
is.double(x)
## [1] TRUE
# double similar atomic
is.atomic(x)
## [1] TRUE
is.vector(x)
## [1] TRUE
is.na(x)
## [1] FALSE
is.integer(x)
## [1] FALSE
y<-1:10
y
## [1] 1 2 3 4 5 6 7 8 9 10
is.integer(y)
## [1] TRUE
# integer without fraction
x1<-c(-2L)
is.integer(x1)
## [1] TRUE
y2<-1.2:2.4
y2
## [1] 1.2 2.2
is.integer(y2)
## [1] FALSE
is.double(y2)
## [1] TRUE
# logic
x3<-c(2,3,4,5)
x4<-c(4,7,8,3)
x3>x4
## [1] FALSE FALSE FALSE TRUE
mx.x3<-max(x3)
min.x4<-min(x4)
mx.x3|min.x4
## [1] TRUE
x4[!is.na(x4)]
## [1] 4 7 8 3
x5 = 1; y5= 2 # sample values
z<- x5 > y5 # is x larger than y?
z # print the logical value
## [1] FALSE
class(z) # print the class name of z
## [1] "logical"
#Standard logical operations are "&" (and), "|" (or), and "!" (negation).
u = TRUE; v = FALSE
u & v # u AND v
## [1] FALSE
u | v # u OR v
## [1] TRUE
!u # negation of u
## [1] FALSE
(x5== 1) & (y5 == 2)
## [1] TRUE
x<-c(1,2,3,4,34,2,34,2334,23,234,34)
### how many elements
length(x)
## [1] 11
# small to big
sort(x)
## [1] 1 2 2 3 4 23 34 34 34 234 2334
sort(x,decreasing = TRUE)
## [1] 2334 234 34 34 34 23 4 3 2 2 1
log10(x)
## [1] 0.0000 0.3010 0.4771 0.6021 1.5315 0.3010 1.5315 3.3681 1.3617 2.3692
## [11] 1.5315
str(x)
## num [1:11] 1 2 3 4 34 ...
class(x)
## [1] "numeric"
typeof(x)
## [1] "double"
is.character(x)
## [1] FALSE
x%%3
## [1] 1 2 0 1 1 2 1 0 2 0 1
x[x%%3==0]
## [1] 3 2334 234
x[x/3==0]
## numeric(0)
x[x>20]
## [1] 34 34 2334 23 234 34
x[c(2,3,4)]
## [1] 2 3 4
x%%3==0
## [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
# sequence
seq(1,10,1)
## [1] 1 2 3 4 5 6 7 8 9 10
seq(1,10,length=14)
## [1] 1.000 1.692 2.385 3.077 3.769 4.462 5.154 5.846 6.538 7.231
## [11] 7.923 8.615 9.308 10.000
seq(from=1, to=10,by=1)
## [1] 1 2 3 4 5 6 7 8 9 10
seq(0,21,length.out = 10)
## [1] 0.000 2.333 4.667 7.000 9.333 11.667 14.000 16.333 18.667 21.000
# repetition
rep(1:10,1)
## [1] 1 2 3 4 5 6 7 8 9 10
# repeat each element in turn (uncollated), keeping the sequence order
rep(1:10,each=5)
## [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4 5 5 5
## [24] 5 5 6 6 6 6 6 7 7 7 7 7 8 8 8 8 8 9 9 9 9 9 10
## [47] 10 10 10 10
# collated
rep(1:4,times=10)
## [1] 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3
## [36] 4 1 2 3 4
length(x)
## [1] 11
y<-1:11
length(y)
## [1] 11
datafr<-data.frame(x,y)
# character
a<- "hafez"
b<- "ocean"
c1<- c("hafez ahmad")
is.character(c1)
## [1] TRUE
nchar(c1)
## [1] 11
# add
paste(a,b)
## [1] "hafez ocean"
# character + num
paste("I am a student of oceanography",05207021)
## [1] "I am a student of oceanography 5207021"
# to string
toString(c("hafez",1998))
## [1] "hafez, 1998"
datafr1<-data.frame(y,x)
rbind(datafr,datafr1)
## x y
## 1 1 1
## 2 2 2
## 3 3 3
## 4 4 4
## 5 34 5
## 6 2 6
## 7 34 7
## 8 2334 8
## 9 23 9
## 10 234 10
## 11 34 11
## 12 1 1
## 13 2 2
## 14 3 3
## 15 4 4
## 16 34 5
## 17 2 6
## 18 34 7
## 19 2334 8
## 20 23 9
## 21 234 10
## 22 34 11
cbind(datafr,datafr1)
## x y y x
## 1 1 1 1 1
## 2 2 2 2 2
## 3 3 3 3 3
## 4 4 4 4 4
## 5 34 5 5 34
## 6 2 6 6 2
## 7 34 7 7 34
## 8 2334 8 8 2334
## 9 23 9 9 23
## 10 234 10 10 234
## 11 34 11 11 34
# delete a column
df<-data.frame(x=1,y=3,z=3)
df[, !(colnames(df) %in% c("x"))]
## y z
## 1 3 3
emp.data <- data.frame(
emp_id = c (1:5),
emp_name = c("hafez","ahmad","Mini","rohol","kamal"),
salary = c(643.3,515.2,671.0,729.0,943.25),
start_date = as.Date(c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11","2015-03-27")),
stringsAsFactors = FALSE
)
# concatenate strings
paste("a","b")
## [1] "a b"
paste0("a","b")
## [1] "ab"
paste("a","b","c",sep=",")
## [1] "a,b,c"
# paste0() has no `sep` argument: `sep=","` is treated as one more string
# and is appended to the result
paste0("a","b","c",sep=",")
## [1] "abc,"
paste("abb","b","c",sep=",")
## [1] "abb,b,c"
# a numeric vector is converted element-wise to character
paste(c(1:4))
## [1] "1" "2" "3" "4"
paste(c(1:4),c(5:8),sep="")
## [1] "15" "26" "37" "48"
paste0(c("var","pred"),c(1:5))
## [1] "var1" "pred2" "var3" "pred4" "var5"
attributes(datafr)
## $names
## [1] "x" "y"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11
##
## $class
## [1] "data.frame"
unclass(datafr1)
## $y
## [1] 1 2 3 4 5 6 7 8 9 10 11
##
## $x
## [1] 1 2 3 4 34 2 34 2334 23 234 34
##
## attr(,"row.names")
## [1] 1 2 3 4 5 6 7 8 9 10 11
unlist(datafr1)
## y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11 x1 x2 x3 x4
## 1 2 3 4 5 6 7 8 9 10 11 1 2 3 4
## x5 x6 x7 x8 x9 x10 x11
## 34 2 34 2334 23 234 34
object.size(datafr1)
## 624 bytes
object.size(datafr)
## 624 bytes
table(datafr1)
## x
## y 1 2 3 4 23 34 234 2334
## 1 1 0 0 0 0 0 0 0
## 2 0 1 0 0 0 0 0 0
## 3 0 0 1 0 0 0 0 0
## 4 0 0 0 1 0 0 0 0
## 5 0 0 0 0 0 1 0 0
## 6 0 1 0 0 0 0 0 0
## 7 0 0 0 0 0 1 0 0
## 8 0 0 0 0 0 0 0 1
## 9 0 0 0 0 1 0 0 0
## 10 0 0 0 0 0 0 1 0
## 11 0 0 0 0 0 1 0 0
# list
li<-c(1:5)
li
## [1] 1 2 3 4 5
sum(li)
## [1] 15
l<-c("JJJ","HHH","KJ","ll","oo")
lis<-list(li,l)
class(lis)
## [1] "list"
# not sum
#sum(lis)
lis[1:3]
## [[1]]
## [1] 1 2 3 4 5
##
## [[2]]
## [1] "JJJ" "HHH" "KJ" "ll" "oo"
##
## [[3]]
## NULL
lis[]
## [[1]]
## [1] 1 2 3 4 5
##
## [[2]]
## [1] "JJJ" "HHH" "KJ" "ll" "oo"
n = c(2, 3, 5)
s = c("aa", "bb", "cc", "dd", "ee")
b = c(TRUE, FALSE, TRUE, FALSE, FALSE)
x = list(n, s, b, 3)
x
## [[1]]
## [1] 2 3 5
##
## [[2]]
## [1] "aa" "bb" "cc" "dd" "ee"
##
## [[3]]
## [1] TRUE FALSE TRUE FALSE FALSE
##
## [[4]]
## [1] 3
x[2]
## [[1]]
## [1] "aa" "bb" "cc" "dd" "ee"
ok<-list(bob=c("hafez","jj"),roll=c(1,2))
ok[1]
## $bob
## [1] "hafez" "jj"
ok[2]
## $roll
## [1] 1 2
ok
## $bob
## [1] "hafez" "jj"
##
## $roll
## [1] 1 2
ls()
## [1] "a" "b" "c1" "datafr" "datafr1" "df"
## [7] "emp.data" "l" "li" "lis" "min.x4" "mx.x3"
## [13] "n" "ok" "s" "u" "v" "x"
## [19] "x1" "x3" "x4" "x5" "y" "y2"
## [25] "y5" "z"
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 502568 13.5 940480 25.2 750400 20.1
## Vcells 631365 4.9 1308461 10.0 991137 7.6
# conditions and functions
j<-4
if (j>=2){
print( " j is greater than 2")
}
## [1] " j is greater than 2"
j1<-1
if (j1>=2){
print( " j is greater than 2")
}else{
print(" j is not greater")
}
## [1] " j is not greater"
# loop
c<-c(1,23,3)
for (i in c){
print(i)
}
## [1] 1
## [1] 23
## [1] 3
# Print the square of every element of `a`, one value per line.
#   a: numeric vector whose elements are squared and printed.
# Called for its printing side effect only.
f1<-function(a){
  # iterate over the argument itself, not over a variable from the global workspace
  for(e in a){
    print(e^2)
  }
}
# Add the three arguments and print the total.
# The value handed back is whatever print() returns, i.e. the total itself.
add2<-function(a,b,c){
  print(a+b+c)
}
# Add the three arguments, then print the total followed by its cube.
# The value handed back is the result of the last print(), i.e. the cube.
add2<-function(a,b,c){
  total<-a+b+c
  cube<-total^3
  print(total)
  print(cube)
}
# Sum of the squares of x, y and z.
# The squares are added element-wise first, then summed to a single number.
sumofsq<-function(x,y,z){
  squares<-x^2+y^2+z^2
  sum(squares)
}
ocean.marks<-c(90,70,80)
state.marks<-c(80,67,89)
# Add the two mark vectors element-wise and print one labelled total per element.
Total<-function(ocean.marks,state.marks){
  labelled<-paste("total number is :", ocean.marks+state.marks)
  print(labelled)
}
Total(ocean.marks,state.marks)
## [1] "total number is : 170" "total number is : 137" "total number is : 169"
# Report the average of `test.marks` and whether it reaches the 75 threshold.
# Prints three lines: the summary, the verdict, and a completion message.
getresult<- function(test.marks) {
  avg <- mean(test.marks)
  passed <- avg >= 75
  print(paste("average marks :",avg, "performance.test :",passed))
  # choose the verdict first, then print it once
  verdict <- if(passed) {
    "Overall performance of the batch is brilliant"
  } else {
    "Overall performance of the batch is average"
  }
  print(verdict)
  print("performance test completed")
}
getresult(test.marks= c( 70L , 75L , 80L, 85L))
## [1] "average marks : 77.5 performance.test : TRUE"
## [1] "Overall performance of the batch is brilliant"
## [1] "performance test completed"
getresult(test.marks= c( 50L , 60L , 60L, 70L))
## [1] "average marks : 60 performance.test : FALSE"
## [1] "Overall performance of the batch is average"
## [1] "performance test completed"
r1<-c(3,4,5)
r2<-c(2,3,4,5,6,7)
array(c(r1,r2),dim=c(2,2,2))
## , , 1
##
## [,1] [,2]
## [1,] 3 5
## [2,] 4 2
##
## , , 2
##
## [,1] [,2]
## [1,] 3 5
## [2,] 4 6
# names
clnames<-c("hafez","ocean")
rwnames<-c("r1","r2")
m.names<-c("m1","m2")
a<-c(12,4,5,6,5,6,7)
ar<-a
arr<-array(c(r1,r2),dim=c(2,2,2),dimnames =list(rwnames,clnames,m.names))
arr[1,1,1]
## [1] 3
#1 matrix
arr[1,,]
## m1 m2
## hafez 3 3
## ocean 5 5
arr[2,,]
## m1 m2
## hafez 4 4
## ocean 2 6
# 1-row , 2-col , dim-1
arr[2]
## [1] 4
# sum row wise
apply(arr, 1,sum)
## r1 r2
## 16 16
# matrix and array
x<-matrix(seq(1:16),ncol = 4)
x
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
x[2]
## [1] 2
x[c(1,2,3,4)]
## [1] 1 2 3 4
x[x>10]
## [1] 11 12 13 14 15 16
x[x>=4|x<14]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
x[!is.na(x)]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
which(x==min(x))
## [1] 1
x[c(-2,-3,-4,-5)]
## [1] 1 6 7 8 9 10 11 12 13 14 15 16
m<-matrix(seq(1:9),nrow = 3)
m
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
rowMeans(m)
## [1] 4 5 6
rowMeans(m)
## [1] 4 5 6
rowSums(m)
## [1] 12 15 18
colMeans(m)
## [1] 2 5 8
colSums(m)
## [1] 6 15 24
#"""Statistics basics"""
# for discrete
x<-c(1,20,34,43,23)
mean(x)
## [1] 24.2
x2<-c(1,10)
mean(x2)
## [1] 5.5
# for continuous data
x3<-c(seq(1,10,0.20))
x3
## [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6
## [15] 3.8 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4
## [29] 6.6 6.8 7.0 7.2 7.4 7.6 7.8 8.0 8.2 8.4 8.6 8.8 9.0 9.2
## [43] 9.4 9.6 9.8 10.0
mean(x3)
## [1] 5.5
#harmonic.mean
1/mean(1/x3)
## [1] 3.808
mean(x3)
## [1] 5.5
# geometric mean
n=length(x3)
prod(x3)^(1/n)
## [1] 4.708
# MODE
library(modeest)
##
## This is package 'modeest' written by P. PONCET.
## For a complete list of functions, use 'library(help = "modeest")' or 'help.start()'.
c<-c(12,23,12,32,23,23,21)
mlv(c,method="mfv")
## Mode (most likely value): 23
## Bickel's modal skewness: -0.2857
## Call: mlv.default(x = c, method = "mfv")
# Most frequent value of `x`; on ties the value that appears first wins.
Mode11<- function(x){
  distinct<-unique(x)
  # count how often each distinct value occurs, in order of first appearance
  freq<-tabulate(match(x,distinct))
  distinct[which.max(freq)]
}
data1<-read.csv("iris.csv")
library(psych)
## Warning: package 'psych' was built under R version 3.4.4
describeBy(data1,group = data1$name)
## Warning in describeBy(data1, group = data1$name): no grouping variable
## requested
## vars n mean sd median trimmed mad min max range
## X 1 150 75.50 43.45 75.50 75.50 55.60 1.0 150.0 149.0
## Sepal.Length 2 150 5.84 0.83 5.80 5.81 1.04 4.3 7.9 3.6
## Sepal.Width 3 150 3.06 0.44 3.00 3.04 0.44 2.0 4.4 2.4
## Petal.Length 4 150 3.76 1.77 4.35 3.76 1.85 1.0 6.9 5.9
## Petal.Width 5 150 1.20 0.76 1.30 1.18 1.04 0.1 2.5 2.4
## Species* 6 150 2.00 0.82 2.00 2.00 1.48 1.0 3.0 2.0
## skew kurtosis se
## X 0.00 -1.22 3.55
## Sepal.Length 0.31 -0.61 0.07
## Sepal.Width 0.31 0.14 0.04
## Petal.Length -0.27 -1.42 0.14
## Petal.Width -0.10 -1.36 0.06
## Species* 0.00 -1.52 0.07
x<-c(1,3,4,5,5,6)
# sample variance (divisor n-1)
var(x)
## [1] 3.2
# population variance (divisor n)
# derived from var(), valid for n>1
n<-length(x)
n
## [1] 6
var(x)*(n-1)/n
## [1] 2.667
# computed directly, valid for n>0
mean((x-mean(x))^2)
## [1] 2.667
# sample standard deviation
sd(x)
## [1] 1.789
# population standard deviation: square the deviations before averaging them
sqrt(mean((x-mean(x))^2))
## [1] 1.633
# sample variance
var(x)
## [1] 3.2
# variance of grouped data: values x observed with frequencies f
x
## [1] 1 3 4 5 5 6
f<-c(1,2,4,0,1,2)
# frequency-weighted mean
xb<-sum(x*f)/sum(f)
xb
## [1] 4
# frequency-weighted sample variance: sum(f*(x-mean)^2)/(N-1) with N=sum(f)
sum(f*(x-xb)^2)/(sum(f)-1)
## [1] 2.222
# population variance of the unweighted x, rescaling var() by (n-1)/n
var(x)*(length(x)-1)/length(x)
## [1] 2.667
#skew
library(moments)
##
## Attaching package: 'moments'
## The following object is masked from 'package:modeest':
##
## skewness
skewness(c)
## [1] 0.01222
kurtosis(c)
## [1] 2.237
moment(c)
## [1] 20.86
# point estimation
library(MASS)
head(survey)
## Sex Wr.Hnd NW.Hnd W.Hnd Fold Pulse Clap Exer Smoke Height
## 1 Female 18.5 18.0 Right R on L 92 Left Some Never 173.0
## 2 Male 19.5 20.5 Left R on L 104 Left None Regul 177.8
## 3 Male 18.0 13.3 Right L on R 87 Neither None Occas NA
## 4 Male 18.8 18.9 Right R on L NA Neither None Never 160.0
## 5 Male 20.0 20.0 Right Neither 35 Right Some Never 165.0
## 6 Female 18.0 17.7 Right L on R 64 Right Some Never 172.7
## M.I Age
## 1 Metric 18.25
## 2 Imperial 17.58
## 3 <NA> 16.92
## 4 Metric 20.33
## 5 Metric 23.67
## 6 Imperial 21.00
h.s<-survey$Height
# skip missing values
h.s<-mean(h.s ,na.rm = TRUE)
h.s
## [1] 172.4
# cumsum(x)  - the cumulative sum of a vector
# cummax(x)  - the cumulative maximum value
# cummin(x)  - the cumulative minimum value
# cumprod(x) - the cumulative product
# generate random numbers
n<-10
runif(n,min = 1,max = 20)
## [1] 10.529 2.653 19.942 12.321 19.872 19.745 14.982 10.316 10.087 10.663
sample(1:25)
## [1] 17 11 23 18 6 19 4 25 8 13 9 16 20 22 15 14 21 1 12 5 3 10 7
## [24] 24 2
# sampling with replacement (values may repeat)
sample(1:25,replace = TRUE)
## [1] 21 20 19 7 19 17 9 23 7 12 20 14 15 10 20 1 25 7 6 15 2 13 18
## [24] 23 25
sample(1:25,replace = FALSE)
## [1] 15 8 6 25 7 14 4 16 13 19 17 1 21 12 5 2 3 23 18 9 10 11 22
## [24] 20 24
# normal (Gaussian) distribution
rnorm(25,mean=0,sd=1)
## [1] 1.406853 0.491526 1.362261 -0.000651 0.795228 -0.513307 1.301755
## [8] -1.848534 0.550611 -0.360477 -0.848058 -0.075525 -1.972301 0.897286
## [15] 1.212334 -0.215333 0.981031 0.009579 1.867529 0.329169 1.253988
## [22] 0.073832 -0.410007 -1.369259 1.295524
# cumulative distribution
pnorm(25,mean=0,sd=1)
## [1] 1
# density or probability
dnorm(25,mean=0,sd=1)
## [1] 7.654e-137
# quantile (inverse of pnorm): the first argument is a probability in [0,1]
qnorm(0.25,mean=0,sd=1)
## [1] -0.6745
# binomial distribution
#q-12,mcq=5, ans at random=4 or less
dbinom(4,size=12,prob=0.2)
## [1] 0.1329
# size =x trials, prob =probabilty of success
dbinom(2,size=12,prob=0.2)
## [1] 0.2835
pbinom(4,size=12,prob=0.2)*100
## [1] 92.74
# poisson distribution
# P(X <= 16) for a mean of 12
ppois(16,lambda = 12)
## [1] 0.8987
# upper tail P(X > 16); the argument name is spelled out instead of relying on partial matching
ppois(16,lambda = 12,lower.tail=FALSE)
## [1] 0.1013
# continuous uniform distribution
# ten random numbers between 1 and 3
runif(10,min=1,max=3)
## [1] 2.463 1.975 2.413 1.306 2.219 1.102 2.937 2.264 2.764 1.676
# normal distribution
pnorm(84,mean=72,sd=15.2,lower.tail = FALSE)
## [1] 0.2149
# chi-squared distribution
qchisq(0.95,df=7)
## [1] 14.07
rbinom(25,size = 100,prob=0.5)
## [1] 53 48 46 41 54 49 45 51 58 48 55 55 58 46 54 54 48 43 58 52 45 44 50
## [24] 46 48
# quantile: the first argument is a probability in [0,1], not a count
qbinom(0.5,size=100,prob = 0.5)
## [1] 50
# exponential distribution
rexp(n,rate=1)
## [1] 0.121788 2.672243 0.086457 0.841926 0.832866 1.763601 0.002901
## [8] 0.979430 1.357084 0.658506
rexp(10,rate=1)
## [1] 0.7059 0.3921 0.1798 0.1123 0.4125 3.0682 0.5114 0.1138 1.1975 4.1972
#density
dexp(10,rate=1)
## [1] 4.54e-05
# cdf probability
pexp(10,rate=1)
## [1] 1
# quantile: the first argument is a probability in [0,1]
qexp(0.5,rate=1)
## [1] 0.6931
# gamma distribution: ten random draws with shape 1
# (argument name spelled out instead of relying on partial matching)
rgamma(10,shape=1)
## [1] 0.5344 0.4953 0.1757 1.1470 0.1095 0.9614 0.9495 0.3123 1.4166 0.8584
# SET SEED
set.seed(200)
rnorm(10,mean=0,sd=1)
## [1] 0.08476 0.22646 0.43256 0.55807 0.05976 -0.11464 -1.02058
## [8] -0.29705 0.16815 1.41987
rnorm(10,mean=0,sd=1)
## [1] -0.09953 -0.81830 -0.46930 0.57504 -1.87175 -0.63183 -0.04244
## [8] 1.44211 -0.92089 -0.01561
# Student's t distribution
qt(c(.94,.89),df=5)
## [1] 1.873 1.401
# F distribution
qf(0.89,df1=2,df2=2)
## [1] 8.091
library(MASS)
# point estimation; na.rm = TRUE skips missing values
mean(survey$Height,na.rm = TRUE)
## [1] 172.4
# 95% margin of error and confidence interval
H.RES=na.omit(survey$Height)
H.RES
## [1] 173.0 177.8 160.0 165.0 172.7 182.9 157.0 175.0 167.0 156.2 155.0
## [12] 155.0 156.0 157.0 182.9 190.5 177.0 190.5 180.3 180.3 184.0 172.7
## [23] 175.3 167.0 180.0 166.4 180.0 190.0 168.0 182.5 185.0 171.0 169.0
## [34] 154.9 172.0 176.5 180.3 180.3 180.0 170.0 168.0 165.0 200.0 190.0
## [45] 170.2 179.0 182.0 171.0 157.5 177.8 175.3 187.0 167.6 178.0 170.0
## [56] 164.0 183.0 172.0 180.0 170.0 176.0 171.0 167.6 165.0 170.0 165.0
## [67] 165.1 165.1 185.4 176.5 167.6 167.0 162.6 170.0 179.0 183.0 165.0
## [78] 168.0 179.0 190.0 166.5 165.0 175.3 187.0 170.0 159.0 175.0 163.0
## [89] 170.0 172.0 180.0 180.3 175.0 190.5 170.2 185.0 162.6 158.0 159.0
## [100] 193.0 171.0 184.0 177.0 172.0 180.0 175.3 180.3 172.7 178.5 157.0
## [111] 152.0 188.0 178.0 160.0 175.3 189.0 172.0 182.9 170.0 167.0 175.0
## [122] 165.0 172.7 180.0 172.0 185.0 188.0 185.4 165.0 164.0 195.0 165.0
## [133] 152.4 172.7 180.3 173.0 167.6 188.0 187.0 167.0 168.0 191.8 169.2
## [144] 177.0 168.0 170.0 160.0 189.0 180.3 168.0 182.9 165.0 157.5 170.0
## [155] 172.7 164.0 162.6 172.0 165.1 162.5 170.0 175.0 168.0 163.0 165.0
## [166] 173.0 196.0 179.1 180.0 176.0 160.0 157.5 165.0 170.2 154.9 170.0
## [177] 164.0 167.0 174.0 160.0 179.1 168.0 153.5 160.0 165.0 171.5 160.0
## [188] 163.0 165.0 168.9 170.0 185.0 173.0 188.0 171.0 167.6 162.6 150.0
## [199] 170.2 185.0 167.0 185.0 169.0 180.3 165.1 160.0 170.0 183.0 168.5
## attr(,"na.action")
## [1] 3 12 15 25 26 29 31 35 58 68 70 81 83 84 90 92 96
## [18] 108 121 133 157 173 179 203 213 217 225 226
## attr(,"class")
## [1] "omit"
# sample standard deviation of the non-missing heights
std=sd(H.RES)
std
## [1] 9.848
# sample size: count only the non-missing observations
n=length(H.RES)
# standard error of the mean
sterror=std/sqrt(n)
sterror
## [1] 0.6812
# margin of error for a two-sided 95% interval:
# t quantile at 0.975 (2.5% in each tail) times the standard error
e=qt(.975,df=n-1)*sterror
e
## [1] 1.343
hm=mean(H.RES)
hm
## [1] 172.4
# 95% confidence interval for the mean height
hm+c(-e,e)
## [1] 171.0 173.7
# Formula operators:
##   ~   y ~ x       Model y as a function of x
##   +   y ~ a + b   Include column a as well as b
##   -   y ~ a - b   Include a but exclude b
##   :   y ~ a : b   Estimate the interaction of a and b
##   *   y ~ a * b   Include both columns as well as their interaction (that is, y ~ a + b + a:b)
##   |   y ~ a | b   Estimate y as a function of a conditional on b
# Linear regression models the relationship between a response variable and one or more predictors.
l<-data(cars)
lg<-lm(speed ~ dist,cars)
lg
##
## Call:
## lm(formula = speed ~ dist, data = cars)
##
## Coefficients:
## (Intercept) dist
## 8.284 0.166
fitted(lg)
## 1 2 3 4 5 6 7 8 9 10
## 8.615 9.940 8.946 11.926 10.933 9.940 11.264 12.589 13.913 11.099
## 11 12 13 14 15 16 17 18 19 20
## 12.920 10.602 11.595 12.258 12.920 12.589 13.913 13.913 15.900 12.589
## 21 22 23 24 25 26 27 28 29 30
## 14.244 18.218 21.529 11.595 12.589 17.225 13.582 14.907 13.582 14.907
## 31 32 33 34 35 36 37 38 39 40
## 16.562 15.238 17.556 20.867 22.192 14.244 15.900 19.543 13.582 16.231
## 41 42 43 44 45 46 47 48 49 50
## 16.893 17.556 18.880 19.211 17.225 19.874 23.516 23.682 28.152 22.357
residuals(lg)
## 1 2 3 4 5 6 7 8
## -4.61504 -5.93958 -1.94618 -4.92639 -2.93299 -0.93958 -1.26412 -2.58866
## 9 10 11 12 13 14 15 16
## -3.91320 -0.09855 -1.91980 1.39815 0.40474 -0.25753 -0.91980 0.41134
## 17 18 19 20 21 22 23 24
## -0.91320 -0.91320 -2.90001 1.41134 -0.24434 -4.21796 -7.52931 3.40474
## 25 26 27 28 29 30 31 32
## 2.41134 -2.22455 2.41793 1.09339 3.41793 2.09339 0.43772 2.76226
## 33 34 35 36 37 38 39 40
## 0.44431 -2.86704 -4.19158 4.75566 3.09999 -0.54250 6.41793 3.76885
## 41 42 43 44 45 46 47 48
## 3.10658 2.44431 1.11977 2.78863 5.77545 4.12636 0.48388 0.31831
## 49 50
## -4.15201 2.64285
formula(lg)
## speed ~ dist
# or
t.test(cars)
##
## One Sample t-test
##
## data: cars
## t = 13, df = 99, p-value <2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 24.60 33.78
## sample estimates:
## mean of x
## 29.19
# chi square
chisq.test(cars$speed,cars$dist)
## Warning in chisq.test(cars$speed, cars$dist): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: cars$speed and cars$dist
## X-squared = 640, df = 610, p-value = 0.2
chisq.test(mtcars$carb, mtcars$cyl)
## Warning in chisq.test(mtcars$carb, mtcars$cyl): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: mtcars$carb and mtcars$cyl
## X-squared = 24, df = 10, p-value = 0.007
# anova
# data import
setwd("E:/r")
#data<-read.table(file.choose(),header=T,sep=',')
data<- read.csv("E:/r/AirPassengers.csv")
head(data)
## X time AirPassengers
## 1 1 1949 112
## 2 2 1949 118
## 3 3 1949 132
## 4 4 1949 129
## 5 5 1949 121
## 6 6 1949 135
attach(data)
## The following object is masked from package:datasets:
##
## AirPassengers
dim(data)
## [1] 144 3
names(data)
## [1] "X" "time" "AirPassengers"
class(time)
## [1] "numeric"
# frequency of each distinct passenger count
count<- table(AirPassengers)
barplot(count)
# relative frequencies: divide by the number of observations actually tabulated
# rather than by a hard-coded row count
percent<- count/sum(count)
barplot(percent)
barplot(percent,main="barplot",xlab="x",ylab="y")
# horizontal bars
barplot(percent,main="barplot",xlab="x",ylab="y",horiz = TRUE)
# las=1 draws the axis labels horizontally
barplot(percent,main="barplot",xlab="x",ylab="y",las=1)
pie(count)
box()
# boxplot
boxplot(data)
# comp
data<-read.csv("E:/r/LungCapData.csv")
boxplot(LungCap ~ Gender,data=data)
attach(data)
quantile(Age,probs = c(0,0.26,0.50,0.80))
## 0% 26% 50% 80%
## 3 10 13 16
# cut Age into right-closed groups: (-Inf,13], (13,15], (15,17], (17,Inf)
# open-ended outer breaks keep every age in a group instead of turning it into NA
# NOTE: the printed output below was produced with the earlier breaks c(12,13,15,17,22),
# where ages of 12 or less showed as <NA> and the third label read "6/17"
agegroup<-cut(Age,breaks = c(-Inf,13,15,17,Inf),labels = c("<=13","14/15","16/17","18+"))
agegroup
## [1] <NA> 18+ 6/17 14/15 <NA> <NA> <NA> <NA> 14/15 <NA> 18+
## [12] 6/17 <NA> <NA> <NA> <13 14/15 <NA> <NA> 14/15 <NA> <NA>
## [23] 6/17 <NA> <NA> <NA> <NA> <NA> <NA> 18+ <NA> <13 <13
## [34] <13 <NA> <NA> <NA> <NA> <NA> 6/17 14/15 6/17 <NA> <NA>
## [45] <NA> <NA> <NA> <NA> 6/17 <NA> 14/15 14/15 <NA> <NA> 18+
## [56] <NA> <13 18+ <NA> <NA> <NA> 14/15 <NA> <13 <13 <13
## [67] <NA> <NA> <NA> <NA> 14/15 <NA> <NA> <13 <13 <NA> 14/15
## [78] <NA> 6/17 <NA> <NA> 6/17 6/17 18+ 14/15 <NA> 18+ <NA>
## [89] 14/15 18+ <NA> 14/15 <13 14/15 18+ 18+ 6/17 <NA> 6/17
## [100] <NA> <13 14/15 <13 6/17 6/17 <NA> 6/17 <NA> <NA> <NA>
## [111] 14/15 18+ <NA> <NA> 14/15 14/15 <NA> 18+ <NA> 6/17 6/17
## [122] <NA> 6/17 6/17 18+ <NA> 6/17 18+ <NA> <NA> 14/15 6/17
## [133] <13 <NA> 6/17 <13 14/15 14/15 14/15 <13 <NA> 18+ 18+
## [144] 18+ <NA> 6/17 14/15 14/15 14/15 <NA> <NA> <NA> <NA> <NA>
## [155] 6/17 <NA> <NA> <NA> 6/17 <NA> 6/17 <NA> <NA> 18+ <NA>
## [166] <NA> <13 6/17 <NA> <NA> 14/15 <NA> 14/15 <NA> 6/17 14/15
## [177] <NA> 6/17 6/17 6/17 <13 6/17 14/15 6/17 18+ <NA> 6/17
## [188] <NA> 14/15 <NA> 6/17 14/15 <NA> <13 6/17 6/17 18+ <NA>
## [199] <NA> <NA> <13 6/17 14/15 18+ <NA> <NA> <NA> <NA> <13
## [210] <NA> <NA> 14/15 <NA> 14/15 <NA> <NA> <NA> 18+ 14/15 18+
## [221] 14/15 <NA> <NA> <NA> 14/15 <NA> 14/15 <NA> <NA> <NA> <NA>
## [232] <13 18+ 18+ 14/15 18+ <NA> <NA> 6/17 14/15 <NA> 14/15
## [243] <NA> <NA> <NA> <NA> <NA> <NA> 18+ 18+ 18+ <NA> 18+
## [254] <NA> <NA> <NA> 18+ <NA> <NA> <NA> 14/15 14/15 <NA> <NA>
## [265] 18+ <NA> 6/17 <NA> <NA> <NA> <NA> 14/15 14/15 <NA> 6/17
## [276] <NA> <NA> <13 <NA> <NA> 6/17 14/15 14/15 <NA> <13 <NA>
## [287] <NA> <NA> <NA> 18+ <NA> 18+ <NA> 14/15 <13 18+ <NA>
## [298] <13 <NA> <NA> 18+ <13 <NA> 18+ <NA> <NA> 14/15 14/15
## [309] <NA> 18+ <NA> <13 14/15 18+ <NA> <NA> <NA> <NA> <13
## [320] 6/17 <13 <NA> 14/15 <NA> <NA> 18+ 14/15 <NA> 18+ <NA>
## [331] <NA> <NA> <13 14/15 14/15 <NA> <NA> <13 14/15 18+ 14/15
## [342] <13 <NA> <NA> <NA> 6/17 <NA> <NA> <NA> 14/15 <NA> 6/17
## [353] 6/17 <NA> <NA> 18+ 6/17 14/15 <NA> <NA> <NA> 14/15 18+
## [364] <NA> <NA> <NA> <NA> 6/17 6/17 18+ 6/17 <NA> <13 6/17
## [375] 18+ <NA> <13 <NA> <NA> <NA> <NA> <NA> 14/15 <13 6/17
## [386] 18+ <NA> <13 <NA> <NA> <13 6/17 6/17 <NA> <NA> <NA>
## [397] <NA> 6/17 <13 <NA> <NA> <NA> 14/15 <NA> <NA> <13 <NA>
## [408] 14/15 14/15 18+ <NA> <13 18+ 18+ <NA> <NA> <13 14/15
## [419] 14/15 <NA> <NA> 6/17 <NA> <NA> <NA> <NA> <NA> 18+ 6/17
## [430] 6/17 <NA> <NA> <NA> <NA> 14/15 14/15 <NA> 14/15 6/17 <NA>
## [441] <NA> 18+ 6/17 14/15 <13 <NA> <NA> <NA> <NA> <13 <NA>
## [452] <NA> 14/15 <13 6/17 <NA> 14/15 <NA> <13 <NA> 6/17 <13
## [463] <NA> <NA> 6/17 <NA> 14/15 <NA> <NA> 18+ <NA> 18+ <13
## [474] 14/15 6/17 6/17 14/15 <NA> 6/17 6/17 6/17 6/17 <NA> 6/17
## [485] <NA> 18+ <NA> 18+ <NA> 14/15 <NA> 14/15 <NA> <NA> 18+
## [496] 14/15 14/15 <NA> 14/15 <13 <NA> <13 18+ <NA> 14/15 <NA>
## [507] 18+ 6/17 14/15 <13 <NA> <NA> 18+ <NA> 14/15 <NA> <NA>
## [518] <NA> 14/15 14/15 6/17 <NA> <NA> <13 14/15 <NA> <NA> <NA>
## [529] 6/17 <NA> <NA> <NA> <NA> <NA> <NA> <NA> 14/15 <NA> 14/15
## [540] <NA> <NA> <NA> <NA> <NA> 14/15 14/15 <NA> 18+ <NA> <NA>
## [551] 6/17 <NA> <NA> <13 14/15 14/15 <NA> <13 18+ <NA> <NA>
## [562] 6/17 18+ <NA> <NA> 14/15 <NA> 18+ <NA> 18+ 6/17 <13
## [573] <NA> <NA> 6/17 6/17 6/17 6/17 18+ <NA> <NA> <NA> <NA>
## [584] <NA> <NA> 14/15 <NA> 6/17 <13 <13 14/15 <NA> <NA> 14/15
## [595] <NA> <NA> <NA> <NA> <NA> 14/15 <13 18+ 14/15 14/15 <NA>
## [606] 6/17 6/17 6/17 <NA> <NA> 6/17 14/15 <NA> <NA> <NA> <13
## [617] 14/15 <13 18+ 14/15 <NA> 6/17 <NA> <NA> <NA> <NA> 14/15
## [628] <NA> 18+ <NA> <NA> <NA> <NA> 6/17 <NA> <NA> 14/15 <NA>
## [639] <NA> <13 18+ <NA> <13 18+ <NA> 18+ 6/17 <13 <13
## [650] 14/15 <NA> <NA> <NA> <13 <NA> <NA> 14/15 18+ 14/15 <NA>
## [661] 6/17 <NA> <NA> <NA> 6/17 <NA> <NA> <NA> <NA> <NA> <NA>
## [672] 18+ <NA> <NA> 14/15 6/17 18+ 14/15 14/15 <NA> <13 18+
## [683] <NA> 14/15 18+ 6/17 6/17 14/15 <NA> <NA> <NA> <NA> <13
## [694] <NA> <13 14/15 <NA> 6/17 14/15 <13 <NA> 14/15 <NA> <NA>
## [705] 14/15 18+ 14/15 <NA> 14/15 <NA> 6/17 <NA> <NA> 14/15 6/17
## [716] 18+ <NA> 6/17 6/17 <NA> <NA> 18+ <NA> 14/15 <NA>
## Levels: <13 14/15 6/17 18+
labels(agegroup)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11"
## [12] "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "22"
## [23] "23" "24" "25" "26" "27" "28" "29" "30" "31" "32" "33"
## [34] "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44"
## [45] "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55"
## [56] "56" "57" "58" "59" "60" "61" "62" "63" "64" "65" "66"
## [67] "67" "68" "69" "70" "71" "72" "73" "74" "75" "76" "77"
## [78] "78" "79" "80" "81" "82" "83" "84" "85" "86" "87" "88"
## [89] "89" "90" "91" "92" "93" "94" "95" "96" "97" "98" "99"
## [100] "100" "101" "102" "103" "104" "105" "106" "107" "108" "109" "110"
## [111] "111" "112" "113" "114" "115" "116" "117" "118" "119" "120" "121"
## [122] "122" "123" "124" "125" "126" "127" "128" "129" "130" "131" "132"
## [133] "133" "134" "135" "136" "137" "138" "139" "140" "141" "142" "143"
## [144] "144" "145" "146" "147" "148" "149" "150" "151" "152" "153" "154"
## [155] "155" "156" "157" "158" "159" "160" "161" "162" "163" "164" "165"
## [166] "166" "167" "168" "169" "170" "171" "172" "173" "174" "175" "176"
## [177] "177" "178" "179" "180" "181" "182" "183" "184" "185" "186" "187"
## [188] "188" "189" "190" "191" "192" "193" "194" "195" "196" "197" "198"
## [199] "199" "200" "201" "202" "203" "204" "205" "206" "207" "208" "209"
## [210] "210" "211" "212" "213" "214" "215" "216" "217" "218" "219" "220"
## [221] "221" "222" "223" "224" "225" "226" "227" "228" "229" "230" "231"
## [232] "232" "233" "234" "235" "236" "237" "238" "239" "240" "241" "242"
## [243] "243" "244" "245" "246" "247" "248" "249" "250" "251" "252" "253"
## [254] "254" "255" "256" "257" "258" "259" "260" "261" "262" "263" "264"
## [265] "265" "266" "267" "268" "269" "270" "271" "272" "273" "274" "275"
## [276] "276" "277" "278" "279" "280" "281" "282" "283" "284" "285" "286"
## [287] "287" "288" "289" "290" "291" "292" "293" "294" "295" "296" "297"
## [298] "298" "299" "300" "301" "302" "303" "304" "305" "306" "307" "308"
## [309] "309" "310" "311" "312" "313" "314" "315" "316" "317" "318" "319"
## [320] "320" "321" "322" "323" "324" "325" "326" "327" "328" "329" "330"
## [331] "331" "332" "333" "334" "335" "336" "337" "338" "339" "340" "341"
## [342] "342" "343" "344" "345" "346" "347" "348" "349" "350" "351" "352"
## [353] "353" "354" "355" "356" "357" "358" "359" "360" "361" "362" "363"
## [364] "364" "365" "366" "367" "368" "369" "370" "371" "372" "373" "374"
## [375] "375" "376" "377" "378" "379" "380" "381" "382" "383" "384" "385"
## [386] "386" "387" "388" "389" "390" "391" "392" "393" "394" "395" "396"
## [397] "397" "398" "399" "400" "401" "402" "403" "404" "405" "406" "407"
## [408] "408" "409" "410" "411" "412" "413" "414" "415" "416" "417" "418"
## [419] "419" "420" "421" "422" "423" "424" "425" "426" "427" "428" "429"
## [430] "430" "431" "432" "433" "434" "435" "436" "437" "438" "439" "440"
## [441] "441" "442" "443" "444" "445" "446" "447" "448" "449" "450" "451"
## [452] "452" "453" "454" "455" "456" "457" "458" "459" "460" "461" "462"
## [463] "463" "464" "465" "466" "467" "468" "469" "470" "471" "472" "473"
## [474] "474" "475" "476" "477" "478" "479" "480" "481" "482" "483" "484"
## [485] "485" "486" "487" "488" "489" "490" "491" "492" "493" "494" "495"
## [496] "496" "497" "498" "499" "500" "501" "502" "503" "504" "505" "506"
## [507] "507" "508" "509" "510" "511" "512" "513" "514" "515" "516" "517"
## [518] "518" "519" "520" "521" "522" "523" "524" "525" "526" "527" "528"
## [529] "529" "530" "531" "532" "533" "534" "535" "536" "537" "538" "539"
## [540] "540" "541" "542" "543" "544" "545" "546" "547" "548" "549" "550"
## [551] "551" "552" "553" "554" "555" "556" "557" "558" "559" "560" "561"
## [562] "562" "563" "564" "565" "566" "567" "568" "569" "570" "571" "572"
## [573] "573" "574" "575" "576" "577" "578" "579" "580" "581" "582" "583"
## [584] "584" "585" "586" "587" "588" "589" "590" "591" "592" "593" "594"
## [595] "595" "596" "597" "598" "599" "600" "601" "602" "603" "604" "605"
## [606] "606" "607" "608" "609" "610" "611" "612" "613" "614" "615" "616"
## [617] "617" "618" "619" "620" "621" "622" "623" "624" "625" "626" "627"
## [628] "628" "629" "630" "631" "632" "633" "634" "635" "636" "637" "638"
## [639] "639" "640" "641" "642" "643" "644" "645" "646" "647" "648" "649"
## [650] "650" "651" "652" "653" "654" "655" "656" "657" "658" "659" "660"
## [661] "661" "662" "663" "664" "665" "666" "667" "668" "669" "670" "671"
## [672] "672" "673" "674" "675" "676" "677" "678" "679" "680" "681" "682"
## [683] "683" "684" "685" "686" "687" "688" "689" "690" "691" "692" "693"
## [694] "694" "695" "696" "697" "698" "699" "700" "701" "702" "703" "704"
## [705] "705" "706" "707" "708" "709" "710" "711" "712" "713" "714" "715"
## [716] "716" "717" "718" "719" "720" "721" "722" "723" "724" "725"
levels(agegroup)
## [1] "<13" "14/15" "6/17" "18+"
boxplot(LungCap~Smoke,main="lung vs smoke",ylab="lung capacity",las=1)
# las sets the orientation of the axis labels (1 = always horizontal, 2 = perpendicular to the axis)
boxplot(LungCap[Age>=18]~Smoke[Age>=18],main="lung vs smoke for 18+",ylab="lung capacity",las=1)
# lung capacity by smoking status within every age group (not only 18+), hence the title
boxplot(LungCap~Smoke*agegroup,main="lung vs smoke by age group",ylab="lung capacity",las=1)
# same plot with axis labels perpendicular to the axis so the group names fit
boxplot(LungCap~Smoke*agegroup,main="lung vs smoke by age group",ylab="lung capacity",las=2)
# same plot with one colour per box
boxplot(LungCap~Smoke*agegroup,main="lung vs smoke by age group",ylab="lung capacity",col=c(1:8),las=1)
#curve polynomial regression
period <- 120
x <- 1:120
y <- sin(2*pi*x/period) + runif(length(x),-1,1)
plot(x,y, main="Sine Curve + 'Uniform' Noise")
mtext("showing loess smoothing (local regression smoothing)")
y.loess <- loess(y ~ x, span=0.75, data.frame(x=x, y=y))
y.predict <- predict(y.loess, data.frame(x=x))
plot(x,y, main="Sine Curve + 'Uniform' Noise")
lines(x,y.predict)
legend(1,2, legend=c("Line 1"),col=c("red"),cex=0.8)
f=factor(c("M","M","F","M","M","F","F",'M'))
length(f)
## [1] 8
y=rnorm(8)
length(y)
## [1] 8
x<-c(0,2,8,4,32,64,128,16)
length(x)
## [1] 8
par(mfrow=c(2,2))
plot(y)
plot(x)
data<-read.delim("book1.csv",header = FALSE,skip = 2)
library(data.table)
da<-read.table("book1.csv",header=TRUE,sep=',',stringsAsFactors = TRUE)
fread("book1.csv")
## date max.temp
## 1: 20180112 12
## 2: 20180217 13
## 3: 20180311 14
## 4: 20180324 15
## 5: 20180421 16
## 6: 20180521 17
## 7: 20180302 18
## 8: 20181201 19
## 9: 20180129 20
## 10: 20180808 21
## 11: 20180707 22
## 12: 20180721 23
## 13: 20180202 24
## 14: 20180202 25
## 15: 20180921 26
## 16: 20181111 27
## 17: 20180130 28
## 18: 20180308 29
## 19: 20181010 30
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# inspect the data frame `da` read from book1.csv
# (`dt` was never defined in this script, so that name resolved to the stats::dt density function)
glimpse(da)
x<-data("sunspots")
x<-data("sunspot.year")
x
## [1] "sunspot.year"