# Basics: assign two scalars and multiply them
a <- 5
b <- 7
# Whitespace around an operator is cosmetic; both spellings print 35.
a*b
## [1] 35
a * b
## [1] 35
# Creating vectors with c()
vec1 <- c(1, 2, 3)                # numbers only
vec2 <- c("one", "two", "three")  # strings only
# Mixing numbers and strings: every element is stored as character
# (see typeof(vec3) below).
vec3 <- c(1, "two", 3, "four")
typeof(vec1)
## [1] "double"
typeof(vec2)
## [1] "character"
typeof(vec3)
## [1] "character"
# A positive index selects that element ...
vec2[2]
## [1] "two"
# ... a negative index returns everything except that element.
vec2[-2]
## [1] "one"   "three"
# New vectors (reused below as matrix rows / columns)
vec1 <- c(1, 2, 3, 4)
vec2 <- c(5, 6, 7, 8)

# Matrices ----
# matrix() fills column by column unless byrow = TRUE is given.
m1 <- matrix(1:8, nrow = 2)
m1
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8
m2 <- matrix(1:8, nrow = 2, byrow = TRUE)
m2
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
# rbind() stacks the vectors as rows; the variable names become row names.
m3 <- rbind(vec1, vec2)
m3
##      [,1] [,2] [,3] [,4]
## vec1    1    2    3    4
## vec2    5    6    7    8
# cbind() glues vectors together as columns.
m4 <- cbind(c(1, 5), c(2, 6), c(3, 7), c(4, 8))
m4
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
m5 <- cbind(vec1, vec2)
m5
##      vec1 vec2
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8

# Indexing: m[row, column]; leaving a position empty selects all of it.
# First row only
m4[1, ]
## [1] 1 2 3 4
# Second column only
m4[, 2]
## [1] 2 6
# First and second columns only
m4[, 1:2]
##      [,1] [,2]
## [1,]    1    2
## [2,]    5    6
# First and third columns only
m4[, c(1, 3)]
##      [,1] [,2]
## [1,]    1    3
## [2,]    5    7
# See the list of built-in datasets
data()
# View() opens a GUI data viewer. That viewer may be unavailable when the
# script is run non-interactively (e.g. with Rscript on a headless machine),
# so only call it in interactive sessions.
if (interactive()) {
  View(AirPassengers)
}
# Compute summary measures for the insect counts
if (interactive()) {
  View(InsectSprays)
}
mean(InsectSprays$count)
## [1] 9.5
median(InsectSprays$count)
## [1] 7
sd(InsectSprays$count)
## [1] 7.203286
# Mean count by type of spray: tapply() applies mean() to the counts within
# each level of `spray`, instead of one hand-written subset per level.
tapply(InsectSprays$count, InsectSprays$spray, mean)
##         A         B         C         D         E         F
## 14.500000 15.333333  2.083333  4.916667  3.500000 16.666667
summary(InsectSprays$count)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##    0.00    3.00    7.00    9.50   14.25   26.00
# Exercise: the built-in `precip` data
# View() opens a GUI data viewer, which may be unavailable in a
# non-interactive run (e.g. Rscript), so guard it.
if (interactive()) {
  View(precip)
}
precip
## Mobile Juneau Phoenix Little Rock
## 67.0 54.7 7.0 48.5
## Los Angeles Sacramento San Francisco Denver
## 14.0 17.2 20.7 13.0
## Hartford Wilmington Washington Jacksonville
## 43.4 40.2 38.9 54.5
## Miami Atlanta Honolulu Boise
## 59.8 48.3 22.9 11.5
## Chicago Peoria Indianapolis Des Moines
## 34.4 35.1 38.7 30.8
## Wichita Louisville New Orleans Portland
## 30.6 43.1 56.8 40.8
## Baltimore Boston Detroit Sault Ste. Marie
## 41.8 42.5 31.0 31.7
## Duluth Minneapolis/St Paul Jackson Kansas City
## 30.2 25.9 49.2 37.0
## St Louis Great Falls Omaha Reno
## 35.9 15.0 30.2 7.2
## Concord Atlantic City Albuquerque Albany
## 36.2 45.5 7.8 33.4
## Buffalo New York Charlotte Raleigh
## 36.1 40.2 42.7 42.5
## Bismark Cincinnati Cleveland Columbus
## 16.2 39.0 35.0 37.0
## Oklahoma City Portland Philadelphia Pittsburg
## 31.4 37.6 39.9 36.2
## Providence Columbia Sioux Falls Memphis
## 42.8 46.4 24.7 49.1
## Nashville Dallas El Paso Houston
## 46.0 35.9 7.8 48.2
## Salt Lake City Burlington Norfolk Richmond
## 15.2 32.5 44.7 42.6
## Seattle Tacoma Spokane Charleston Milwaukee
## 38.8 17.4 40.8 29.1
## Cheyenne San Juan
## 14.6 59.2
# Centre of the precipitation figures
mean(precip)
## [1] 34.88571
median(precip)
## [1] 36.6
# Coefficient of variation: standard deviation as a percentage of the mean
cv <- sd(precip) / mean(precip) * 100
cv
## [1] 39.29015
# Histograms
# NOTE(review): InsectSprays and ToothGrowth come from base R's `datasets`
# package, not MASS. Confirm whether code elsewhere still needs MASS.
library(MASS)
hist(
  InsectSprays$count,
  xlab = "Number of Insects",
  main = "Frequency Histogram of the Number of Insects"
)
# Same histogram, restricted to the observations for spray type A
hist(
  InsectSprays$count[InsectSprays$spray == "A"],
  xlab = "Number of Insects",
  main = "Frequency Histogram of the Number of Insects (Using Type A Spray)"
)
# Exercise: tooth length overall and per supplement type
# View() opens a GUI data viewer, which may be unavailable in a
# non-interactive run (e.g. Rscript), so guard it.
if (interactive()) {
  View(ToothGrowth)
}
hist(ToothGrowth$len)
hist(ToothGrowth$len[ToothGrowth$supp == "OJ"])
hist(ToothGrowth$len[ToothGrowth$supp == "VC"])
# Boxplots
# One box for all counts
boxplot(
  InsectSprays$count,
  ylab = "Number of Insects",
  main = "Boxplot of the Number of Insects"
)
# Formula interface (response ~ group): one box per spray type
boxplot(
  InsectSprays$count ~ InsectSprays$spray,
  ylab = "Number of Insects",
  main = "Boxplots of the Number of Insects per Type of Spray"
)
# Same grouping, this time through plot() with the spray factor as x
plot(
  InsectSprays$spray,
  InsectSprays$count,
  ylab = "Number of Insects",
  main = "Boxplots of the Number of Insects per Type of Spray (using plot function)"
)
# Exercise
boxplot(
  ToothGrowth$len ~ ToothGrowth$supp,
  ylab = "Tooth Length",
  main = "Boxplots of Tooth Length per Type of Supplement"
)
boxplot(
  warpbreaks$breaks ~ warpbreaks$wool,
  ylab = "Number of Breaks",
  main = "Boxplots of the Number of Breaks per Type of Wool"
)
boxplot(
  warpbreaks$breaks ~ warpbreaks$tension,
  ylab = "Number of Breaks",
  main = "Boxplots of the Number of Breaks per Level of Tension"
)
# Scatterplot: eruption duration against waiting time (faithful data)
plot(
  faithful$waiting,
  faithful$eruptions,
  xlab = "Waiting time to next eruption (in mins)",
  ylab = "Eruption time in mins"
)
# Anscombe: plot each of the four x/y pairs, then compare the
# per-column summary statistics.
plot(anscombe$x1, anscombe$y1)
plot(anscombe$x2, anscombe$y2)
plot(anscombe$x3, anscombe$y3)
plot(anscombe$x4, anscombe$y4)
summary(anscombe)
## x1 x2 x3 x4 y1
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8 Min. : 4.260
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8 1st Qu.: 6.315
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8 Median : 7.580
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9 Mean : 7.501
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8 3rd Qu.: 8.570
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19 Max. :10.840
## y2 y3 y4
## Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median :8.140 Median : 7.11 Median : 7.040
## Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :9.260 Max. :12.74 Max. :12.500
# Exercise: trunk circumference against tree age (Orange data)
plot(
  Orange$age,
  Orange$circumference,
  xlab = "Age of the Tree (in days)",
  ylab = "Trunk Circumference (in mm)"
)
# ToothGrowth: compare the OJ and VC length distributions with a Q-Q plot
qqplot(
  ToothGrowth$len[ToothGrowth$supp == "OJ"],
  ToothGrowth$len[ToothGrowth$supp == "VC"],
  xlab = "OJ",
  ylab = "VC"
)
# Normal Q-Q plot of tree heights
qqnorm(trees$Height)
# Exercise: Loblolly height vs. age, then a normal Q-Q plot of each
qqplot(
  Loblolly$height,
  Loblolly$age,
  xlab = "Height (in ft.)",
  ylab = "Age (in years)"
)
qqnorm(Loblolly$height)
qqnorm(Loblolly$age)
# Final exercise: effect of the two drugs in the `sleep` data
# `extra` is the increase in hours of sleep, not total hours slept (its
# minimum is negative), so the axis is labelled accordingly.
boxplot(
  sleep$extra ~ sleep$group,
  ylab = "Increase in Hours of Sleep",
  main = "Effect of Drugs"
)
summary(sleep)
## extra group ID
## Min. :-1.600 1:10 1 :2
## 1st Qu.:-0.025 2:10 2 :2
## Median : 0.950 3 :2
## Mean : 1.540 4 :2
## 3rd Qu.: 3.400 5 :2
## Max. : 5.500 6 :2
## (Other):8
# Mean extra sleep for each drug group
mean(sleep$extra[sleep$group == 1])
## [1] 0.75
mean(sleep$extra[sleep$group == 2])
## [1] 2.33
# Median extra sleep for each drug group
median(sleep$extra[sleep$group == 1])
## [1] 0.35
median(sleep$extra[sleep$group == 2])
## [1] 1.75