# Two scalar values reused by the arithmetic examples below
var1 <- 0
var2 <- 10

# Sum of the two values
var1 + var2
## [1] 10

# Product of the two values
var1 * var2
## [1] 0

# Arithmetic on literals; * and / are evaluated left to right,
# after the parenthesised sum
10 * 2
## [1] 20
6 / 2 * (2 + 1)
## [1] 9

# Built-in mathematical functions
exp(5)
## [1] 148.4132
sqrt(4)
## [1] 2
exp(var1)
## [1] 1
log(10, base = 2)
## [1] 3.321928
# To open the help page for a function, type ? before the name of the function:
?log
# Numeric vectors: element-wise arithmetic, recycling, inner products and
# indexing. Each vector is bound to the name (x, y, z) that the statements
# after it actually use.
x <- c(1, 2, 3, 4, 5, 6)
# 5 is added to each element in the vector x
x + 5
## [1] 6 7 8 9 10 11
# each element in x is multiplied by 5
x * 5
## [1] 5 10 15 20 25 30
# each element in x is divided by 5
x / 5
## [1] 0.2 0.4 0.6 0.8 1.0 1.2

# y is shorter than x, so its two elements are recycled along x
y <- c(2, 3)
x + y
## [1] 3 5 5 7 7 9
x * y
## [1] 2 6 6 12 10 18

z <- c(1, 3, 5, 6, 5, 3)
# t() is the transpose function, so %*% here gives a 1 x 1 matrix product
t(z) %*% x
## [,1]
## [1,] 89
# element-wise product, for comparison with the matrix product above
z * x
## [1] 1 6 15 24 25 18
# length() reports how many elements are in an object
length(z)
## [1] 6
# append 10 to z; the result has to be assigned back to z for z[7] to exist
z <- c(z, 10)
z[1] # first entry
## [1] 1
z[7] # 7th entry
## [1] 10
# length() can be put into the square brackets to pick the last entry
z[length(z)]
## [1] 10
# Two 2 x 2 matrices built from the same four values.
# byrow = TRUE fills the matrix one row at a time ...
A <- matrix(c(1, 2, 3, 4), byrow = TRUE, nrow = 2, ncol = 2)
A
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
# ... while byrow = FALSE fills it one column at a time.
A2 <- matrix(c(1, 2, 3, 4), byrow = FALSE, nrow = 2, ncol = 2)
A2
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4

# Matrix product of A with itself
A %*% A
## [,1] [,2]
## [1,] 7 10
## [2,] 15 22
# Element-wise product of A with itself
A * A
## [,1] [,2]
## [1,] 1 4
## [2,] 9 16

# Single entries are addressed as [row, column]
A[2, 2] # row 2, column 2
## [1] 4
A[1, 2] # row 1, column 2
## [1] 2
# Lists can hold objects of different kinds; here a vector and a matrix.
v <- 1:6
v
## [1] 1 2 3 4 5 6
# TRUE is spelled out because T is an ordinary variable that can be reassigned
m <- matrix(c(1, 0, 0, 1), byrow = TRUE, nrow = 2)
m
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1
l <- list(v, m)
l
## [[1]]
## [1] 1 2 3 4 5 6
##
## [[2]]
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1

# We can index specific elements like this:
# the second list element, m, and then row 2 col 1, which stores the value 0
l[[2]][2, 1]
## [1] 0
# the first list element, v, and then the fourth element, which stores the value 4
l[[1]][4]
## [1] 4

# Add a new element to the list
v2 <- 3:4 # create another vector
l[[3]] <- v2 # add the newly created vector
# Packages are installed with install.packages(). Installation is only needed
# once, so the download is skipped when ggplot2 is already available.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# An installed package is loaded into the session with library()
library(ggplot2)
# To read about a data set, type ? before the name of the data set; this opens
# its R Documentation page:
?mpg
# mpg (R Documentation)
#
# This dataset contains a subset of the fuel economy data that the EPA makes
# available on http://fueleconomy.gov. It contains only models which had a new
# release every year between 1999 and 2008 - this was used as a proxy for the
# popularity of the car.
#
# A data frame with 234 rows and 11 variables:
#   manufacturer: manufacturer name
#   model:        model name
#   displ:        engine displacement, in litres
#   year:         year of manufacture
#   cyl:          number of cylinders
#   trans:        type of transmission
#   drv:          the type of drive train, where f = front-wheel drive,
#                 r = rear wheel drive, 4 = 4wd
#   cty:          city miles per gallon
#   hwy:          highway miles per gallon
#   fl:           fuel type
#   class:        "type" of car
# mpg is available in the ggplot2 package
data(mpg)
str(mpg) # structure of the data set
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
# summary of every column: quartiles and mean for the numeric variables,
# length/class/mode for the character variables
summary(mpg)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
# Frequency tables are produced with table()
table(mpg$manufacturer) # manufacturer is categorical variable
##
## audi chevrolet dodge ford honda hyundai jeep
## 18 19 37 25 9 14 8
## land rover lincoln mercury nissan pontiac subaru toyota
## 4 3 4 13 5 14 34
## volkswagen
## 27
table(mpg$year)
##
## 1999 2008
## 117 117
# two arguments give a cross-tabulation: manufacturers in rows, years in columns
table(mpg$manufacturer, mpg$year)
##
## 1999 2008
## audi 9 9
## chevrolet 7 12
## dodge 16 21
## ford 15 10
## honda 5 4
## hyundai 6 8
## jeep 2 6
## land rover 2 2
## lincoln 2 1
## mercury 2 2
## nissan 6 7
## pontiac 3 2
## subaru 6 8
## toyota 20 14
## volkswagen 16 11

# Descriptive statistics for engine displacement
mean(mpg$displ)
## [1] 3.471795
sd(mpg$displ)
## [1] 1.291959
range(mpg$displ)
## [1] 1.6 7.0
# Histogram ----
hist(mpg$displ,
  main = "Engine displacement (in liters)", # title
  col = terrain.colors(20), # color
  xlim = c(0, 8) # range of x axis
)
# help page for hist()
?hist

# Boxplot ----
boxplot(mpg$displ, main = "Engine displacement (in liters)")
# the formula interface draws one box per manufacturer
boxplot(mpg$displ ~ mpg$manufacturer,
  main = "Engine displacement (in liters) by Manufacturer"
)

# Barplot ----
# Use the counts from table() to produce a plot
barplot(table(mpg$manufacturer),
  main = "Counts by Manufacturer",
  ylim = c(0, 40)
)
barplot(table(mpg$manufacturer),
  main = "Counts by Manufacturer",
  xlim = c(0, 40),
  horiz = TRUE, # flip axes
  las = 1 # specify horizontal axis labels
)