# Exponentiation and addition: 20 to the power 2, plus 36
20^2 + 36
## [1] 436
# Average of three numbers, computed by hand
(2 + 7 + 8) / 3
## [1] 5.7
# Comparison operators return logical values
10 < 5
## [1] FALSE
(10 - 5) == 5
## [1] TRUE
March 2021
# Exponentiation and addition: 20 to the power 2, plus 36
20^2 + 36
## [1] 436
# Average of three numbers, computed by hand
(2 + 7 + 8) / 3
## [1] 5.7
# Comparison operators return logical values
10 < 5
## [1] FALSE
(10 - 5) == 5
## [1] TRUE
A vector is a sequence of numbers. We define a vector \(a\) with 3 elements:
# Combine three numbers into a numeric vector named a
a = c(2, 7, 8)
Now we can do operations with a vector
# Total of all elements of a
sum(a)
## [1] 17
# Average computed by hand: total divided by the number of elements (3)
sum(a) / 3
## [1] 5.7
# The built-in mean() prints the same value
mean(a)
## [1] 5.7
# Simulate 30 coin flips: each draw is a single trial (size = 1) with
# success probability 0.5, so every element of a is either 0 or 1.
a = rbinom(n = 30, size = 1, prob = 0.5)
# Print the simulated sequence. This call used to be fused into the trailing
# comment of the line above and therefore never ran. The draws are random,
# so the values shown below are one sample and will differ between runs.
print(a)
## [1] 1 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0
# Proportion of 1s in the sequence: number of 1s divided by the 30 flips
sum(a) / 30
## [1] 0.4
# Numeric (double) vector holding the numbers 1 through 10
a = seq(1, 10, by = 1)
Element-wise addition
# Adding a single number to a vector adds it to every element
a + 3
## [1] 4 5 6 7 8 9 10 11 12 13
Element-wise comparison
# Compare each element of a with 5; the result printed below holds one
# logical value (TRUE or FALSE) per element.
a > 5
## [1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
Use it to count the number of elements greater than 5
# Summing a logical vector counts its TRUE values (TRUE counts as 1)
sum(a > 5)
## [1] 5
Data frames, a.k.a. tables (like an Excel table)
# Read the Super Bowl data set from the course website into a data frame
sb = read.csv("https://vsokolov.org/courses/data/stats/superbowl.csv")
# Print the first 6 rows
head(sb, n = 6)
## Favorite Underdog Spread Outcome Upset
## 1 GreenBay KansasCity 14 25 0
## 2 GreenBay Oakland 14 19 0
## 3 Baltimore NYJets 18 -9 1
## 4 Minnesota KansasCity 12 -16 1
## 5 Dallas Baltimore 2 -3 1
## 6 Dallas Miami 6 21 0
# Print the first 6 elements of the column Favorite
sb$Favorite[1:6]
## [1] "GreenBay" "GreenBay" "Baltimore" "Minnesota" "Dallas"
## [6] "Dallas"
# Show the structure of the data frame: dimensions and the type of each column
str(sb)
## 'data.frame': 49 obs. of 5 variables:
## $ Favorite: chr "GreenBay" "GreenBay" "Baltimore" "Minnesota" ...
## $ Underdog: chr "KansasCity" "Oakland" "NYJets" "KansasCity" ...
## $ Spread : num 14 13.5 18 12.5 2 6 1 7 3 6 ...
## $ Outcome : num 25 19 -9 -16 -3 21 7 17 10 -4 ...
## $ Upset : int 0 0 1 1 1 0 0 0 0 1 ...
# Per-column summary statistics
summary(sb)
## Favorite Underdog Spread Outcome Upset
## Length:49 Length:49 Min. : 1 Min. :-35 Min. :0.00
## Class :character Class :character 1st Qu.: 3 1st Qu.: -3 1st Qu.:0.00
## Mode :character Mode :character Median : 6 Median : 7 Median :0.00
## Mean : 7 Mean : 6 Mean :0.33
## 3rd Qu.:10 3rd Qu.: 17 3rd Qu.:1.00
## Max. :19 Max. : 45 Max. :1.00
We will use Google stock prices to plot a histogram and a line plot of the closing price
# Read the Google stock price data set from the course website
goog = read.csv("https://vsokolov.org/courses/data/stats/GOOG.csv")
# Print the first 6 rows
head(goog, n = 6)
## Date Open High Low Close Adj.Close Volume
## 1 2005-01-03 98 101 97 101 101 31807176
## 2 2005-01-04 100 101 96 97 97 27614921
## 3 2005-01-05 96 98 96 96 96 16534946
## 4 2005-01-06 97 98 94 94 94 20852067
## 5 2005-01-07 95 97 94 97 97 19398238
## 6 2005-01-10 97 99 96 97 97 15135721
# Histogram of the adjusted closing price
hist(
  goog$Adj.Close,
  main = "Closing Price Histogram",
  xlab = "Closing Price"
)
Line plot (type 'l') in red with a line width of 1.5
# Line plot of the adjusted closing price, drawn in red with line width 1.5.
# Only one vector is supplied, so it is plotted against its index.
plot(
  goog$Adj.Close,
  type = "l",
  col = "red",
  lwd = 1.5,
  ylab = "Price"
)
# Inspect the column types; Date is listed as character (chr) in the output
str(goog)
## 'data.frame': 4051 obs. of 7 variables:
## $ Date : chr "2005-01-03" "2005-01-04" "2005-01-05" "2005-01-06" ...
## $ Open : num 98.3 100.3 96.4 97.2 95 ...
## $ High : num 101.4 101.1 98.1 97.6 96.8 ...
## $ Low : num 97.4 96.4 95.8 93.5 94 ...
## $ Close : num 101 96.9 96.4 93.9 96.6 ...
## $ Adj.Close: num 101 96.9 96.4 93.9 96.6 ...
## $ Volume : int 31807176 27614921 16534946 20852067 19398238 15135721 13969566 16416905 13750146 19352868 ...
# Convert the Date column from character strings to Date objects.
goog$Date = as.Date(goog$Date)
# Re-inspect the structure to confirm the new column type. This call must be
# on its own line: two statements on one line without a separator do not parse.
str(goog)
## 'data.frame': 4051 obs. of 7 variables:
## $ Date : Date, format: "2005-01-03" "2005-01-04" ...
## $ Open : num 98.3 100.3 96.4 97.2 95 ...
## $ High : num 101.4 101.1 98.1 97.6 96.8 ...
## $ Low : num 97.4 96.4 95.8 93.5 94 ...
## $ Close : num 101 96.9 96.4 93.9 96.6 ...
## $ Adj.Close: num 101 96.9 96.4 93.9 96.6 ...
## $ Volume : int 31807176 27614921 16534946 20852067 19398238 15135721 13969566 16416905 13750146 19352868 ...
# Line plot of the adjusted closing price against the Date column
plot(
  goog$Date,
  goog$Adj.Close,
  type = "l",
  xlab = "Date",
  ylab = "Closing Price"
)
Use help function to get documentation
# Open the documentation page for rbinom
help("rbinom")