1. Vector

# Vector basics: creating vectors, element-wise arithmetic and inspecting objects.
c(1, 1.3, 8) # Create a numeric vector by combining values with c()
## [1] 1.0 1.3 8.0
x = 2:5 # Integer vector holding the consecutive values 2 through 5
x
## [1] 2 3 4 5
x^2 + 1/4 # Element-wise arithmetic: square every element of x, then add 1/4 to each result
## [1]  4.25  9.25 16.25 25.25
class(x) # Class (type) of the object
## [1] "integer"
set.seed(2021) # Seed the random number generator so the sample() call below returns the same result on every run
y = sample(1:50) # Random permutation of the integers 1 to 50 (each value appears exactly once)
y
##  [1]  7 38 46 39 12  6 49 44  5 47 23 48 18  3 26 22 31 19  4 21 35 42  9 45 43
## [26] 11 36 27 30 40 15  2 24 16 20  1  8 17 34 29 32 10 41 37 50 33 13 25 28 14
summary(y) # Six-number summary: min, 1st quartile, median, mean, 3rd quartile and max
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   13.25   25.50   25.50   37.75   50.00
class(y) # Class (type) of y
## [1] "integer"
str(y) # Compact description of an object's structure. Here y is an integer vector ("int") of length 50 ("[1:50]"), shown with its first few values
##  int [1:50] 7 38 46 39 12 6 49 44 5 47 ...

2. Data Frame

# Data frame basics: inspecting the cars dataset and summarising its columns.
head(cars, 8) # Display the first 8 rows of the cars dataset.
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
## 7    10   18
## 8    10   26
class(cars) # Class (type) of the object. The cars dataset is a data frame.
## [1] "data.frame"
summary(cars) # Per-column statistical summary of cars: min, 1st quartile, median, mean, 3rd quartile and max.
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
summary(cars$speed) # The same statistical summary for only the speed column of the cars dataset
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    12.0    15.0    15.4    19.0    25.0
summary(cars$dist) # The same statistical summary for only the dist (distance) column of the cars dataset
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   26.00   36.00   42.98   56.00  120.00
speed = cars$speed # Copy the speed column of the cars dataset into its own variable
mean(speed) # Arithmetic mean of speed
## [1] 15.4
median(speed) # Median of speed
## [1] 15
min(speed) # Smallest speed value
## [1] 4
max(speed) # Largest speed value
## [1] 25
quantile(speed) # Default quantiles: 0% (min), 25%, 50% (median), 75% and 100% (max)
##   0%  25%  50%  75% 100% 
##    4   12   15   19   25
quantile(speed, probs=seq(0,1, by = 0.20)) # Quantiles at every 20% step from 0% to 100%
##   0%  20%  40%  60%  80% 100% 
##    4   11   14   17   20   25
sd(speed) # Standard deviation of the speed column (the square root of the variance below)
## [1] 5.287644
var(speed) # Variance of the speed column
## [1] 27.95918
dist = cars$dist # Copy the dist column of the cars dataset into its own variable
# Correlation
cor(speed, dist) # Correlation coefficient between speed and dist (Pearson is the default method)
## [1] 0.8068949
str(cars) # Compact structure of cars: 50 observations of 2 numeric variables
## 'data.frame':    50 obs. of  2 variables:
##  $ speed: num  4 4 7 7 8 9 10 10 10 11 ...
##  $ dist : num  2 10 4 22 16 10 18 26 34 17 ...
dim(cars) # Dimensions of cars: number of rows followed by number of columns
## [1] 50  2
View(cars) # Open the dataset in a spreadsheet-style viewer

3. Data Subsets

# Subsetting with [row, column]: leaving an index empty selects everything along that dimension.
cars[1,] # First row only, with all columns
##   speed dist
## 1     4    2
cars[,1] # All rows of the first column (the result is printed as a plain vector)
##  [1]  4  4  7  7  8  9 10 10 10 11 11 12 12 12 12 13 13 13 13 14 14 14 14 15 15
## [26] 15 16 16 17 17 17 18 18 18 18 19 19 19 20 20 20 20 20 22 23 24 24 24 24 25
cars[,1:2] # All rows of the first 2 columns
##    speed dist
## 1      4    2
## 2      4   10
## 3      7    4
## 4      7   22
## 5      8   16
## 6      9   10
## 7     10   18
## 8     10   26
## 9     10   34
## 10    11   17
## 11    11   28
## 12    12   14
## 13    12   20
## 14    12   24
## 15    12   28
## 16    13   26
## 17    13   34
## 18    13   34
## 19    13   46
## 20    14   26
## 21    14   36
## 22    14   60
## 23    14   80
## 24    15   20
## 25    15   26
## 26    15   54
## 27    16   32
## 28    16   40
## 29    17   32
## 30    17   40
## 31    17   50
## 32    18   42
## 33    18   56
## 34    18   76
## 35    18   84
## 36    19   36
## 37    19   46
## 38    19   68
## 39    20   32
## 40    20   48
## 41    20   52
## 42    20   56
## 43    20   64
## 44    22   66
## 45    23   54
## 46    24   70
## 47    24   92
## 48    24   93
## 49    24  120
## 50    25   85
cars$dist[1:3] # First 3 values of the dist (distance) column
## [1]  2 10  4
cars[c(2,8,4),2] # Elements of the 2nd column taken from rows 2, 8 and 4, in that order
## [1] 10 26 22
cars[-c(2:49),] # Negative indices drop rows: all the data except rows 2 through 49 (inclusive)
##    speed dist
## 1      4    2
## 50    25   85
which.max(cars$speed) # Index (position) of the maximum value; the first one if there are ties
## [1] 50
cars[which.max(cars$speed),"dist"] # The dist value recorded in the row with the maximum speed
## [1] 85
cars$dist %in% c(10,20) # Logical vector: TRUE where dist equals 10 or 20, FALSE everywhere else
##  [1] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [13]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE
cars[cars$dist %in% c(10,20),] # Use that logical vector as a row filter: show the rows of cars where dist is either 10 or 20
##    speed dist
## 2      4   10
## 6      9   10
## 13    12   20
## 24    15   20

5. Functions

# Radially symmetric surface 10 * sin(r) / r, where r = sqrt(x^2 + y^2) is the
# distance of the point (x, y) from the origin.
#
# x, y: numeric vectors (combined element-wise, as in ordinary R arithmetic).
# Returns: numeric vector of surface heights.
#
# At the origin r is 0 and sin(r) / r evaluates to 0 / 0 = NaN, so that point
# is given the limiting value 10 instead of NaN.
f = function(x,y){
  r = sqrt(x^2 + y^2)
  height = 10 * sin(r) / r
  # which() ignores NA comparisons, so missing inputs stay NA in the result.
  height[which(r == 0)] = 10
  height
}
f(3,4) # Evaluate f at a single point
## [1] -1.917849
# 4 equally spaced values from 0 to 10. The argument is spelled out as
# length.out rather than relying on partial matching of "length".
x = seq(0, 10, length.out=4)
y=x
z=outer(x,y,f) # 4 x 4 matrix with z[i, j] = f(x[i], y[j]) for every pair of grid values
# 3D perspective plot of the surface z over the x-y grid. theta and phi set the
# viewing angles, and expand = 0.5 shrinks the surface in the z-direction for
# ease of view.
persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue")

x=seq(-10,10,length.out=300) # 300 equally spaced values from -10 to 10
plot(x,sin(x)) # Scatter plot of sin(x) against x at those 300 points

#dummy comment to clear plot

6. Packages

New methods and tools are stored in packages which may need to be installed. ISLR is a commonly used package.

Method 1: use code to download and install package:

install.packages("ISLR") # downloads and installs the package
library(ISLR) # loads package

Some packages produce warning messages because they are intended for a different version of R. You can ignore the warning messages.

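Method 2: In RStudio’s lower right pane, there is a Packages tab that lists the packages already installed on your computer. The package you need may appear in the list under this tab. If it does, just check the box and the package will be loaded; if it does not, use the Install button on this tab to install it first. ISLR should be listed under this tab once it is installed.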

Method 3: RStudio may prompt with an offer to install a needed missing package. If this happens, agree to the install.

Hint: If you install a package using code, you will want to comment out that line of code following the install:

# install.packages("ISLR")
library(ISLR) # The load statement can remain