# Integer sequence 2..10 built with the colon operator.
x <- 2:10
x
## [1] 2 3 4 5 6 7 8 9 10
# Number of elements in x.
len <- length(x)
len
## [1] 9
# The same sequence, this time produced by seq().
x <- seq(from = 2, to = 10)
x
## [1] 2 3 4 5 6 7 8 9 10
# 20 standard-normal draws. No seed is set before this call in the visible
# script, so the values recorded below will differ from run to run.
x <- rnorm(n = 20)
x
## [1] 1.71066470 0.19523096 -0.68995790 -0.93658251 -0.03604616 -0.84925330
## [7] 0.61819224 1.25278047 -0.66014094 2.09707067 -0.37805813 -2.38380392
## [13] 0.02852406 -0.94447302 -1.62824033 -0.45393993 -1.03539135 0.26829173
## [19] 1.60149561 0.32110778
# Fix the RNG seed first so that the following draws are reproducible.
set.seed(2021)
x <- rnorm(n = 20)
x
## [1] -0.12245998 0.55245663 0.34864950 0.35963224 0.89805369 -1.92256952
## [7] 0.26174436 0.91556637 0.01377194 1.72996316 -1.08220485 -0.27282518
## [13] 0.18199540 1.50854179 1.60447011 -1.84147561 1.62331021 0.13138902
## [19] 1.48112247 1.51331829
# 50 equally spaced points covering [-pi, pi].
x <- seq(from = -pi, to = pi, length.out = 50)
x
## [1] -3.14159265 -3.01336438 -2.88513611 -2.75690784 -2.62867957 -2.50045130
## [7] -2.37222302 -2.24399475 -2.11576648 -1.98753821 -1.85930994 -1.73108167
## [13] -1.60285339 -1.47462512 -1.34639685 -1.21816858 -1.08994031 -0.96171204
## [19] -0.83348377 -0.70525549 -0.57702722 -0.44879895 -0.32057068 -0.19234241
## [25] -0.06411414 0.06411414 0.19234241 0.32057068 0.44879895 0.57702722
## [31] 0.70525549 0.83348377 0.96171204 1.08994031 1.21816858 1.34639685
## [37] 1.47462512 1.60285339 1.73108167 1.85930994 1.98753821 2.11576648
## [43] 2.24399475 2.37222302 2.50045130 2.62867957 2.75690784 2.88513611
## [49] 3.01336438 3.14159265
# A vector of ten zeros.
x <- rep(0, times = 10)
x
## [1] 0 0 0 0 0 0 0 0 0 0
# A vector of ten missing values.
x <- rep(NA, times = 10)
x
## [1] NA NA NA NA NA NA NA NA NA NA
##Graphics
# Reset the seed so that the two samples below are reproducible.
set.seed(2021)
x <- rnorm(n = 100)
y <- rnorm(n = 100)
# Scatter plot of y against x with default labels.
plot(x, y)
# The same scatter plot with explicit axis labels, a title, point markers
# (type = "p") and blue colouring.
plot(
  x, y,
  xlab = "this is the x-axis",
  ylab = "this is the y-axis",
  main = "Plot of X vs Y",
  type = "p",
  col = "blue"
)
# 4 x 4 matrix holding 1..16, filled row by row (byrow = TRUE).
A <- matrix(1:16, nrow = 4, ncol = 4, byrow = TRUE)
A
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
## [4,] 13 14 15 16
# Single element at row 3, column 2.
B <- A[3, 2]
B
## [1] 10
The first row and the last column of A are extracted below, along with a sub-matrix made of the first 2 rows and first 2 columns. The dimension of the original matrix A is also displayed.
# Row 1 of A; leaving the column index empty selects every column.
First_row <- A[1, ]
First_row
## [1] 1 2 3 4
# Column 4 of A; leaving the row index empty selects every row.
Last_Column <- A[, 4]
Last_Column
## [1] 4 8 12 16
# Top-left 2 x 2 corner of A (rows 1-2, columns 1-2).
Sub_mat <- A[1:2, 1:2]
Sub_mat
## [,1] [,2]
## [1,] 1 2
## [2,] 5 6
# Number of rows and columns of A.
dimension <- dim(A)
dimension
## [1] 4 4
##Exploring Datasets
# The Wage data frame used below is provided by the ISLR package.
library(ISLR)
# Number of rows and columns.
dim(Wage)
## [1] 3000 11
# Column names, types and the first few values of each column.
str(Wage)
## 'data.frame': 3000 obs. of 11 variables:
## $ year : int 2006 2004 2003 2003 2005 2008 2009 2008 2006 2004 ...
## $ age : int 18 24 45 43 50 54 44 30 41 52 ...
## $ maritl : Factor w/ 5 levels "1. Never Married",..: 1 1 2 2 4 2 2 1 1 2 ...
## $ race : Factor w/ 4 levels "1. White","2. Black",..: 1 1 1 3 1 1 4 3 2 1 ...
## $ education : Factor w/ 5 levels "1. < HS Grad",..: 1 4 3 4 2 4 3 3 3 2 ...
## $ region : Factor w/ 9 levels "1. New England",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ jobclass : Factor w/ 2 levels "1. Industrial",..: 1 2 1 2 2 2 1 2 2 2 ...
## $ health : Factor w/ 2 levels "1. <=Good","2. >=Very Good": 1 2 1 2 1 2 2 1 2 2 ...
## $ health_ins: Factor w/ 2 levels "1. Yes","2. No": 2 2 1 1 1 1 1 1 1 1 ...
## $ logwage : num 4.32 4.26 4.88 5.04 4.32 ...
## $ wage : num 75 70.5 131 154.7 75 ...
# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(Wage)
## year age maritl race
## Min. :2003 Min. :18.00 1. Never Married: 648 1. White:2480
## 1st Qu.:2004 1st Qu.:33.75 2. Married :2074 2. Black: 293
## Median :2006 Median :42.00 3. Widowed : 19 3. Asian: 190
## Mean :2006 Mean :42.41 4. Divorced : 204 4. Other: 37
## 3rd Qu.:2008 3rd Qu.:51.00 5. Separated : 55
## Max. :2009 Max. :80.00
##
## education region jobclass
## 1. < HS Grad :268 2. Middle Atlantic :3000 1. Industrial :1544
## 2. HS Grad :971 1. New England : 0 2. Information:1456
## 3. Some College :650 3. East North Central: 0
## 4. College Grad :685 4. West North Central: 0
## 5. Advanced Degree:426 5. South Atlantic : 0
## 6. East South Central: 0
## (Other) : 0
## health health_ins logwage wage
## 1. <=Good : 858 1. Yes:2083 Min. :3.000 Min. : 20.09
## 2. >=Very Good:2142 2. No : 917 1st Qu.:4.447 1st Qu.: 85.38
## Median :4.653 Median :104.92
## Mean :4.654 Mean :111.70
## 3rd Qu.:4.857 3rd Qu.:128.68
## Max. :5.763 Max. :318.34
##
# 90th percentile of the wage column.
quantile(Wage$wage, probs = 0.90)
## 90%
## 154.7036
# Education-level counts among rows whose wage exceeds that 90th percentile.
table(Wage$education[Wage$wage > quantile(Wage$wage, probs = 0.90)])
##
## 1. < HS Grad 2. HS Grad 3. Some College 4. College Grad
## 0 18 28 105
## 5. Advanced Degree
## 149
Note: No single variable has a correlation close to 1 with the wage column. The only clearly curved (non-linear) relationship observed is with the logwage column, which cannot be considered a meaningful correlation because it is just the log of the same attribute, wage.
# Plot the whole Wage data frame in a single call.
plot(x = Wage)
####
A box plot of wage by race is shown below, with race on the x-axis and wage on the y-axis.
# Wage (y) plotted against the race factor (x), with explicit axis labels,
# a title and blue colouring.
plot(
  x = Wage$race,
  y = Wage$wage,
  xlab = "Race",
  ylab = "Wage",
  main = "Plot of Race vs Wage",
  col = "blue"
)