# -----------------------------------------------------------------------
# Numeric data type: a plain number is stored as a double
age <- 24
age
## [1] 24
class(age)
## [1] "numeric"
typeof(age)
## [1] "double"
# Integer data type: the L suffix requests integer storage
age <- 24L
age
## [1] 24
class(age)
## [1] "integer"
typeof(age)
## [1] "integer"
# Logical data type
status <- TRUE
status
## [1] TRUE
class(status)
## [1] "logical"
typeof(status)
## [1] "logical"
# Convert numeric type to integer
# Reset `age` to a double first: before this point it holds the integer 24L,
# so calling as.integer() on it would not demonstrate any conversion.
age <- 24
ageInt <- as.integer(age)
ageInt
## [1] 24
class(ageInt)
## [1] "integer"
typeof(ageInt)
## [1] "integer"
# Character data type: a quoted value is text, even when it looks like a number
age <- "24.5"
age
## [1] "24.5"
class(age)
## [1] "character"
typeof(age)
## [1] "character"
# Numeric vector built with c()
Age <- c(10, 20, 40.5, 60)
Age
## [1] 10.0 20.0 40.5 60.0
class(Age)
## [1] "numeric"
typeof(Age)
## [1] "double"
# Character vector built with c()
state <- c("WB", "Delhi", "Rajasthan")
state
## [1] "WB" "Delhi" "Rajasthan"
class(state)
## [1] "character"
typeof(state)
## [1] "character"
# Logical vector built with c()
status <- c(TRUE, FALSE, FALSE, TRUE)
status
## [1] TRUE FALSE FALSE TRUE
class(status)
## [1] "logical"
typeof(status)
## [1] "logical"
# Vector arithmetic: operators are applied to every element
Age <- c(10, 20, 40.5, 60)
x <- Age / 5
x
## [1] 2.0 4.0 8.1 12.0
y <- 5 + 2 * Age
y
## [1] 25 45 86 125
# Extend a vector by appending further values to it
z <- append(x, 3)
z
## [1] 2.0 4.0 8.1 12.0 3.0
z <- append(x, c(3, x))
z
## [1] 2.0 4.0 8.1 12.0 3.0 2.0 4.0 8.1 12.0
# Mean computed by hand, then with the built-in mean()
mean_age <- sum(Age) / length(Age)
mean_age
## [1] 32.625
mean_age <- mean(Age)
mean_age
## [1] 32.625
# Sample variance by hand (n - 1 in the denominator), then with var()
var_age <- sum((Age - mean_age)^2 / (length(Age) - 1))
var_age
## [1] 494.2292
var_age <- var(Age)
var_age
## [1] 494.2292
# Generating vectors with built-in helpers
# Consecutive integers with the colon operator
x <- 1:30
x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30
# Sequence with a fixed step
x <- seq(from = 1, to = 20, by = 2)
x
## [1] 1 3 5 7 9 11 13 15 17 19
# Sequence with a fixed number of equally spaced values
x <- seq(from = 0, to = 20, length.out = 5)
x
## [1] 0 5 10 15 20
# Repeat the whole Age vector five times
x <- rep(Age, times = 5)
x
## [1] 10.0 20.0 40.5 60.0 10.0 20.0 40.5 60.0 10.0 20.0 40.5 60.0 10.0 20.0 40.5
## [16] 60.0 10.0 20.0 40.5 60.0
# A comparison on a vector yields a logical vector
young <- Age <= 18
young
## [1] TRUE FALSE FALSE FALSE
# Repeat a character vector five times and show the result
# (previously the value was overwritten before it was ever displayed).
x <- rep(c("A", "B"), 5)
x
## [1] "A" "B" "A" "B" "A" "B" "A" "B" "A" "B"
# Build labels by joining letters and numbers with no separator;
# c("A", "B") is recycled along 1:5.
x <- paste0(c("A", "B"), 1:5)
x
## [1] "A1" "B2" "A3" "B4" "A5"
# Indexing a vector
Age <- c(47, 61, 72, 43, 55, 53, 50, NA)
# Logical indexing: keep ages above 50, excluding the missing value
Age[Age > 50 & !is.na(Age)]
## [1] 61 72 55 53
# Replace the missing value with 0
Age[is.na(Age)] <- 0
Age
## [1] 47 61 72 43 55 53 50 0
# Positive integers select elements by position
Age[1]
## [1] 47
Age[2:5]
## [1] 61 72 43 55
# Negative integers drop elements by position
Age[-5]
## [1] 47 61 72 43 53 50 0
Age[-(2:5)]
## [1] 47 53 50 0
# Matrix: by default the values fill the matrix column by column
policies <- matrix(data = c(4, 5, 6, 9, 3, 12), nrow = 2, ncol = 3)
policies
## [,1] [,2] [,3]
## [1,] 4 6 3
## [2,] 5 9 12
# With byrow = TRUE the same values fill the matrix row by row
policies <- matrix(
  data = c(4, 5, 6, 9, 3, 12),
  nrow = 2,
  ncol = 3,
  byrow = TRUE
)
policies
## [,1] [,2] [,3]
## [1,] 4 5 6
## [2,] 9 3 12
# Element in row 2, column 2
policies[2, 2]
## [1] 3
# List: named components can hold values of different types and lengths
lst <- list(
  name = "NSOU",
  Courses = c("UG", "PG", "Phd"),
  no_of_learners = 300000
)
lst
## $name
## [1] "NSOU"
##
## $Courses
## [1] "UG" "PG" "Phd"
##
## $no_of_learners
## [1] 3e+05
# Access components by name with $
lst$name
## [1] "NSOU"
lst$no_of_learners
## [1] 3e+05
lst$Courses[1]
## [1] "UG"
# Access components by position with [[ ]]
lst[[2]]
## [1] "UG" "PG" "Phd"
lst[[2]][2]
## [1] "PG"
# Add names to the rows and columns of the matrix
# The matrix has no dimnames before they are assigned
dimnames(policies)
## NULL
policies
## [,1] [,2] [,3]
## [1,] 4 5 6
## [2,] 9 3 12
rownames(policies) <- c("2013", "2014")
colnames(policies) <- c("Medical", "Recreational", "Both")
# Factors: categorical data stored with a fixed set of levels
states <- paste0("st", 1:6)
stateF <- factor(states)
policies_13_14 <- c("Medical", "Recreational", rep("Both", 4))
class(policies_13_14)
## [1] "character"
policiesF <- factor(policies_13_14)
policiesF
## [1] Medical Recreational Both Both Both
## [6] Both
## Levels: Both Medical Recreational
class(stateF)
## [1] "factor"
levels(policiesF)
## [1] "Both" "Medical" "Recreational"
# Count the states that fall under each policy level
tapply(X = states, INDEX = policiesF, FUN = length)
## Both Medical Recreational
## 4 1 1
# Data frame: combine the factors and a numeric vector as columns
legalYr <- c("2013", "2014", "2013", "2013", "2014", "2014")
legalYrF <- factor(legalYr)
ounceLim <- c(1, 2, 3.5, 4, 4.5, 6)
marData <- data.frame(
  stateF = stateF,
  policiesF = policiesF,
  legalYrF = legalYrF,
  ounceLim = ounceLim
)
# Per-column summary: level counts for factors, quartiles and mean for numbers
summary(marData)
## stateF policiesF legalYrF ounceLim
## st1:1 Both :4 2013:3 Min. :1.000
## st2:1 Medical :1 2014:3 1st Qu.:2.375
## st3:1 Recreational:1 Median :3.750
## st4:1 Mean :3.500
## st5:1 3rd Qu.:4.375
## st6:1 Max. :6.000
# Remove every object from the global environment
# NOTE(review): rm(list = ls()) wipes the whole workspace of whoever runs this
# script — confirm that is wanted here.
rm(list = ls())
# Build the full path to the CSV file (Windows-style path with "\\" separators)
dataFdr <- "D:\\D Drive\\Certificate Course\\data"
filename <- "legal_weed_age_GSS2016_ch1.csv"
dataFile <- paste0(dataFdr, "\\", filename)
dataFile
## [1] "D:\\D Drive\\Certificate Course\\data\\legal_weed_age_GSS2016_ch1.csv"
# Read the CSV with readr; both columns arrive as character
library(readr)
data1 <- read_csv(dataFile)
## Rows: 2867 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): grass, age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert age to numeric; entries that cannot be parsed as numbers become NA
# NOTE(review): the summary below reports 32 NAs in age — confirm that dropping
# those non-numeric entries is intended.
data1$age <- as.numeric(data1$age)
## Warning: NAs introduced by coercion
# Treat grass as a categorical variable
data1$grass <- as.factor(data1$grass)
summary(data1)
## grass age
## DK : 110 Min. :18.00
## IAP : 911 1st Qu.:34.00
## LEGAL :1126 Median :49.00
## NOT LEGAL: 717 Mean :48.85
## NA's : 3 3rd Qu.:62.00
## Max. :88.00
## NA's :32