############################################################
# Clean R Environment
############################################################
# Remove the objects in the current workspace so the script starts without
# leftovers from earlier work. Names beginning with a dot are kept, and
# loaded packages and options() are not affected; restart R for a fully
# fresh session.
rm(list = ls())
############################################################
### Set the working directory where your files are located.
### The path is specific to one machine; edit it to match your own setup.
data_dir <- "D:/D Drive/Ph.D. Course Work/PhD_2025/DataFile"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  # Warn instead of stopping so the remaining examples can still run on a
  # machine where this folder does not exist.
  warning("Working directory not changed; folder not found: ", data_dir,
          call. = FALSE)
}
### Verify the current working directory
getwd()
### Expected output when the folder above exists:
## [1] "D:/D Drive/Ph.D. Course Work/PhD_2025/DataFile"
### Two vectors of equal length: student labels (character) and marks (numeric)
st <- c("s1", "s2", "s3", "s4")
marks <- c(20.5, 25, 27, 21)
marks
## [1] 20.5 25.0 27.0 21.0
### R usually displays 7 significant digits for numeric values.
### The colon operator gives a sequence of integers in steps of 1.
1:10
## [1] 1 2 3 4 5 6 7 8 9 10
### seq() gives a sequence with an arbitrary increment, set through `by`.
seq(1, 20, by = 0.2)
## [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8
## [16] 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4 6.6 6.8
## [31] 7.0 7.2 7.4 7.6 7.8 8.0 8.2 8.4 8.6 8.8 9.0 9.2 9.4 9.6 9.8
## [46] 10.0 10.2 10.4 10.6 10.8 11.0 11.2 11.4 11.6 11.8 12.0 12.2 12.4 12.6 12.8
## [61] 13.0 13.2 13.4 13.6 13.8 14.0 14.2 14.4 14.6 14.8 15.0 15.2 15.4 15.6 15.8
## [76] 16.0 16.2 16.4 16.6 16.8 17.0 17.2 17.4 17.6 17.8 18.0 18.2 18.4 18.6 18.8
## [91] 19.0 19.2 19.4 19.6 19.8 20.0
### In short: `:` creates integer sequences, while seq() with `by` allows
### custom increments.
### rep() repeats the whole vector the given number of times.
rep(c("A", "B"), times = 5)
## [1] "A" "B" "A" "B" "A" "B" "A" "B" "A" "B"
### A positive index selects that element.
st[3]
## [1] "s3"
### A negative index returns everything except that element.
st[-3]
## [1] "s1" "s2" "s4"
### Types of Vectors
####### Numeric
####### Integer
####### Complex
####### Character
####### Logical
### Factor
### A factor holds categorical data as a fixed set of levels.
v <- c(1, 1, 0, 0, 1, 1)
### The values 0 and 1 are relabelled "good" and "bad"; ordered = TRUE
### ranks the levels in the order given, so good < bad.
f1 <- factor(
  v,
  levels = c(0, 1),
  labels = c("good", "bad"),
  ordered = TRUE
)
f1
## [1] bad bad good good bad bad
## Levels: good < bad
### All levels of the factor
levels(f1)
## [1] "good" "bad"
### How many levels there are
nlevels(f1)
## [1] 2
### Factors store categorical data and can be ordered. Useful for statistical modeling.
### as.numeric() on a factor returns the position of each value's level
### (1 = "good", 2 = "bad"), not the original numbers stored in v.
as.numeric(f1)
## [1] 2 2 1 1 2 2
### Converting numbers to text gives one string per element.
as.character(marks)
## [1] "20.5" "25" "27" "21"
### Number of elements in the vector
length(st)
## [1] 4
### The length also serves as the index of the last element.
st[length(st)]
## [1] "s4"
### Numerical Operations on Vectors
### An operation between a vector and a single number is applied to
### every element of the vector.
### Addition
marks + 5
## [1] 25.5 30.0 32.0 26.0
### Subtraction
marks - 2
## [1] 18.5 23.0 25.0 19.0
### Multiplication
marks * 2
## [1] 41 50 54 42
### Division
marks / 2
## [1] 10.25 12.50 13.50 10.50
### Exponentiation
marks^2
## [1] 420.25 625.00 729.00 441.00
### Vector Summary Functions
### Smallest value
min(marks)
## [1] 20.5
### Largest value
max(marks)
## [1] 27
### Total of all elements
sum(marks)
## [1] 93.5
### Product of all elements
prod(marks)
## [1] 290587.5
### Running total: each entry is the sum of the elements up to that position.
cumsum(marks)
## [1] 20.5 45.5 72.5 93.5
### Logical Operators
x <- c(TRUE, FALSE, TRUE)
### NOT: flips every value
!x
## [1] FALSE TRUE FALSE
### AND: TRUE only where both sides are TRUE
x & c(TRUE, TRUE, FALSE)
## [1] TRUE FALSE FALSE
### OR: TRUE where at least one side is TRUE
x | c(FALSE, TRUE, TRUE)
## [1] TRUE TRUE TRUE
### Comparisons are evaluated element by element and return logical vectors.
marks > 22
## [1] FALSE TRUE TRUE FALSE
### Equality test
marks == 25
## [1] FALSE TRUE FALSE FALSE
### paste() joins its arguments element by element into strings; pieces are
### separated by a single space unless another `sep` is given.
paste("Student", st, "has marks", marks, sep = " ")
## [1] "Student s1 has marks 20.5" "Student s2 has marks 25"
## [3] "Student s3 has marks 27" "Student s4 has marks 21"
### Values arranged from smallest to largest
sort(marks, decreasing = FALSE)
## [1] 20.5 21.0 25.0 27.0
### Rank of each element within the vector (1 = smallest)
rank(marks)
## [1] 1 3 4 2
### Positions of the elements in sorted order, so marks[order(marks)] is sorted
order(marks)
## [1] 1 4 2 3
### Data Frame
### Combine the two vectors and a new grade column into one table.
df <- data.frame(
  Name = st,
  Marks = marks,
  Grade = c("A", "B", "A", "C")
)
df
## Name Marks Grade
## 1 s1 20.5 A
## 2 s2 25.0 B
## 3 s3 27.0 A
## 4 s4 21.0 C
### Each column of a data frame may hold a different type, but every column
### must have the same number of elements.
cars_df <- data.frame(speed = c(4, 7, 12), dist = c(2, 10, 24))
cars_df
## speed dist
## 1 4 2
## 2 7 10
## 3 12 24
### Data Frame Dimensions and Column Names
### Number of rows
nrow(df)
## [1] 4
### Number of columns
ncol(df)
## [1] 3
### Both at once: rows first, then columns
dim(df)
## [1] 4 3
### Column names
names(df)
## [1] "Name" "Marks" "Grade"
### Row names and column names together, returned as a list of two vectors
dimnames(df)
## [[1]]
## [1] "1" "2" "3" "4"
##
## [[2]]
## [1] "Name" "Marks" "Grade"
### Subset of Data Frame
### Rows and columns are selected with df[rows, columns]; leaving one side
### empty keeps everything along that dimension.
df[3, ] # All variables for the 3rd observation
## Name Marks Grade
## 3 s3 27 A
df[, 2] # 2nd variable for all observations (returned as a plain vector)
## [1] 20.5 25.0 27.0 21.0
### Row indices should exist in df, which has 4 rows. A positive index past
### nrow(df) does not raise an error; it yields a row filled with NA.
df[c(1:2, 4), ] # Observations 1 to 2 and 4
## Name Marks Grade
## 1 s1 20.5 A
## 2 s2 25.0 B
## 4 s4 21.0 C
df[-(2:3), ] # Remove observations 2 to 3
## Name Marks Grade
## 1 s1 20.5 A
## 4 s4 21.0 C
df[nrow(df), ] # Last observation
## Name Marks Grade
## 4 s4 21 C