MATRICES

Before starting with matrices, it is common to first work with sets and subsets. Below you will find some useful functions.

Union and intersection of two sets A and B

A=c(1,4,2,1,2,7)
B=c(1,3,5,6,1,2,8,4)
union(A,B)# union of the sets
[1] 1 4 2 7 3 5 6 8
intersect(A,B)# intersection of the sets
[1] 1 4 2

setdiff() and setequal()

You will find explanations for the function output in the comments.

A
[1] 1 4 2 1 2 7
B
[1] 1 3 5 6 1 2 8 4
setdiff(A,B) # elements of A which are not in B
[1] 7
setdiff(B,A) #elements of B which are not in A
[1] 3 5 6 8
setequal(A,B) # TRUE if both sets contain the same elements (order and duplicates are ignored), FALSE otherwise
[1] FALSE
C<-c(1,2,3)
D<-c(2,3,1)
setequal(C,D)
[1] TRUE

%in%

X %in% Y checks, for each element of X, whether it occurs in Y, and returns a logical vector (TRUE or FALSE) with one entry per element of X. Pay attention to the order: Y %in% X tests the elements of Y against X instead.

X<-1:12
X
 [1]  1  2  3  4  5  6  7  8  9 10 11 12
Y<-5:9
Y
[1] 5 6 7 8 9
Y %in% X
[1] TRUE TRUE TRUE TRUE TRUE
X %in% Y
 [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
all(X%in%Y)# are all elements of X in Y? It will return TRUE or FALSE
[1] FALSE
all(Y%in%X)# are all elements of Y in X?
[1] TRUE

How to create a matrix and name the rows and columns.

We can use the function matrix(), passing a vector of values as the first argument and using nrow or ncol to set the dimensions of the matrix. By default the values fill the matrix column by column; add the argument byrow=TRUE to fill it row by row.

M<-matrix(c(1,0,4,0,1,0,7,9,1),nrow=3)# by default it is organized by columns 
M
     [,1] [,2] [,3]
[1,]    1    0    7
[2,]    0    1    9
[3,]    4    0    1
M1<-matrix(c(1,0,4,0,1,0,7,9,1),nrow=3,byrow=TRUE)# adding argument byrow=TRUE, we see that the matrix has changed
M1
     [,1] [,2] [,3]
[1,]    1    0    4
[2,]    0    1    0
[3,]    7    9    1
rownames(M)<-c("A","B","C")
colnames(M)<-c("a","b","c")
M
  a b c
A 1 0 7
B 0 1 9
C 4 0 1
colnames(M)<-colnames(M,do.NULL=TRUE,prefix="Column.") # no-op here: M already has column names, so colnames() returns them unchanged; prefix is only used with do.NULL=FALSE when the names are missing
M
  a b c
A 1 0 7
B 0 1 9
C 4 0 1

Or, we can use dimnames argument inside the function matrix() :

D<-matrix(c(1,2,3,11,12,13), nrow=2, ncol=3, byrow=TRUE, 
 dimnames=list(c("row 1", "row 2"), c("Col 1", "Col 2", "Col 3")))
D
      Col 1 Col 2 Col 3
row 1     1     2     3
row 2    11    12    13

We can also name the rows and columns using letters, month names or month abbreviations:

length(D[,1])
[1] 2
rownames(D)=LETTERS[1:length(D[,1])]
D
  Col 1 Col 2 Col 3
A     1     2     3
B    11    12    13
colnames(D)=letters[1:length(D[1,])]
D
   a  b  c
A  1  2  3
B 11 12 13
colnames(D)=month.name[1:3]
D
  January February March
A       1        2     3
B      11       12    13
colnames(D)=month.abb[1:3]# three-letter abbreviations of the first three months
D
  Jan Feb Mar
A   1   2   3
B  11  12  13

We can obtain the number of rows and columns using:

nrow(D)
[1] 2
ncol(D)
[1] 3

Grouping in Matrix

Let us suppose each row is an individual (item, observation)

M1<-matrix(c(1,0,4,0,1,0,7,9,1,4,6,2),nrow=4)
M1
     [,1] [,2] [,3]
[1,]    1    1    1
[2,]    0    0    4
[3,]    4    7    6
[4,]    0    9    2
group=c("G1","G2","G2","G1")
rowSums(M1)# sum by row
[1]  3  4 17 11
apply(M1,1,sum)# apply the sum to each row (margin 1); same result as rowSums()
[1]  3  4 17 11
tapply(M1, list(group[row(M1)], col(M1)), sum)# sum within each category of the group vector, separately for each column: the G1 line adds the first and fourth rows of M1 (both labelled G1), the G2 line adds the second and third rows
   1  2  3
G1 1 10  3
G2 4  7 10
aggregate(M1,list(group),sum)# same group sums as the tapply() call above, returned as a data frame
aggregate(M1,list(group),mean)# same grouping, but computes the mean of each column within each group instead of the sum

apply(M1,2,mean)# apply to the columns the mean
[1] 1.25 4.25 3.25
apply(M1,1,mean)# apply to the row the mean
[1] 1.000000 1.333333 5.666667 3.666667
cbind and rbind

Add rows or columns using cbind and rbind

x1<-c(3,6)
x2<-c(12,23)
mat.1<-rbind(x1,x2)
mat.1
   [,1] [,2]
x1    3    6
x2   12   23
mat.2<-cbind(x1,x2)
mat.2
     x1 x2
[1,]  3 12
[2,]  6 23
x3<-c(0,1)
mat.3<-cbind(mat.2,x3)
mat.3
     x1 x2 x3
[1,]  3 12  0
[2,]  6 23  1
######################
M2<-matrix(c(1,0,4,0,1,0,7,9,1,4,6,2,5,0,7,8),nrow=4)
M2
     [,1] [,2] [,3] [,4]
[1,]    1    1    1    5
[2,]    0    0    4    0
[3,]    4    7    6    7
[4,]    0    9    2    8
M2<-rbind(M2,apply(M2,2,mean))# apply the mean to each column (margin 2); the column means are appended to the matrix as a new row
M2
     [,1] [,2] [,3] [,4]
[1,] 1.00 1.00 1.00    5
[2,] 0.00 0.00 4.00    0
[3,] 4.00 7.00 6.00    7
[4,] 0.00 9.00 2.00    8
[5,] 1.25 4.25 3.25    5
M2<-cbind(M2,apply(M2,1,var))# apply the variance to each row (margin 1); the row variances are appended to the matrix as a new column
M2
     [,1] [,2] [,3] [,4]      [,5]
[1,] 1.00 1.00 1.00    5  4.000000
[2,] 0.00 0.00 4.00    0  4.000000
[3,] 4.00 7.00 6.00    7  2.000000
[4,] 0.00 9.00 2.00    8 19.583333
[5,] 1.25 4.25 3.25    5  2.640625
colnames(M2)<-c(1:4,"variance")
rownames(M2)<-c(1:4,"mean")
M2
        1    2    3 4  variance
1    1.00 1.00 1.00 5  4.000000
2    0.00 0.00 4.00 0  4.000000
3    4.00 7.00 6.00 7  2.000000
4    0.00 9.00 2.00 8 19.583333
mean 1.25 4.25 3.25 5  2.640625

Matrix multiplications

M2<-matrix(c(1,0,4,0,1,0,7,9,1,4,6,2,5,0,7,8),nrow=4)
M2
     [,1] [,2] [,3] [,4]
[1,]    1    1    1    5
[2,]    0    0    4    0
[3,]    4    7    6    7
[4,]    0    9    2    8
M2*M2 # element-wise product: each entry is multiplied by itself (not the matrix product)
     [,1] [,2] [,3] [,4]
[1,]    1    1    1   25
[2,]    0    0   16    0
[3,]   16   49   36   49
[4,]    0   81    4   64
M2%*%M2 # matrix product of M2 with itself (the matrix square); note that M2^2 in R would be the element-wise square instead
     [,1] [,2] [,3] [,4]
[1,]    5   53   21   52
[2,]   16   28   24   28
[3,]   28  109   82  118
[4,]    8   86   64   78

Solving linear equations

 solve(M2)# gives the inverse of matrix M2
            [,1]        [,2]        [,3]        [,4]
[1,] -0.04964539 -0.28191489  0.26241135 -0.19858156
[2,] -0.22695035 -0.07446809  0.05673759  0.09219858
[3,]  0.00000000  0.25000000  0.00000000  0.00000000
[4,]  0.25531915  0.02127660 -0.06382979  0.02127660

Example

Let us suppose we have the linear equations 4x+5y=1 and 6x+7y=9 and we want to find the solutions x and y. First we create the coefficient matrix A and the right-hand-side vector b.

A<-matrix(c(4,5,6,7),nrow=2,byrow=TRUE)
A
     [,1] [,2]
[1,]    4    5
[2,]    6    7
b<-c(1,9)
b
[1] 1 9
solve(A,b)# the solution is x=19 and y=-15
[1]  19 -15

Other helpful functions for matrices are:

t(), det(), qr(), chol(), eigen(), prcomp(), diag(), array(), outer()

M2<-matrix(c(1,0,4,0,1,0,7,9,1,4,6,2,5,0,7,8),nrow=4)
M2
     [,1] [,2] [,3] [,4]
[1,]    1    1    1    5
[2,]    0    0    4    0
[3,]    4    7    6    7
[4,]    0    9    2    8
diag(M2)# the diagonal of M2
[1] 1 0 6 8
det(M2)# the determinant of matrix M2
[1] -564
t(M2)# the transpose of M2
     [,1] [,2] [,3] [,4]
[1,]    1    0    4    0
[2,]    1    0    7    9
[3,]    1    4    6    2
[4,]    5    0    7    8

More computations with matrices

library(dplyr)
library( miscTools)
x <- matrix(rnorm(20,5,3), nrow = 5, ncol = 4)
print(x)
           [,1]     [,2]     [,3]     [,4]
[1,]  8.2127467 2.057258 8.199367 4.350643
[2,]  4.1319954 2.031152 7.494436 9.373699
[3,]  4.4441381 5.829440 4.797668 6.617743
[4,]  7.7193599 3.242927 8.202407 2.428166
[5,] -0.5395915 9.121853 3.425706 4.245485

Row averages

print(rowMeans(x))
[1] 5.705004 5.757821 5.422247 5.398215 4.063363
print(rowMedians(x))
[1] 6.275005 5.813216 5.313554 5.481144 3.835596

Column averages

print(colMeans(x))
[1] 4.793730 4.456526 6.423917 5.403147
print(colMedians(x))
[1] 4.444138 3.242927 7.494436 4.350643

Row variabilities

library(matrixStats)
print(rowVars(x))
[1]  9.2169888 10.8739260  0.9806039  8.9058861 15.7384295
print(rowSds(x))
[1] 3.0359494 3.2975636 0.9902544 2.9842731 3.9671690
print(rowMads(x))
[1] 2.862977 3.885675 1.026924 3.676462 3.547177
print(rowIQRs(x))
[1] 4.425415 4.357467 1.317230 4.800885 3.030196

Column variabilities

print(colVars(x))
[1] 12.313353  9.190480  4.773804  7.134744
print(colSds(x))
[1] 3.509039 3.031580 2.184904 2.671094
print(colMads(x))
[1] 4.855844 1.796579 1.049637 2.850264
print(colIQRs(x))
[1] 3.587364 3.772182 3.401699 2.372257

Row ranges

print(rowRanges(x))
           [,1]     [,2]
[1,]  2.0572578 8.212747
[2,]  2.0311517 9.373699
[3,]  4.4441381 6.617743
[4,]  2.4281665 8.202407
[5,] -0.5395915 9.121853
print(cbind(rowMins(x), rowMaxs(x)))
           [,1]     [,2]
[1,]  2.0572578 8.212747
[2,]  2.0311517 9.373699
[3,]  4.4441381 6.617743
[4,]  2.4281665 8.202407
[5,] -0.5395915 9.121853
print(cbind(rowOrderStats(x, which = 1), rowOrderStats(x, which = ncol(x))))
           [,1]     [,2]
[1,]  2.0572578 8.212747
[2,]  2.0311517 9.373699
[3,]  4.4441381 6.617743
[4,]  2.4281665 8.202407
[5,] -0.5395915 9.121853

Column ranges

print(colRanges(x))
           [,1]     [,2]
[1,] -0.5395915 8.212747
[2,]  2.0311517 9.121853
[3,]  3.4257057 8.202407
[4,]  2.4281665 9.373699
print(cbind(colMins(x), colMaxs(x)))
           [,1]     [,2]
[1,] -0.5395915 8.212747
[2,]  2.0311517 9.121853
[3,]  3.4257057 8.202407
[4,]  2.4281665 9.373699
print(cbind(colOrderStats(x, which = 1), colOrderStats(x, which = nrow(x))))
           [,1]     [,2]
[1,] -0.5395915 8.212747
[2,]  2.0311517 9.121853
[3,]  3.4257057 8.202407
[4,]  2.4281665 9.373699
# 50 x 40 matrix of standard-normal draws; the number of draws matches the
# matrix size, so no values are discarded and matrix() does not warn.
x <- matrix(rnorm(50 * 40), nrow = 50, ncol = 40)

Row standard deviations

d <- rowDiffs(x)
s1 <- rowSds(d) / sqrt(2)
s2 <- rowSds(x)
print(summary(s1 - s2))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.15119 -0.02147  0.02619  0.02990  0.09272  0.18046 

Column standard deviations

d <- colDiffs(x)
s1 <- colSds(d) / sqrt(2)
s2 <- colSds(x)
print(summary(s1 - s2))
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-1.932e-01 -4.773e-02  1.735e-02 -2.028e-05  5.298e-02  1.522e-01 

ARRAYS

A<-letters[1:24]# the first 24 letters of the alphabet
dim(A)=c(3,2,4)# reshape into an array of 4 matrices, each 3x2 (3*2*4 = 24 elements)
A
, , 1

     [,1] [,2]
[1,] "a"  "d" 
[2,] "b"  "e" 
[3,] "c"  "f" 

, , 2

     [,1] [,2]
[1,] "g"  "j" 
[2,] "h"  "k" 
[3,] "i"  "l" 

, , 3

     [,1] [,2]
[1,] "m"  "p" 
[2,] "n"  "q" 
[3,] "o"  "r" 

, , 4

     [,1] [,2]
[1,] "s"  "v" 
[2,] "t"  "w" 
[3,] "u"  "x" 
A[,,1:3]# select all rows and all columns on matrix 1,2 and 3
, , 1

     [,1] [,2]
[1,] "a"  "d" 
[2,] "b"  "e" 
[3,] "c"  "f" 

, , 2

     [,1] [,2]
[1,] "g"  "j" 
[2,] "h"  "k" 
[3,] "i"  "l" 

, , 3

     [,1] [,2]
[1,] "m"  "p" 
[2,] "n"  "q" 
[3,] "o"  "r" 
A[,,1]# select all rows and all columns of the 1st matrix in A
     [,1] [,2]
[1,] "a"  "d" 
[2,] "b"  "e" 
[3,] "c"  "f" 
A[2,1,3]# select the 2nd row, 1st column of the 3rd matrix in A
[1] "n"
A[2,,2]# select the 2nd row, all the columns of the 2nd matrix in A
[1] "h" "k"
LISTS
crystal_list=list(Name="Ana" , Salary=1000 , City="Bucurest")
typeof(crystal_list)
[1] "list"
length(crystal_list)
[1] 3
names(crystal_list)
[1] "Name"   "Salary" "City"  
unlist(crystal_list)
      Name     Salary       City 
     "Ana"     "1000" "Bucurest" 
crystal_list[3]
$City
[1] "Bucurest"
crystal_list[[1]]# information about 1st variable
[1] "Ana"
crystal_list$Name
[1] "Ana"
crystal_list[1:2]
$Name
[1] "Ana"

$Salary
[1] 1000
crystal_list$Age=25# we add Age category to the list 
crystal_list
$Name
[1] "Ana"

$Salary
[1] 1000

$City
[1] "Bucurest"

$Age
[1] 25
crystal_list=list(Name=c("Ana","Ben"), Salary=c(1200,5600), City=c("Bucurest", "Tirana"))
summary(crystal_list)
       Length Class  Mode     
Name   2      -none- character
Salary 2      -none- numeric  
City   2      -none- character
summary(crystal_list$Salary)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1200    2300    3400    3400    4500    5600 
crystal_list.1=list(Gender=c("F","M"),Profession=c("student","Data Scientist"))  
crystal_final=list(crystal_list,crystal_list.1)# create a new list with additional information from another list of same individuals

Exercise: How can you add individuals to the previous list? Let us suppose you have 3 other people with given name, salary, city and age. How can you add them to your existing list?

End! Introduction to R-Matrices, Arrays and equations!

