MSBA320_Project4R_Kanupriya.utf8

BASIC MATHEMATICAL FUNCTIONS AND LOGICAL OPERATORS IN R:

5-3

## [1] 2

2^3

## [1] 8

64^(1/3)

## [1] 4

64^(1/2)

## [1] 8

sqrt(64)

## [1] 8

abs(5.6-8.9)/.5 #abs() is for absolute value

## [1] 6.6

log(12,4) # log of 12 at base 4

## [1] 1.792481

log10(12) # log of 12 at base 10

## [1] 1.079181

factorial(5)

## [1] 120

choose(10,4) # choosing 4 things out of 10

## [1] 210

25%/%7 # this is quotient

## [1] 3

25%%7 # this is remainder

## [1] 4

floor(6.3) # this will round down - greatest integer < 6.3

## [1] 6

ceiling(6.3) # this will round up - smallest integer > 6.3

## [1] 7

round(7.9956725,digits = 4) # this rounds a float to the given number of decimal places

## [1] 7.9957

signif(7.9956725,digits = 4) # this rounds x to 4 significant digits (counted from the first non-zero digit, not from the decimal point)

## [1] 7.996

runif(5) # this generates 5 random numbers between 0 and 1, distributed uniformly

## [1] 0.08012073 0.20590587 0.64536359 0.39489703 0.54440751

sin(45) # the argument is in radians: this is sin(45 rad), not sin(45 degrees); use sin(45*pi/180) for degrees

## [1] 0.8509035

tan(30) # the argument is in radians: this is tan(30 rad), not tan(30 degrees); use tan(30*pi/180) for degrees

## [1] -6.405331

GENERATING SEQUENCES IN R:

3:25

##  [1]  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25

seq(3,25,3) #this generates a sequence from arg1 to arg2 with step=arg3

## [1]  3  6  9 12 15 18 21 24

seq(25,3,-3) # can also go backwards when generating a seq with negative step

## [1] 25 22 19 16 13 10  7  4

seq(from=7, by=2, along.with=1:10) # this generates as many numbers as there are elements in 1:10 (i.e. 10 numbers), starting with 7, step=2

##  [1]  7  9 11 13 15 17 19 21 23 25

rep(4.615,15) # this will repeat a single number (=4.615) for a total of 15 times

##  [1] 4.615 4.615 4.615 4.615 4.615 4.615 4.615 4.615 4.615 4.615 4.615 4.615
## [13] 4.615 4.615 4.615

rep(1:5,4) # this will repeat a sequence (1:5) for a total of 4 times

##  [1] 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5

rep(1:5,rep(2,5)) # this will repeat each number in the 1:5 sequence "twice" - since the second rep() repeats 2 for total 5 times

##  [1] 1 1 2 2 3 3 4 4 5 5

rep(1:5,c(1,2,3,4,5)) # this is a more customized way of letting R know which specific number to repeat how many times

##  [1] 1 2 2 3 3 3 4 4 4 4 5 5 5 5 5

rep(c("Mon", "Tue", "Wed", "Thur"),c(3,1,4,2)) # the first c() gives list of arguments to be repeated, the second c() gives the number of times each argument in the previous function needs repeating

##  [1] "Mon"  "Mon"  "Mon"  "Tue"  "Wed"  "Wed"  "Wed"  "Wed"  "Thur" "Thur"

VECTORS IN R:

y <- c(2,4,6,8) # c() (combine/concatenate) creates vectors in R
x <- c(y,y) # c() can also take existing vectors as arguments and join them into a new vector
x

## [1] 2 4 6 8 2 4 6 8

n <- length(x)
xmean <- mean(x)
variance <- sum((x-xmean)^2)/(n-1) # calculating the sample variance by hand using sum(), length(), mean()
variance

## [1] 5.714286

var(x) # calculating the variance of x using the built-in function

## [1] 5.714286

a <- c(3,7,2,5,4,5,6,9,1,8)
quantile(a) # outputs a named vector containing the min (0%), lower quartile (25%), median (50%), upper quartile (75%) and max (100%) of a

##   0%  25%  50%  75% 100% 
## 1.00 3.25 5.00 6.75 9.00

rank(a) # this outputs ranks of individual elements in the same order as they are present in the vector

##  [1]  3.0  8.0  2.0  5.5  4.0  5.5  7.0 10.0  1.0  9.0

sort(a) # this outputs individual elements in a sorted order

##  [1] 1 2 3 4 5 5 6 7 8 9

order(a) # this outputs the positions to fetch individual elements from to create an ascending list. Here, smallest element is at position 9, next element at position 3, and so on.

##  [1]  9  3  1  5  4  6  7  2 10  8

newdtframe = data.frame(a,rank(a),sort(a),order(a)) # this creates a data frame whose columns are the vectors generated by the above functions, so the results can be compared side by side. The column names are derived from the expressions passed to data.frame().
newdtframe

##    a rank.a. sort.a. order.a.
## 1  3     3.0       1        9
## 2  7     8.0       2        3
## 3  2     2.0       3        1
## 4  5     5.5       4        5
## 5  4     4.0       5        4
## 6  5     5.5       5        6
## 7  6     7.0       6        7
## 8  9    10.0       7        2
## 9  1     1.0       8       10
## 10 8     9.0       9        8

DATAFRAMES IN R:

# Read the worms data set: the first line of the file supplies the column names and the first column is used as row names.
# NOTE: the absolute Windows path below only resolves on a machine that has this exact folder layout.
worms <- read.csv("C:\\temp\\Course Data\\worms.csv",header=TRUE,row.names=1)
worms

##                   Area Slope Vegetation Soil.pH  Damp Worm.density
## Nashs.Field        3.6    11  Grassland     4.1 FALSE            4
## Silwood.Bottom     5.1     2     Arable     5.2 FALSE            7
## Nursery.Field      2.8     3  Grassland     4.3 FALSE            2
## Rush.Meadow        2.4     5     Meadow     4.9  TRUE            5
## Gunness.Thicket    3.8     0      Scrub     4.2 FALSE            6
## Oak.Mead           3.1     2  Grassland     3.9 FALSE            2
## Church.Field       3.5     3  Grassland     4.2 FALSE            3
## Ashurst            2.1     0     Arable     4.8 FALSE            4
## The.Orchard        1.9     0    Orchard     5.7 FALSE            9
## Rookery.Slope      1.5     4  Grassland     5.0  TRUE            7
## Garden.Wood        2.9    10      Scrub     5.2 FALSE            8
## North.Gravel       3.3     1  Grassland     4.1 FALSE            1
## South.Gravel       3.7     2  Grassland     4.0 FALSE            2
## Observatory.Ridge  1.8     6  Grassland     3.8 FALSE            0
## Pond.Field         4.1     0     Meadow     5.0  TRUE            6
## Water.Meadow       3.9     0     Meadow     4.9  TRUE            8
## Cheapside          2.2     8      Scrub     4.7  TRUE            4
## Pound.Hill         4.4     2     Arable     4.5 FALSE            5
## Gravel.Pit         2.9     1  Grassland     3.5 FALSE            1
## Farm.Wood          0.8    10      Scrub     5.1  TRUE            3

names(worms) # gives a list of variable/column names

## [1] "Area"         "Slope"        "Vegetation"   "Soil.pH"      "Damp"        
## [6] "Worm.density"

attach(worms) # adds the data frame to the search path so that its columns (Area, Slope, ...) can be referenced by name without the worms$ prefix
summary(worms) # summary stats. The field names are not summarized because they were read in as row names (unique labels), not as a column.

##       Area           Slope        Vegetation           Soil.pH     
##  Min.   :0.800   Min.   : 0.00   Length:20          Min.   :3.500  
##  1st Qu.:2.175   1st Qu.: 0.75   Class :character   1st Qu.:4.100  
##  Median :3.000   Median : 2.00   Mode  :character   Median :4.600  
##  Mean   :2.990   Mean   : 3.50                      Mean   :4.555  
##  3rd Qu.:3.725   3rd Qu.: 5.25                      3rd Qu.:5.000  
##  Max.   :5.100   Max.   :11.00                      Max.   :5.700  
##     Damp          Worm.density 
##  Mode :logical   Min.   :0.00  
##  FALSE:14        1st Qu.:2.00  
##  TRUE :6         Median :4.00  
##                  Mean   :4.35  
##                  3rd Qu.:6.25  
##                  Max.   :9.00

worms[2,4] # different ways to use "subscripts" to access data in specific dataframe locations

## [1] 5.2

worms[,4]

##  [1] 4.1 5.2 4.3 4.9 4.2 3.9 4.2 4.8 5.7 5.0 5.2 4.1 4.0 3.8 5.0 4.9 4.7 4.5 3.5
## [20] 5.1

worms[3,]

##               Area Slope Vegetation Soil.pH  Damp Worm.density
## Nursery.Field  2.8     3  Grassland     4.3 FALSE            2

worms[1:3,c(1,2,4,6)]

##                Area Slope Soil.pH Worm.density
## Nashs.Field     3.6    11     4.1            4
## Silwood.Bottom  5.1     2     5.2            7
## Nursery.Field   2.8     3     4.3            2

worms[Soil.pH>4 & Worm.density>5,] #using subscripts and logical operators (to set conditions) together

##                 Area Slope Vegetation Soil.pH  Damp Worm.density
## Silwood.Bottom   5.1     2     Arable     5.2 FALSE            7
## Gunness.Thicket  3.8     0      Scrub     4.2 FALSE            6
## The.Orchard      1.9     0    Orchard     5.7 FALSE            9
## Rookery.Slope    1.5     4  Grassland     5.0  TRUE            7
## Garden.Wood      2.9    10      Scrub     5.2 FALSE            8
## Pond.Field       4.1     0     Meadow     5.0  TRUE            6
## Water.Meadow     3.9     0     Meadow     4.9  TRUE            8

worms[order(worms[,1]),] # ordering all rows by "Area", i.e., column 1

##                   Area Slope Vegetation Soil.pH  Damp Worm.density
## Farm.Wood          0.8    10      Scrub     5.1  TRUE            3
## Rookery.Slope      1.5     4  Grassland     5.0  TRUE            7
## Observatory.Ridge  1.8     6  Grassland     3.8 FALSE            0
## The.Orchard        1.9     0    Orchard     5.7 FALSE            9
## Ashurst            2.1     0     Arable     4.8 FALSE            4
## Cheapside          2.2     8      Scrub     4.7  TRUE            4
## Rush.Meadow        2.4     5     Meadow     4.9  TRUE            5
## Nursery.Field      2.8     3  Grassland     4.3 FALSE            2
## Garden.Wood        2.9    10      Scrub     5.2 FALSE            8
## Gravel.Pit         2.9     1  Grassland     3.5 FALSE            1
## Oak.Mead           3.1     2  Grassland     3.9 FALSE            2
## North.Gravel       3.3     1  Grassland     4.1 FALSE            1
## Church.Field       3.5     3  Grassland     4.2 FALSE            3
## Nashs.Field        3.6    11  Grassland     4.1 FALSE            4
## South.Gravel       3.7     2  Grassland     4.0 FALSE            2
## Gunness.Thicket    3.8     0      Scrub     4.2 FALSE            6
## Water.Meadow       3.9     0     Meadow     4.9  TRUE            8
## Pond.Field         4.1     0     Meadow     5.0  TRUE            6
## Pound.Hill         4.4     2     Arable     4.5 FALSE            5
## Silwood.Bottom     5.1     2     Arable     5.2 FALSE            7

worms[rev(order(worms[,1])),] # same as above, but in descending order, rev() reverses the argument.

##                   Area Slope Vegetation Soil.pH  Damp Worm.density
## Silwood.Bottom     5.1     2     Arable     5.2 FALSE            7
## Pound.Hill         4.4     2     Arable     4.5 FALSE            5
## Pond.Field         4.1     0     Meadow     5.0  TRUE            6
## Water.Meadow       3.9     0     Meadow     4.9  TRUE            8
## Gunness.Thicket    3.8     0      Scrub     4.2 FALSE            6
## South.Gravel       3.7     2  Grassland     4.0 FALSE            2
## Nashs.Field        3.6    11  Grassland     4.1 FALSE            4
## Church.Field       3.5     3  Grassland     4.2 FALSE            3
## North.Gravel       3.3     1  Grassland     4.1 FALSE            1
## Oak.Mead           3.1     2  Grassland     3.9 FALSE            2
## Gravel.Pit         2.9     1  Grassland     3.5 FALSE            1
## Garden.Wood        2.9    10      Scrub     5.2 FALSE            8
## Nursery.Field      2.8     3  Grassland     4.3 FALSE            2
## Rush.Meadow        2.4     5     Meadow     4.9  TRUE            5
## Cheapside          2.2     8      Scrub     4.7  TRUE            4
## Ashurst            2.1     0     Arable     4.8 FALSE            4
## The.Orchard        1.9     0    Orchard     5.7 FALSE            9
## Observatory.Ridge  1.8     6  Grassland     3.8 FALSE            0
## Rookery.Slope      1.5     4  Grassland     5.0  TRUE            7
## Farm.Wood          0.8    10      Scrub     5.1  TRUE            3

order(Area)

##  [1] 20 10 14  9  8 17  4  3 11 19  6 12  7  1 13  5 16 15 18  2

Vegetation[order(Slope)] # this outputs vegetation names ordered by slope. can use rev() to order descending

##  [1] "Scrub"     "Arable"    "Orchard"   "Meadow"    "Meadow"    "Grassland"
##  [7] "Grassland" "Arable"    "Grassland" "Grassland" "Arable"    "Grassland"
## [13] "Grassland" "Grassland" "Meadow"    "Grassland" "Scrub"     "Scrub"    
## [19] "Scrub"     "Grassland"

LOGICAL SUBSCRIPTS IN R DATAFRAMES:

x = 0:10
sum(x)      # this sums all values of x

## [1] 55

x<7         # this outputs results of T/F operation applied to individual elements of x

##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE

1*(x<7)     # this converts T/F results of above statement to numeric 1/0

##  [1] 1 1 1 1 1 1 1 0 0 0 0

sum(x<7)    # this counts the number of occurrences where x<7 (each TRUE counts as 1)

## [1] 7

sum(x[x<7]) # this sums up all numbers in x that are <7

## [1] 21

y = c(8,5,3,9,1,0,4,2,7,6) # find the top 3 elements of vector y
rev(sort(y))[1:3]     # this outputs top 3 elements

## [1] 9 8 7

sum(rev(sort(y))[1:3])# this outputs sum of top 3 elements

## [1] 24

which(y>5)            # this outputs positions of elements satisfying y>5 in y

## [1]  1  4  9 10

y[(y>5)]              # this outputs the exact elements satisfying y>5

## [1] 8 9 7 6

sample(y)             # sampling without replacement

##  [1] 7 8 0 6 3 1 2 4 5 9

sample(y,replace=TRUE)   # sampling with replacement

##  [1] 7 1 2 9 5 4 6 3 6 4

c = y[-4]            # removing specific elements from a vector (a negative subscript drops that position). NOTE: this variable shares its name with the base function c(); calls such as c(1,2) still work because R looks for a function when a name is called, but a different variable name would be clearer.
c

## [1] 8 5 3 1 0 4 2 7 6

# Trimmed mean of y: sort the values, drop the single smallest and the single
# largest one, and return the mean of what is left.
#   y: a numeric vector.
#   Returns a single number; NaN when fewer than three non-missing values remain.
trim.mean <- function(y) {
  sorted <- sort(y)   # sort() silently drops NA values
  n <- length(sorted) # use the sorted length, not length(y), so that the largest
                      # value is still removed when y contains NAs
  mean(sorted[-c(1, n)])
}
trim.mean(y)

## [1] 4.5

LOGICAL ARITHMETIC IN R:

m = c(8,3,5,7,6,6,8,9,2,3,9,4,10,4,11)
m

##  [1]  8  3  5  7  6  6  8  9  2  3  9  4 10  4 11

# find the total number of instances when m is either very low or high. For definition, low=mean(m)-2, high=mean(m)+2
sum(m<mean(m)-2 | m>mean(m)+2)

## [1] 9

# now find the sum of instances discovered through above command
sum(m[m<mean(m)-2 | m>mean(m)+2])

## [1] 55

# replace values in m using for() and if()
# NOTE: this loop recomputes mean(m) after each replacement, so its threshold drifts and it would not give the same result as the vectorised assignment below
#for(i in 1:length(m)) {if(m[i]<(mean(m)-2)) (m[i] = 0)}
#m

# performing the replacement in one step with a logical subscript (mean(m) is evaluated once, before any element changes)
m[m<(mean(m)-2)] = 0
m

##  [1]  8  0  5  7  6  6  8  9  0  0  9  0 10  0 11

PLOTTING MATHEMATICAL FUNCTIONS IN R:

# PLOTTING OVERLAID FUNCTIONS - the same curve a*(1-exp(-b*x)) drawn for three (a, b) pairs: (3, 0.1), (2, 0.05) and (1, 0.025)
x <- 0:50

# evaluate all three curves first
y1 <- 3*(1-exp(-0.1*x))
y2 <- 2*(1-exp(-0.05*x))
y3 <- 1*(1-exp(-0.025*x))

# plot() opens the figure with the first curve; lines() adds further curves to the existing plot
plot(x,y1,type="l")  # solid line
lines(x,y2,lty=2)    # dashed line
lines(x,y3,lty=3)    # dotted line

MATRICES IN R:

g <- matrix(c(8,3,5,7,6,6,8,9,2,3,9,4,10,4,11),nrow=5) # 5x3 matrix; matrix() fills column by column
h <- matrix(c(25,37,13),ncol=1)                        # 3x1 column matrix
g
h

##      [,1] [,2] [,3]
## [1,]    8    6    9
## [2,]    3    8    4
## [3,]    5    9   10
## [4,]    7    2    4
## [5,]    6    3   11

##      [,1]
## [1,]   25
## [2,]   37
## [3,]   13

g %*% h # matrix multiplication. IMPORTANT - ncol(g) should be = nrow(h)

##      [,1]
## [1,]  539
## [2,]  423
## [3,]  588
## [4,]  301
## [5,]  404