Ph.D. Course Work - 2024 on Quantitative Methods

Day 3 material

———————————————————————–

# Data frame
# Build a small data frame from three parallel vectors (person name, age and
# job type). Each vector becomes one column, named after the vector.
per <- c("Rahul", "Parimal", "Rakhi")
age <- c(24, 25, 24)
jtype <- c("Govt", "Private", "Business")
df1 <- data.frame(per, age, jtype)
df1
##       per age    jtype
## 1   Rahul  24     Govt
## 2 Parimal  25  Private
## 3   Rakhi  24 Business
# Variable (column) names of the data frame
df1
##       per age    jtype
## 1   Rahul  24     Govt
## 2 Parimal  25  Private
## 3   Rakhi  24 Business
names(df1)
## [1] "per"   "age"   "jtype"
# View() opens a spreadsheet-style viewer and needs a graphical session, so
# it is only called when R is running interactively. This keeps the script
# from stopping when it is run in batch mode or without a display.
if (interactive()) {
  View(df1)
}
# dim() returns the number of rows followed by the number of columns.
dim(df1)
## [1] 3 3
# Give the columns more descriptive names. The replacement vector is matched
# to the existing columns by position.
names(df1) <- c("Person", "Age", "Job")
df1
##    Person Age      Job
## 1   Rahul  24     Govt
## 2 Parimal  25  Private
## 3   Rakhi  24 Business
# Label the observations: replace the default row numbers with row names.
rownames(df1) <- c("Row1", "Row2", "Row3")
df1
##       Person Age      Job
## Row1   Rahul  24     Govt
## Row2 Parimal  25  Private
## Row3   Rakhi  24 Business
# Convert the Job column to a factor (categorical variable); the levels are
# the distinct job types in sorted order.
df1$Job <- factor(df1$Job)
# str() gives a short description of the structure of an object: its class,
# its size and the type of each element. It works on every R object.
str(df1)
## 'data.frame':    3 obs. of  3 variables:
##  $ Person: chr  "Rahul" "Parimal" "Rakhi"
##  $ Age   : num  24 25 24
##  $ Job   : Factor w/ 3 levels "Business","Govt",..: 2 3 1
# summary() describes each column according to its type: length and class for
# character columns, a six-number summary for numeric columns and counts per
# level for factors.
summary(df1)
##     Person               Age              Job   
##  Length:3           Min.   :24.00   Business:1  
##  Class :character   1st Qu.:24.00   Govt    :1  
##  Mode  :character   Median :24.00   Private :1  
##                     Mean   :24.33               
##                     3rd Qu.:24.50               
##                     Max.   :25.00
# Leaving both the row and the column index empty selects every row and every
# column, so this prints the same thing as typing df1 on its own.
df1[, ]
##       Person Age      Job
## Row1   Rahul  24     Govt
## Row2 Parimal  25  Private
## Row3   Rakhi  24 Business
df1
##       Person Age      Job
## Row1   Rahul  24     Govt
## Row2 Parimal  25  Private
## Row3   Rakhi  24 Business
# cars is a dataset that ships with R. View() needs a graphical session, so
# the viewer is only opened when R is running interactively; this keeps the
# script from stopping in batch mode or on a machine without a display.
if (interactive()) {
  View(cars)
}
# Copy the built-in cars dataset into a new data frame
dfCar <- cars
dfCar
##    speed dist
## 1      4    2
## 2      4   10
## 3      7    4
## 4      7   22
## 5      8   16
## 6      9   10
## 7     10   18
## 8     10   26
## 9     10   34
## 10    11   17
## 11    11   28
## 12    12   14
## 13    12   20
## 14    12   24
## 15    12   28
## 16    13   26
## 17    13   34
## 18    13   34
## 19    13   46
## 20    14   26
## 21    14   36
## 22    14   60
## 23    14   80
## 24    15   20
## 25    15   26
## 26    15   54
## 27    16   32
## 28    16   40
## 29    17   32
## 30    17   40
## 31    17   50
## 32    18   42
## 33    18   56
## 34    18   76
## 35    18   84
## 36    19   36
## 37    19   46
## 38    19   68
## 39    20   32
## 40    20   48
## 41    20   52
## 42    20   56
## 43    20   64
## 44    22   66
## 45    23   54
## 46    24   70
## 47    24   92
## 48    24   93
## 49    24  120
## 50    25   85
# Inspection helpers for a data frame: nrow, ncol, dim, dimnames, names
names(dfCar)
## [1] "speed" "dist"
# Subsets of a data frame: tail() shows the last six rows and head() the
# first six.
tail(dfCar)
##    speed dist
## 45    23   54
## 46    24   70
## 47    24   92
## 48    24   93
## 49    24  120
## 50    25   85
head(dfCar)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
# Keep all the variables for the 3rd observation
dfCar[3, ]
##   speed dist
## 3     7    4
# Keep the 2nd variable for all observations; selecting a single column this
# way returns a plain vector rather than a data frame.
dfCar[, 2]
##  [1]   2  10   4  22  16  10  18  26  34  17  28  14  20  24  28  26  34  34  46
## [20]  26  36  60  80  20  26  54  32  40  32  40  50  42  56  76  84  36  46  68
## [39]  32  48  52  56  64  66  54  70  92  93 120  85
# Keep observations 1 to 5, 12 and 15
dfCar[c(1:5, 12, 15), ]
##    speed dist
## 1      4    2
## 2      4   10
## 3      7    4
## 4      7   22
## 5      8   16
## 12    12   14
## 15    12   28
# Remove observations 10 to 20: a negative index drops the listed rows.
dfCar[-(10:20), ]
##    speed dist
## 1      4    2
## 2      4   10
## 3      7    4
## 4      7   22
## 5      8   16
## 6      9   10
## 7     10   18
## 8     10   26
## 9     10   34
## 21    14   36
## 22    14   60
## 23    14   80
## 24    15   20
## 25    15   26
## 26    15   54
## 27    16   32
## 28    16   40
## 29    17   32
## 30    17   40
## 31    17   50
## 32    18   42
## 33    18   56
## 34    18   76
## 35    18   84
## 36    19   36
## 37    19   46
## 38    19   68
## 39    20   32
## 40    20   48
## 41    20   52
## 42    20   56
## 43    20   64
## 44    22   66
## 45    23   54
## 46    24   70
## 47    24   92
## 48    24   93
## 49    24  120
## 50    25   85
# keep the last observation using nrow

# Set the working directory
# Show the current working directory before changing it.
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024"
# Move into the Data sub-folder. This is an absolute path on a Windows D:
# drive, so adjust it to wherever the Data folder lives on your machine.
# Inside an R string every backslash of a Windows path must be doubled.
setwd("D:\\D Drive\\Ph.D. Course Work\\Ph.D. 2024\\Data")
# Confirm the change; R reports the path with forward slashes.
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024/Data"
# Read combined.csv into an R object called survey using read_csv() from the
# readr package. The file name is relative, so it is looked up in the current
# working directory.
library(readr)
survey <- read_csv(file = "combined.csv")
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() needs a graphical session, so the viewer is only opened when R is
# running interactively; this keeps the script from stopping in batch mode.
if (interactive()) {
  View(survey)
}
# List the column names to locate species_id.
names(survey)
##  [1] "record_id"       "month"           "day"             "year"           
##  [5] "plot_id"         "species_id"      "sex"             "hindfoot_length"
##  [9] "weight"          "genus"           "species"         "taxa"           
## [13] "plot_type"
# Select the species_id column for the first 2 rows. The column is referred
# to by name rather than by its position (6), so the selection stays correct
# even if the column order of the file changes.
survey[1:2, "species_id"]
## # A tibble: 2 × 1
##   species_id
##   <chr>     
## 1 NL        
## 2 NL
# select all the male animals surveyed in the year 1980

# select the last row of the data
# select the middle row of the data.
# Create factors for the variables taxa and genus