# -----------------------------------------------------------------------
# Data frame: build three equal-length vectors and combine them column-wise
per <- c("Rahul", "Parimal", "Rakhi")
age <- c(24, 25, 24)
jtype <- c("Govt", "Private", "Business")
# Column names are given explicitly and match the vector names
df1 <- data.frame(per = per, age = age, jtype = jtype)
df1
## per age jtype
## 1 Rahul 24 Govt
## 2 Parimal 25 Private
## 3 Rakhi 24 Business
# Variable names of the data frame
df1
## per age jtype
## 1 Rahul 24 Govt
## 2 Parimal 25 Private
## 3 Rakhi 24 Business
names(df1)
## [1] "per" "age" "jtype"
# View() opens a spreadsheet-style viewer and needs a GUI; only call it in
# interactive sessions so the script also runs headless (Rscript, knitting)
if (interactive()) {
  View(df1)
}
# dim() gives the number of rows followed by the number of columns
dim(df1)
## [1] 3 3
# Rename the three columns
names(df1) <- c("Person", "Age", "Job")
df1
## Person Age Job
## 1 Rahul 24 Govt
## 2 Parimal 25 Private
## 3 Rakhi 24 Business
# Row names (observation labels) of the data frame: Row1, Row2, Row3
rownames(df1) <- paste0("Row", 1:3)
df1
## Person Age Job
## Row1 Rahul 24 Govt
## Row2 Parimal 25 Private
## Row3 Rakhi 24 Business
# Convert the Job column from character to a factor (categorical variable)
df1[["Job"]] <- factor(df1[["Job"]])
# str() compactly displays the structure of an object (works on any R object)
str(df1)
## 'data.frame': 3 obs. of 3 variables:
## $ Person: chr "Rahul" "Parimal" "Rakhi"
## $ Age : num 24 25 24
## $ Job : Factor w/ 3 levels "Business","Govt",..: 2 3 1
# summary() reports a per-column summary suited to each column's type
summary(df1)
## Person Age Job
## Length:3 Min. :24.00 Business:1
## Class :character 1st Qu.:24.00 Govt :1
## Mode :character Median :24.00 Private :1
## Mean :24.33
## 3rd Qu.:24.50
## Max. :25.00
# Empty row and column indices select every row and every column
df1[, ]
## Person Age Job
## Row1 Rahul 24 Govt
## Row2 Parimal 25 Private
## Row3 Rakhi 24 Business
df1
## Person Age Job
## Row1 Rahul 24 Govt
## Row2 Parimal 25 Private
## Row3 Rakhi 24 Business
# View() needs a GUI viewer, so only open it in interactive sessions; this
# keeps the script runnable headless (Rscript, knitting)
if (interactive()) {
  View(cars)
}
# Copy the built-in `cars` dataset into a new data frame
dfCar <- cars
dfCar
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
# Column names of the data frame (related helpers: nrow, ncol, dim, dimnames)
names(dfCar)
## [1] "speed" "dist"
# Subsets of the data frame: the last six rows, then the first six rows
tail(dfCar, n = 6L)
## speed dist
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
head(dfCar, n = 6L)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# Keep all the variables for the 3rd observation
dfCar[3, ]
## speed dist
## 3 7 4
# Keep the 2nd variable for all observations (returned as a plain vector)
dfCar[[2]]
## [1] 2 10 4 22 16 10 18 26 34 17 28 14 20 24 28 26 34 34 46
## [20] 26 36 60 80 20 26 54 32 40 32 40 50 42 56 76 84 36 46 68
## [39] 32 48 52 56 64 66 54 70 92 93 120 85
# Keep observations 1 to 5, 12 and 15
dfCar[c(1:5, 12, 15), ]
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 12 12 14
## 15 12 28
# Remove observations 10 to 20 (negative indices drop rows)
dfCar[-c(10:20), ]
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
# keep the last observation using nrow
# Set the working directory
# Show the current working directory before changing it
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024"
# NOTE(review): absolute, machine-specific path; the script only runs as-is
# on a machine where this directory exists
setwd("D:\\D Drive\\Ph.D. Course Work\\Ph.D. 2024\\Data")
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024/Data"
# Load combined.csv (resolved against the working directory) into an R object
library(readr)
survey <- read_csv(file = "combined.csv")
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() needs a GUI viewer, so only open it in interactive sessions; this
# keeps the script runnable headless (Rscript, knitting)
if (interactive()) {
  View(survey)
}
# Select the species_id column for the first 2 rows.
names(survey)
## [1] "record_id" "month" "day" "year"
## [5] "plot_id" "species_id" "sex" "hindfoot_length"
## [9] "weight" "genus" "species" "taxa"
## [13] "plot_type"
# Index the column by name instead of by position (6) so the selection
# still picks species_id if the column order of the file changes
survey[1:2, "species_id"]
## # A tibble: 2 × 1
## species_id
## <chr>
## 1 NL
## 2 NL
# select all the male animals surveyed in the year 1980
# select the last row of the data
# select the middle row of the data.
# Create factors for the variables taxa and genus