For Assignment 1, I created a dataframe of flight schedules like those usually displayed at an airport.

1. Creating a dataframe

# One vector per column; every vector has one entry per flight (8 flights).
Flight <- c(102, 255, 329, 180, 321, 216, 114, 260)
Destination <- c(
  "Paris", "Tokyo", "Sydney", "London",
  "Beijing", "Singapore", "Delhi", "New York"
)
Departure <- c(
  "09:00", "11:45", "10:30", "14:30",
  "16:00", "21:15", "22:45", "16:30"
)
capacity <- c(165, 248, 248, 248, 440, 165, 440, 248)
Status <- c(
  "Delayed", "Cancelled", "Boarding", "Boarding",
  "On Time", "On Time", "Cancelled", "Delayed"
)

# stringsAsFactors = TRUE is spelled out because the printed results in this
# document show the text columns as factors; R >= 4.0 would otherwise keep
# them as character vectors and the output would no longer match.
df <- data.frame(
  Flight, Destination, Departure, capacity, Status,
  stringsAsFactors = TRUE
)
df
##   Flight Destination Departure capacity    Status
## 1    102       Paris     09:00      165   Delayed
## 2    255       Tokyo     11:45      248 Cancelled
## 3    329      Sydney     10:30      248  Boarding
## 4    180      London     14:30      248  Boarding
## 5    321     Beijing     16:00      440   On Time
## 6    216   Singapore     21:15      165   On Time
## 7    114       Delhi     22:45      440 Cancelled
## 8    260    New York     16:30      248   Delayed

2. Exploring dataframe

a. Components of the dataframe
# str() prints a compact overview: dimensions, then each column's type and
# its first values.
str(df)
## 'data.frame':    8 obs. of  5 variables:
##  $ Flight     : num  102 255 329 180 321 216 114 260
##  $ Destination: Factor w/ 8 levels "Beijing","Delhi",..: 5 8 7 3 1 6 2 4
##  $ Departure  : Factor w/ 8 levels "09:00","10:30",..: 1 3 2 4 5 7 8 6
##  $ capacity   : num  165 248 248 248 440 165 440 248
##  $ Status     : Factor w/ 4 levels "Boarding","Cancelled",..: 3 2 1 1 4 4 2 3
# class() reports the object's class attribute.
class(df)
## [1] "data.frame"
# typeof() reports the underlying storage type: a data frame is a list whose
# elements are the columns.
typeof(df)
## [1] "list"
b. Column names, number of columns, and number of rows.
# Column names, in column order.
names(df)
## [1] "Flight"      "Destination" "Departure"   "capacity"    "Status"
# Number of rows (observations).
nrow(df)
## [1] 8
# Number of columns (variables).
ncol(df)
## [1] 5
# length() counts the list elements of the data frame, i.e. its columns,
# so it gives the same value as ncol(df).
length(df)
## [1] 5
c. Assign row names.
# Label the rows 1 to 8 explicitly, then print the data frame to check.
row.names(df) <- c(1, 2, 3, 4, 5, 6, 7, 8)
df
##   Flight Destination Departure capacity    Status
## 1    102       Paris     09:00      165   Delayed
## 2    255       Tokyo     11:45      248 Cancelled
## 3    329      Sydney     10:30      248  Boarding
## 4    180      London     14:30      248  Boarding
## 5    321     Beijing     16:00      440   On Time
## 6    216   Singapore     21:15      165   On Time
## 7    114       Delhi     22:45      440 Cancelled
## 8    260    New York     16:30      248   Delayed
d. Return the first 6 rows of the dataframe
# n = 6L is head()'s default, written out to match the heading.
head(df, n = 6L)
##   Flight Destination Departure capacity    Status
## 1    102       Paris     09:00      165   Delayed
## 2    255       Tokyo     11:45      248 Cancelled
## 3    329      Sydney     10:30      248  Boarding
## 4    180      London     14:30      248  Boarding
## 5    321     Beijing     16:00      440   On Time
## 6    216   Singapore     21:15      165   On Time
e. Return the last 6 rows of the dataframe
# n = 6L is tail()'s default, written out to match the heading.
tail(df, n = 6L)
##   Flight Destination Departure capacity    Status
## 3    329      Sydney     10:30      248  Boarding
## 4    180      London     14:30      248  Boarding
## 5    321     Beijing     16:00      440   On Time
## 6    216   Singapore     21:15      165   On Time
## 7    114       Delhi     22:45      440 Cancelled
## 8    260    New York     16:30      248   Delayed
f. Accessing the column by its name
# Single brackets with a column name keep the data frame structure: the
# result is a one-column data frame.
df["Destination"]
##   Destination
## 1       Paris
## 2       Tokyo
## 3      Sydney
## 4      London
## 5     Beijing
## 6   Singapore
## 7       Delhi
## 8    New York
# `$` extracts the column itself (here a factor, so its levels are printed).
df$Destination
## [1] Paris     Tokyo     Sydney    London    Beijing   Singapore Delhi    
## [8] New York 
## Levels: Beijing Delhi London New York Paris Singapore Sydney Tokyo
# Double brackets extract a column by position; column 2 is Destination.
df[[2]]
## [1] Paris     Tokyo     Sydney    London    Beijing   Singapore Delhi    
## [8] New York 
## Levels: Beijing Delhi London New York Paris Singapore Sydney Tokyo
g. Accessing an element in the column
# Extract the Status column by name, then take its 3rd element.
df[["Status"]][3]
## [1] Boarding
## Levels: Boarding Cancelled Delayed On Time
# Same element, using `$` to extract the column.
df$Status[3]
## [1] Boarding
## Levels: Boarding Cancelled Delayed On Time
# [row, column] indexing: row 4 of column 2 (Destination).
df[4,2]
## [1] London
## Levels: Beijing Delhi London New York Paris Singapore Sydney Tokyo
h. Getting the 5th row
# An empty column index selects every column of row 5.
df[5, ]
##   Flight Destination Departure capacity  Status
## 5    321     Beijing     16:00      440 On Time
i. Getting the 4th column
# An empty row index selects every row of column 4 (capacity).
df1 <- df[, 4]
df1
## [1] 165 248 248 248 440 165 440 248
j. Type of the data
# A single column selected with df[, j] is simplified to a plain vector,
# so its class is that of the column's values rather than "data.frame".
class(df1)
## [1] "numeric"
k. Because the result is returned as a vector, use drop=FALSE to keep it as a dataframe
# drop = FALSE stops `[` from simplifying the one-column selection to a
# vector, so the result stays a data frame.
df2 <- df[, 4, drop = FALSE]
df2
##   capacity
## 1      165
## 2      248
## 3      248
## 4      248
## 5      440
## 6      165
## 7      440
## 8      248
class(df2)
## [1] "data.frame"

3. Modifying dataframe

a. Change the 2nd row capacity (248 -> 440)
# Overwrite one cell (row 2, column "capacity"), then print the result.
df[2, "capacity"] <- 440
df
##   Flight Destination Departure capacity    Status
## 1    102       Paris     09:00      165   Delayed
## 2    255       Tokyo     11:45      440 Cancelled
## 3    329      Sydney     10:30      248  Boarding
## 4    180      London     14:30      248  Boarding
## 5    321     Beijing     16:00      440   On Time
## 6    216   Singapore     21:15      165   On Time
## 7    114       Delhi     22:45      440 Cancelled
## 8    260    New York     16:30      248   Delayed
b. Change the column name (capacity -> Capacity)
# Find the column called "capacity" with a logical mask and rename only it.
names(df)[names(df) == "capacity"] <- "Capacity"
df
##   Flight Destination Departure Capacity    Status
## 1    102       Paris     09:00      165   Delayed
## 2    255       Tokyo     11:45      440 Cancelled
## 3    329      Sydney     10:30      248  Boarding
## 4    180      London     14:30      248  Boarding
## 5    321     Beijing     16:00      440   On Time
## 6    216   Singapore     21:15      165   On Time
## 7    114       Delhi     22:45      440 Cancelled
## 8    260    New York     16:30      248   Delayed
c. Adding new variable (column)
# cbind() returns a copy of df with the extra Gate column; df is unchanged.
df4 <- cbind(df, Gate = c(3, 12, 4, 15, 7, 9, 27, 30))
df4
##   Flight Destination Departure Capacity    Status Gate
## 1    102       Paris     09:00      165   Delayed    3
## 2    255       Tokyo     11:45      440 Cancelled   12
## 3    329      Sydney     10:30      248  Boarding    4
## 4    180      London     14:30      248  Boarding   15
## 5    321     Beijing     16:00      440   On Time    7
## 6    216   Singapore     21:15      165   On Time    9
## 7    114       Delhi     22:45      440 Cancelled   27
## 8    260    New York     16:30      248   Delayed   30
d. Adding new observation (row)
# New observation as a one-row data frame with the same columns as df4.
# Departure uses the same "HH:MM" format as the existing rows.
bangkok <- data.frame(
  Flight = 349,
  Destination = "Bangkok",
  Departure = "22:00",
  Capacity = 165,
  Status = "On Time",
  Gate = 11
)

# rbind() returns a new data frame. The result is only printed here, not
# assigned, so df4 itself keeps its 8 rows.
rbind(df4, bangkok)
##    Flight Destination Departure Capacity    Status Gate
## 1     102       Paris     09:00      165   Delayed    3
## 2     255       Tokyo     11:45      440 Cancelled   12
## 3     329      Sydney     10:30      248  Boarding    4
## 4     180      London     14:30      248  Boarding   15
## 5     321     Beijing     16:00      440   On Time    7
## 6     216   Singapore     21:15      165   On Time    9
## 7     114       Delhi     22:45      440 Cancelled   27
## 8     260    New York     16:30      248   Delayed   30
## 11    349     Bangkok     22:00      165   On Time   11
e. Retrieve summary information for each component (mean, median, quartiles)
# summary() reports min/quartiles/mean/max for numeric columns and per-level
# counts for factor columns.
summary(df4)
##      Flight         Destination   Departure    Capacity           Status 
##  Min.   :102.0   Beijing  :1    09:00  :1   Min.   :165.0   Boarding :2  
##  1st Qu.:163.5   Delhi    :1    10:30  :1   1st Qu.:227.2   Cancelled:2  
##  Median :235.5   London   :1    11:45  :1   Median :248.0   Delayed  :2  
##  Mean   :222.1   New York :1    14:30  :1   Mean   :299.2   On Time  :2  
##  3rd Qu.:275.2   Paris    :1    16:00  :1   3rd Qu.:440.0                
##  Max.   :329.0   Singapore:1    16:30  :1   Max.   :440.0                
##                  (Other)  :2    (Other):2                                
##       Gate      
##  Min.   : 3.00  
##  1st Qu.: 6.25  
##  Median :10.50  
##  Mean   :13.38  
##  3rd Qu.:18.00  
##  Max.   :30.00  
## 
f. Sorting data in ascending order (Column:Gate)
# order() gives the row permutation that sorts Gate; ascending is its default.
df4[order(df4$Gate), ]
##   Flight Destination Departure Capacity    Status Gate
## 1    102       Paris     09:00      165   Delayed    3
## 3    329      Sydney     10:30      248  Boarding    4
## 5    321     Beijing     16:00      440   On Time    7
## 6    216   Singapore     21:15      165   On Time    9
## 2    255       Tokyo     11:45      440 Cancelled   12
## 4    180      London     14:30      248  Boarding   15
## 7    114       Delhi     22:45      440 Cancelled   27
## 8    260    New York     16:30      248   Delayed   30
g. Removing a component from the dataframe (Remove: Capacity)
# Assigning NULL to a column removes it from the data frame.
df4[["Capacity"]] <- NULL
df4
##   Flight Destination Departure    Status Gate
## 1    102       Paris     09:00   Delayed    3
## 2    255       Tokyo     11:45 Cancelled   12
## 3    329      Sydney     10:30  Boarding    4
## 4    180      London     14:30  Boarding   15
## 5    321     Beijing     16:00   On Time    7
## 6    216   Singapore     21:15   On Time    9
## 7    114       Delhi     22:45 Cancelled   27
## 8    260    New York     16:30   Delayed   30
h. Removing a row (3rd row)
# A negative row index drops that row; the remaining rows keep their labels.
df4 <- df4[-3, ]
df4
##   Flight Destination Departure    Status Gate
## 1    102       Paris     09:00   Delayed    3
## 2    255       Tokyo     11:45 Cancelled   12
## 4    180      London     14:30  Boarding   15
## 5    321     Beijing     16:00   On Time    7
## 6    216   Singapore     21:15   On Time    9
## 7    114       Delhi     22:45 Cancelled   27
## 8    260    New York     16:30   Delayed   30

Thank you