##Week 2 Homework ##By Catherine Cho

Question 1 Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

# Load the US bird-strike data set from a local CSV and print a per-column overview
birds <- read.csv(file = "/Users/catherinecho/Documents/Data Sets/birds.csv")
summary(object = birds)
##        X             opid             operator            atype          
##  Min.   :    1   Length:19302       Length:19302       Length:19302      
##  1st Qu.: 4826   Class :character   Class :character   Class :character  
##  Median : 9652   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 9652                                                           
##  3rd Qu.:14477                                                           
##  Max.   :19302                                                           
##                                                                          
##    remarks          phase_of_flt          ac_mass         num_engs    
##  Length:19302       Length:19302       Min.   :1.000   Min.   :1.000  
##  Class :character   Class :character   1st Qu.:3.000   1st Qu.:2.000  
##  Mode  :character   Mode  :character   Median :4.000   Median :2.000  
##                                        Mean   :3.362   Mean   :2.096  
##                                        3rd Qu.:4.000   3rd Qu.:2.000  
##                                        Max.   :5.000   Max.   :4.000  
##                                        NA's   :1284    NA's   :1307   
##      date           time_of_day           state               height       
##  Length:19302       Length:19302       Length:19302       Min.   :    0.0  
##  Class :character   Class :character   Class :character   1st Qu.:    0.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :   40.0  
##                                                           Mean   :  754.7  
##                                                           3rd Qu.:  500.0  
##                                                           Max.   :32500.0  
##                                                           NA's   :3193     
##      speed          effect              sky              species         
##  Min.   :  0.0   Length:19302       Length:19302       Length:19302      
##  1st Qu.:110.0   Class :character   Class :character   Class :character  
##  Median :130.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :136.1                                                           
##  3rd Qu.:150.0                                                           
##  Max.   :400.0                                                           
##  NA's   :7008                                                            
##   birds_seen        birds_struck      
##  Length:19302       Length:19302      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Mean and median of the height column; NA entries are excluded
mean(birds[["height"]], na.rm = TRUE)
## [1] 754.6778
median(birds[["height"]], na.rm = TRUE)
## [1] 40
# Mean and median of the speed column; NA entries are excluded
mean(birds[["speed"]], na.rm = TRUE)
## [1] 136.0993
median(birds[["speed"]], na.rm = TRUE)
## [1] 130

Question 2 Create a new data frame with a subset of the columns and rows. Make sure to rename it.

# Subset of the bird strikes whose operator is Southwest Airlines, keeping only
# the time_of_day, state, height and speed columns.
# Columns are selected by name instead of by position (10:13) so the subset
# stays correct even if the column order of the CSV changes.
birds_1 <- subset(
  birds,
  operator == "SOUTHWEST AIRLINES",
  select = c(time_of_day, state, height, speed)
)

Question 3 Create new column names for the new data frame.

# Replace the four column names of the subset, in positional order
names(birds_1) <- c("time", "state_in_US", "height_ft", "speed_knots")

Question 4 Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.

# Overview of the Southwest Airlines subset, followed by the mean and median of
# its height and speed columns (NA entries excluded).
# Comparison with the full data set, per the recorded output: the subset has a
# higher mean height (1271 vs 754.7) and median height (100 vs 40), and a
# higher mean speed (156.2 vs 136.1) and median speed (140 vs 130).
summary(birds_1)
##      time           state_in_US          height_ft      speed_knots   
##  Length:1096        Length:1096        Min.   :    0   Min.   : 30.0  
##  Class :character   Class :character   1st Qu.:    0   1st Qu.:130.0  
##  Mode  :character   Mode  :character   Median :  100   Median :140.0  
##                                        Mean   : 1271   Mean   :156.2  
##                                        3rd Qu.: 1500   3rd Qu.:180.0  
##                                        Max.   :15000   Max.   :320.0  
##                                        NA's   :89      NA's   :161
mean(birds_1$height_ft, na.rm = TRUE)
## [1] 1271.117
# Use the full renamed column name; `$height` only worked through partial matching
median(birds_1$height_ft, na.rm = TRUE)
## [1] 100
mean(birds_1$speed_knots, na.rm = TRUE)
## [1] 156.2439
median(birds_1$speed_knots, na.rm = TRUE)
## [1] 140

Question 5 For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.

# Relabel every "Night" entry in the "time" column as "Evening";
# all other values (including NA) are left untouched
night_rows <- which(birds_1$time == "Night")
birds_1$time <- replace(birds_1$time, night_rows, "Evening")

Question 6 Display enough rows to see examples of all of steps 1-5 above.

# Print the first 20 rows of the subset
utils::head(x = birds_1, n = 20L)
##        time state_in_US height_ft speed_knots
## 77      Day          CA      1200         140
## 80  Evening          TX      4000         250
## 132     Day          TX         0         140
## 167     Day          CA       150         130
## 175     Day          TX       500         145
## 225     Day          TX        NA         180
## 231     Day          IN         0          95
## 293 Evening          CA       150         125
## 303     Day          CA        10         135
## 320     Day          CA         0         100
## 325 Evening          TX       700         135
## 326     Day          TX         0         120
## 368 Evening          OK      1000         180
## 371     Day          OK        50         135
## 442 Evening          TX      3000         180
## 449 Evening        <NA>        NA          NA
## 503     Day          CA       100         138
## 510     Day          TX         0          NA
## 530     Day          AR         0         100
## 547     Day          TX       100         145

Question 7: Bonus – place the original .csv in a GitHub repository and have R read from the link. This will be a very useful skill as you progress in your data science education and career.

# Read the CSV from its raw GitHub URL through a url() connection using readr
library(readr)
urlfile <- "https://raw.githubusercontent.com/catcho1632/Birds/main/birds.csv"
birddata <- read_csv(file = url(description = urlfile))
## New names:
## * `` -> ...1
## Rows: 19302 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): opid, operator, atype, remarks, phase_of_flt, date, time_of_day, s...
## dbl  (5): ...1, ac_mass, num_engs, height, speed
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.