#1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes of your data.

# Named US Atlantic hurricanes data set.
# The CSV is read directly from a raw GitHub URL.
hurricane <- read.csv(
  file = "https://raw.githubusercontent.com/ex-pr/Dataset/main/hurricNamed.csv",
  header = TRUE,
  sep = ","
)
# Per-column overview of the full data set (auto-printed at top level).
summary(hurricane)
##       X                 Name                Year       LF.WindsMPH   
##  Length:94          Length:94          Min.   :1950   Min.   : 75.0  
##  Class :character   Class :character   1st Qu.:1964   1st Qu.: 85.0  
##  Mode  :character   Mode  :character   Median :1985   Median :105.0  
##                                        Mean   :1982   Mean   :104.7  
##                                        3rd Qu.:1999   3rd Qu.:120.0  
##                                        Max.   :2012   Max.   :190.0  
##  LF.PressureMB       LF.times       BaseDamage          NDAM2014    
##  Min.   : 909.0   Min.   :1.000   Min.   :    0.20   Min.   :    1  
##  1st Qu.: 950.0   1st Qu.:1.000   1st Qu.:   25.75   1st Qu.:  290  
##  Median : 963.5   Median :1.000   Median :  200.00   Median : 2090  
##  Mean   : 964.4   Mean   :1.117   Mean   : 3340.70   Mean   : 8433  
##  3rd Qu.: 982.8   3rd Qu.:1.000   3rd Qu.: 1500.00   3rd Qu.: 9050  
##  Max.   :1003.0   Max.   :3.000   Max.   :81000.00   Max.   :88420  
##  AffectedStates       firstLF              deaths             mf           
##  Length:94          Length:94          Min.   :   0.00   Length:94         
##  Class :character   Class :character   1st Qu.:   2.00   Class :character  
##  Mode  :character   Mode  :character   Median :   5.00   Mode  :character  
##                                        Mean   :  44.17                     
##                                        3rd Qu.:  21.00                     
##                                        Max.   :1836.00                     
##   BaseDam2014      
##  Min.   :    1.04  
##  1st Qu.:   93.11  
##  Median :  908.33  
##  Mean   : 4830.19  
##  3rd Qu.: 3341.62  
##  Max.   :98195.39
# Open the spreadsheet-style viewer only when someone is at the console.
# View() launches a GUI data viewer, so calling it unconditionally can fail or
# do nothing useful when the script is run in batch mode or knitted.
if (interactive()) {
  View(hurricane)
}
# Mean and median of the damage (in $ millions) and death-count columns
basedamage_mean <- mean(hurricane[["BaseDamage"]])
basedamage_median <- median(hurricane[["BaseDamage"]])
deaths_mean <- mean(hurricane[["deaths"]])
deaths_median <- median(hurricane[["deaths"]])
# Report both pairs; %f renders each value with six decimal places
print(sprintf(
  "Damage in millions of $ mean is %f and median is %f",
  basedamage_mean,
  basedamage_median
))
## [1] "Damage in millions of $ mean is 3340.697154 and median is 200.000000"
print(sprintf(
  "Number of continental US deaths mean is %f and median is %f",
  deaths_mean,
  deaths_median
))
## [1] "Number of continental US deaths mean is 44.170213 and median is 5.000000"
# Output
#[1] "Damage in millions of $ mean is 3340.697154 and median is 200.000000"
#[1] "Number of continental US deaths mean is 44.170213 and median is 5.000000"

#2,3. Create a new data frame with a subset of the columns AND rows. Create new column names for each column in the new data frame.

# Keep the first 10 hurricanes and five of the columns, selected by name
new_hurricane <- hurricane[
  seq_len(10),
  c("Name", "Year", "BaseDamage", "AffectedStates", "deaths")
]
# Give the retained columns descriptive headings
names(new_hurricane) <- c(
  "Hurricane Name",
  "Year occured",
  "Property damage",
  "States affected",
  "Number of continental US deaths"
)
summary(new_hurricane)
##  Hurricane Name      Year occured  Property damage   States affected   
##  Length:10          Min.   :1950   Min.   :  0.200   Length:10         
##  Class :character   1st Qu.:1952   1st Qu.:  2.888   Class :character  
##  Mode  :character   Median :1954   Median : 34.000   Mode  :character  
##                     Mean   :1953   Mean   :105.648                     
##                     3rd Qu.:1954   3rd Qu.:160.000                     
##                     Max.   :1955   Max.   :460.228                     
##  Number of continental US deaths
##  Min.   :  0.00                 
##  1st Qu.:  1.25                 
##  Median :  3.50                 
##  Mean   : 31.00                 
##  3rd Qu.: 20.00                 
##  Max.   :200.00
# Recompute mean/median of damage and deaths on the 10-row subset
newdamage_mean <- mean(new_hurricane[["Property damage"]])
newdamage_median <- median(new_hurricane[["Property damage"]])
newdeaths_mean <- mean(new_hurricane[["Number of continental US deaths"]])
newdeaths_median <- median(new_hurricane[["Number of continental US deaths"]])
# Report the subset values, then set them against the full-data values
print(sprintf(
  "Damage in millions of $ mean is %f and median is %f in the new dataset",
  newdamage_mean,
  newdamage_median
))
## [1] "Damage in millions of $ mean is 105.647750 and median is 34.000000 in the new dataset"
print(sprintf(
  "Number of continental US deaths mean is %f and median is %f in the new dataset",
  newdeaths_mean,
  newdeaths_median
))
## [1] "Number of continental US deaths mean is 31.000000 and median is 3.500000 in the new dataset"
print(sprintf(
  "Damage in millions of $ mean changed from %f to %f and median changed from %f to %f in the new dataset",
  basedamage_mean,
  newdamage_mean,
  basedamage_median,
  newdamage_median
))
## [1] "Damage in millions of $ mean changed from 3340.697154 to 105.647750 and median changed from 200.000000 to 34.000000 in the new dataset"
print(sprintf(
  "Number of continental US deaths mean changed from %f to %f and median changed from %f to %f in the new dataset",
  deaths_mean,
  newdeaths_mean,
  deaths_median,
  newdeaths_median
))
## [1] "Number of continental US deaths mean changed from 44.170213 to 31.000000 and median changed from 5.000000 to 3.500000 in the new dataset"
# Written interpretation of the damage comparison; the message is assembled
# from short pieces so no source line carries the whole paragraph.
print(paste0(
  "Damage in millions of $: mean and median became smaller in the new ",
  "dataset as we are using only first 10 rows from the original dataset ",
  "that covers years 1950-1955, the original data set covers years ",
  "1950-2012, more buildings, roads, etc were built since 1955 that can be ",
  "destroyed by a hurricane. As time goes on, there is more damage from ",
  "hurricanes as there is more things to destroy. Also, since max and min ",
  "values define mean and median too, max damage from hurricane in the ",
  "original data set was 81000 milllions of$, in the new dataset that ",
  "covers only 1950-1955 it is 460 millions of $, so median moved closer ",
  "to the left, became smaller."
))
## [1] "Damage in millions of $: mean and median became smaller in the new dataset as we are using only first 10 rows from the original dataset that covers years 1950-1955, the original data set covers years 1950-2012, more buildings, roads, etc were built since 1955 that can be destroyed by a hurricane. As time goes on, there is more damage from hurricanes as there is more things to destroy. Also, since max and min values define mean and median too, max damage from hurricane in the original data set was 81000 milllions of$, in the new dataset that covers only 1950-1955 it is 460 millions of $, so median moved closer to the left, became smaller."
# Written interpretation of the death-count comparison
print(paste0(
  "Number of continental US deaths: mean and median became smaller in the ",
  "new dataset. Maximun deaths changed from 1836 to 200, as a result, ",
  "median moved to the left, became smaller as well as mean. Less people ",
  "lived at 1950-1955 than at the recent times. Population becomes larger, ",
  "number of lives that can be affected by a hurricane has increased"
))
## [1] "Number of continental US deaths: mean and median became smaller in the new dataset. Maximun deaths changed from 1836 to 200, as a result, median moved to the left, became smaller as well as mean. Less people lived at 1950-1955 than at the recent times. Population becomes larger, number of lives that can be affected by a hurricane has increased"
#Output
# "Damage in millions of $ mean is 105.647750 and median is 34.000000 in the new dataset"
# "Number of continental US deaths mean is 31.000000 and median is 3.500000 in the new dataset"
# "Damage in millions of $ mean changed from 3340.697154 to 105.647750 and median changed from 200.000000 to 34.000000 in the new dataset"
# "Number of continental US deaths mean changed from 44.170213 to 31.000000 and median changed from 5.000000 to 3.500000 in the new dataset"

#4. For at least 3 different/distinct values in a column, please rename them so that every value in that column is renamed.

# Recode the "States affected" column:
#   "SC" -> "SCarol", "FL" -> "Florida", and any entry listing more than two
#   states -> "More than 2 states".
# The number of states is counted from the comma-separated entries rather than
# inferred from string length, so the rule does not rely on every label being
# a two-letter code or on the order in which the replacements are applied.
new_hurricane$`States affected`[new_hurricane$`States affected` == "SC"] <- "SCarol"
new_hurricane$`States affected`[new_hurricane$`States affected` == "FL"] <- "Florida"
new_hurricane$`States affected`[
  lengths(strsplit(new_hurricane$`States affected`, ",", fixed = TRUE)) > 2
] <- "More than 2 states"
# Open the spreadsheet-style viewer only when someone is at the console.
# View() launches a GUI data viewer, so calling it unconditionally can fail or
# do nothing useful when the script is run in batch mode or knitted.
if (interactive()) {
  View(new_hurricane)
}

#5. Display enough rows to see examples of all of steps 1-5 above.

# Preview the leading 7 rows of each data frame, starting with the original
print("First 7 rows of both datasets")
## [1] "First 7 rows of both datasets"
head(hurricane, n = 7)
##              X     Name Year LF.WindsMPH LF.PressureMB LF.times BaseDamage
## 1     Easy1950     Easy 1950         120           958        1     3.3000
## 2     King1950     King 1950         130           955        1    28.0000
## 3     Able1952     Able 1952          85           985        1     2.7500
## 4  Barbara1953  Barbara 1953          85           987        1     1.0000
## 5 Florence1953 Florence 1953          85           985        1     0.2000
## 6    Carol1954    Carol 1954         120           960        2   460.2275
## 7     Edna1954     Edna 1954         120           954        1    40.0000
##   NDAM2014 AffectedStates    firstLF deaths mf BaseDam2014
## 1     1870             FL 1950-09-04      2  f   32.419419
## 2     6030             FL 1950-10-17      4  m  275.073859
## 3      170             SC 1952-08-30      3  m   24.569434
## 4       65             NC 1953-08-13      1  f    8.867416
## 5       18             FL 1953-09-26      0  f    1.773483
## 6    21375    NC,NY,CT,RI 1954-08-30     60  f 4050.686353
## 7     3520          MA,ME 1954-09-11     20  f  352.059480
# Leading 7 rows of the renamed and recoded subset
head(new_hurricane, n = 7)
##   Hurricane Name Year occured Property damage    States affected
## 1           Easy         1950          3.3000            Florida
## 2           King         1950         28.0000            Florida
## 3           Able         1952          2.7500             SCarol
## 4        Barbara         1953          1.0000                 NC
## 5       Florence         1953          0.2000            Florida
## 6          Carol         1954        460.2275 More than 2 states
## 7           Edna         1954         40.0000              MA,ME
##   Number of continental US deaths
## 1                               2
## 2                               4
## 3                               3
## 4                               1
## 5                               0
## 6                              60
## 7                              20