#Named US Atlantic Hurricanes
#Read from my Github
# Pull the hurricane table from the raw GitHub URL; read.csv() already treats
# the first line as the header and splits fields on commas by default.
hurricane <- read.csv(
  file = "https://raw.githubusercontent.com/ex-pr/Dataset/main/hurricNamed.csv"
)
# Column-by-column summary of the full data set
summary(hurricane)
## X Name Year LF.WindsMPH
## Length:94 Length:94 Min. :1950 Min. : 75.0
## Class :character Class :character 1st Qu.:1964 1st Qu.: 85.0
## Mode :character Mode :character Median :1985 Median :105.0
## Mean :1982 Mean :104.7
## 3rd Qu.:1999 3rd Qu.:120.0
## Max. :2012 Max. :190.0
## LF.PressureMB LF.times BaseDamage NDAM2014
## Min. : 909.0 Min. :1.000 Min. : 0.20 Min. : 1
## 1st Qu.: 950.0 1st Qu.:1.000 1st Qu.: 25.75 1st Qu.: 290
## Median : 963.5 Median :1.000 Median : 200.00 Median : 2090
## Mean : 964.4 Mean :1.117 Mean : 3340.70 Mean : 8433
## 3rd Qu.: 982.8 3rd Qu.:1.000 3rd Qu.: 1500.00 3rd Qu.: 9050
## Max. :1003.0 Max. :3.000 Max. :81000.00 Max. :88420
## AffectedStates firstLF deaths mf
## Length:94 Length:94 Min. : 0.00 Length:94
## Class :character Class :character 1st Qu.: 2.00 Class :character
## Mode :character Mode :character Median : 5.00 Mode :character
## Mean : 44.17
## 3rd Qu.: 21.00
## Max. :1836.00
## BaseDam2014
## Min. : 1.04
## 1st Qu.: 93.11
## Median : 908.33
## Mean : 4830.19
## 3rd Qu.: 3341.62
## Max. :98195.39
# View() opens a GUI data viewer, so only call it when someone is at the
# console; a non-interactive run (Rscript, knitting) may have no display
# on which to open it.
if (interactive()) {
  View(hurricane)
}
#Find mean and median for damage $ and deaths
# Centre of the damage distribution (BaseDamage column)
basedamage_mean <- mean(hurricane[["BaseDamage"]])
basedamage_median <- median(hurricane[["BaseDamage"]])
# Centre of the death-count distribution (deaths column)
deaths_mean <- mean(hurricane[["deaths"]])
deaths_median <- median(hurricane[["deaths"]])
# Report both pairs; %f prints six decimal places
print(sprintf(
  "Damage in millions of $ mean is %f and median is %f",
  basedamage_mean, basedamage_median
))
## [1] "Damage in millions of $ mean is 3340.697154 and median is 200.000000"
print(sprintf(
  "Number of continental US deaths mean is %f and median is %f",
  deaths_mean, deaths_median
))
## [1] "Number of continental US deaths mean is 44.170213 and median is 5.000000"
# Output
#[1] "Damage in millions of $ mean is 3340.697154 and median is 200.000000"
#[1] "Number of continental US deaths mean is 44.170213 and median is 5.000000"
#2,3. Create a new data frame with a subset of the columns AND rows. Create new column names for each column in the new data frame
#Create new dataset with 10 rows and 5 columns
# Map each source column (by name) to the label it gets in the new data frame.
# Selecting by name rather than by position (2, 3, 7, 9, 11) means a change in
# the CSV's column order raises an "undefined columns" error instead of
# silently picking the wrong fields.
column_labels <- c(
  Name = "Hurricane Name",
  Year = "Year occured",
  BaseDamage = "Property damage",
  AffectedStates = "States affected",
  deaths = "Number of continental US deaths"
)
# head() keeps the first 10 rows and, unlike indexing with 1:10, does not pad
# the result with NA rows when fewer than 10 rows are available.
new_hurricane <- head(hurricane[, names(column_labels), drop = FALSE], 10)
# Rename columns to the descriptive labels
colnames(new_hurricane) <- unname(column_labels)
summary(new_hurricane)
## Hurricane Name Year occured Property damage States affected
## Length:10 Min. :1950 Min. : 0.200 Length:10
## Class :character 1st Qu.:1952 1st Qu.: 2.888 Class :character
## Mode :character Median :1954 Median : 34.000 Mode :character
## Mean :1953 Mean :105.648
## 3rd Qu.:1954 3rd Qu.:160.000
## Max. :1955 Max. :460.228
## Number of continental US deaths
## Min. : 0.00
## 1st Qu.: 1.25
## Median : 3.50
## Mean : 31.00
## 3rd Qu.: 20.00
## Max. :200.00
#Find new values for mean/median damage and deaths
# Same statistics as for the full data set, now computed on the 10-row subset.
# Columns are addressed by their new labels: "Property damage" is the third
# column and "Number of continental US deaths" the fifth.
newdamage_mean <- mean(new_hurricane[["Property damage"]])
newdamage_median <- median(new_hurricane[["Property damage"]])
newdeaths_mean <- mean(new_hurricane[["Number of continental US deaths"]])
newdeaths_median <- median(new_hurricane[["Number of continental US deaths"]])
#Print new results and compare to previous values
# Subset statistics on their own
print(sprintf(
  "Damage in millions of $ mean is %f and median is %f in the new dataset",
  newdamage_mean, newdamage_median
))
## [1] "Damage in millions of $ mean is 105.647750 and median is 34.000000 in the new dataset"
print(sprintf(
  "Number of continental US deaths mean is %f and median is %f in the new dataset",
  newdeaths_mean, newdeaths_median
))
## [1] "Number of continental US deaths mean is 31.000000 and median is 3.500000 in the new dataset"
# Full-data value followed by subset value, first for the mean, then the median
print(sprintf(
  "Damage in millions of $ mean changed from %f to %f and median changed from %f to %f in the new dataset",
  basedamage_mean, newdamage_mean,
  basedamage_median, newdamage_median
))
## [1] "Damage in millions of $ mean changed from 3340.697154 to 105.647750 and median changed from 200.000000 to 34.000000 in the new dataset"
print(sprintf(
  "Number of continental US deaths mean changed from %f to %f and median changed from %f to %f in the new dataset",
  deaths_mean, newdeaths_mean,
  deaths_median, newdeaths_median
))
## [1] "Number of continental US deaths mean changed from 44.170213 to 31.000000 and median changed from 5.000000 to 3.500000 in the new dataset"
# Written interpretation of the damage comparison (emitted verbatim)
print(
  "Damage in millions of $: mean and median became smaller in the new dataset as we are using only first 10 rows from the original dataset that covers years 1950-1955, the original data set covers years 1950-2012, more buildings, roads, etc were built since 1955 that can be destroyed by a hurricane. As time goes on, there is more damage from hurricanes as there is more things to destroy. Also, since max and min values define mean and median too, max damage from hurricane in the original data set was 81000 milllions of$, in the new dataset that covers only 1950-1955 it is 460 millions of $, so median moved closer to the left, became smaller."
)
## [1] "Damage in millions of $: mean and median became smaller in the new dataset as we are using only first 10 rows from the original dataset that covers years 1950-1955, the original data set covers years 1950-2012, more buildings, roads, etc were built since 1955 that can be destroyed by a hurricane. As time goes on, there is more damage from hurricanes as there is more things to destroy. Also, since max and min values define mean and median too, max damage from hurricane in the original data set was 81000 milllions of$, in the new dataset that covers only 1950-1955 it is 460 millions of $, so median moved closer to the left, became smaller."
# Written interpretation of the deaths comparison (emitted verbatim)
print(
  "Number of continental US deaths: mean and median became smaller in the new dataset. Maximun deaths changed from 1836 to 200, as a result, median moved to the left, became smaller as well as mean. Less people lived at 1950-1955 than at the recent times. Population becomes larger, number of lives that can be affected by a hurricane has increased"
)
## [1] "Number of continental US deaths: mean and median became smaller in the new dataset. Maximun deaths changed from 1836 to 200, as a result, median moved to the left, became smaller as well as mean. Less people lived at 1950-1955 than at the recent times. Population becomes larger, number of lives that can be affected by a hurricane has increased"
#Output
# "Damage in millions of $ mean is 105.647750 and median is 34.000000 in the new dataset"
# "Number of continental US deaths mean is 31.000000 and median is 3.500000 in the new dataset"
# "Damage in millions of $ mean changed from 3340.697154 to 105.647750 and median changed from 200.000000 to 34.000000 in the new dataset"
# "Number of continental US deaths mean changed from 44.170213 to 31.000000 and median changed from 5.000000 to 3.500000 in the new dataset"
#4 For at least 3 different/distinct values in a column please rename so that every value in that column is renamed.
#Rename values in column "States Affected", SC to SCarol, FL to Florida, if more than 2 states, to More than 2 states
# Work on a copy of the column so every rule is evaluated against the
# original state codes, not against labels written by an earlier rule.
states <- new_hurricane[["States affected"]]
# Number of comma-separated state codes per storm. Counting states directly
# (instead of testing nchar() >= 8 on the already-relabelled column) keeps the
# rule correct even if a replacement label is 8 or more characters long.
state_count <- lengths(strsplit(states, ",", fixed = TRUE))
# %in% never yields NA, so missing values are simply left untouched
states[states %in% "SC"] <- "SCarol"
states[states %in% "FL"] <- "Florida"
states[state_count > 2] <- "More than 2 states"
new_hurricane[["States affected"]] <- states
# View() opens a GUI data viewer, so only call it in an interactive session
if (interactive()) {
  View(new_hurricane)
}
#5 Display enough rows to see examples of all of steps 1-5 above.
#show 7 rows of each dataset
# Announce the comparison, then auto-print rows 1 through 7 of the full data
print("First 7 rows of both datasets")
## [1] "First 7 rows of both datasets"
hurricane[seq_len(7), ]
## X Name Year LF.WindsMPH LF.PressureMB LF.times BaseDamage
## 1 Easy1950 Easy 1950 120 958 1 3.3000
## 2 King1950 King 1950 130 955 1 28.0000
## 3 Able1952 Able 1952 85 985 1 2.7500
## 4 Barbara1953 Barbara 1953 85 987 1 1.0000
## 5 Florence1953 Florence 1953 85 985 1 0.2000
## 6 Carol1954 Carol 1954 120 960 2 460.2275
## 7 Edna1954 Edna 1954 120 954 1 40.0000
## NDAM2014 AffectedStates firstLF deaths mf BaseDam2014
## 1 1870 FL 1950-09-04 2 f 32.419419
## 2 6030 FL 1950-10-17 4 m 275.073859
## 3 170 SC 1952-08-30 3 m 24.569434
## 4 65 NC 1953-08-13 1 f 8.867416
## 5 18 FL 1953-09-26 0 f 1.773483
## 6 21375 NC,NY,CT,RI 1954-08-30 60 f 4050.686353
## 7 3520 MA,ME 1954-09-11 20 f 352.059480
# Auto-print rows 1 through 7 of the renamed subset for side-by-side comparison
new_hurricane[seq_len(7), ]
## Hurricane Name Year occured Property damage States affected
## 1 Easy 1950 3.3000 Florida
## 2 King 1950 28.0000 Florida
## 3 Able 1952 2.7500 SCarol
## 4 Barbara 1953 1.0000 NC
## 5 Florence 1953 0.2000 Florida
## 6 Carol 1954 460.2275 More than 2 states
## 7 Edna 1954 40.0000 MA,ME
## Number of continental US deaths
## 1 2
## 2 4
## 3 3
## 4 1
## 5 0
## 6 60
## 7 20