# Seven consecutive daily observations, 2016-09-24 through 2016-09-30.
dates <- as.Date(c(
  "2016-09-24", "2016-09-25", "2016-09-26", "2016-09-27",
  "2016-09-28", "2016-09-29", "2016-09-30"
))

# One temperature reading per date for each of the two series.
nash_temp <- c(92, 94, 78, 79, 84, 67, 68)
tor_temp <- c(18, 16, 19, 21, 21, 19, 18)

# Combine the three parallel vectors into one data frame, one row per date.
temp.diff.df <- data.frame(
  dates = dates,
  nash_temp = nash_temp,
  tor_temp = tor_temp
)
# Add three derived columns:
#   tor_temp_f    - tor_temp converted from Celsius to Fahrenheit
#   temp_diff     - nash_temp minus tor_temp_f
#   pct_temp_diff - temp_diff relative to tor_temp_f, stored as a fraction
#                   (it is not multiplied by 100)
temp.diff.df <- mutate(
  temp.diff.df,
  tor_temp_f = tor_temp * (9 / 5) + 32,
  temp_diff = nash_temp - tor_temp_f,
  pct_temp_diff = temp_diff / tor_temp_f
)
# Render temp.diff.df as a markdown-formatted table.
# Do not modify the following code:
knitr::kable(temp.diff.df, format = "markdown")
| dates | nash_temp | tor_temp | tor_temp_f | temp_diff | pct_temp_diff |
|---|---|---|---|---|---|
| 2016-09-24 | 92 | 18 | 64.4 | 27.6 | 0.4285714 |
| 2016-09-25 | 94 | 16 | 60.8 | 33.2 | 0.5460526 |
| 2016-09-26 | 78 | 19 | 66.2 | 11.8 | 0.1782477 |
| 2016-09-27 | 79 | 21 | 69.8 | 9.2 | 0.1318052 |
| 2016-09-28 | 84 | 21 | 69.8 | 14.2 | 0.2034384 |
| 2016-09-29 | 67 | 19 | 66.2 | 0.8 | 0.0120846 |
| 2016-09-30 | 68 | 18 | 64.4 | 3.6 | 0.0559006 |
# Average of the pct_temp_diff column across all rows (a fraction, not x100).
mean(temp.diff.df[["pct_temp_diff"]])
## [1] 0.2223001
The mean percent difference in temperature was 22.230008%.
# Bar chart with one bar per date; bar height is the pct_temp_diff value.
ggplot(temp.diff.df, aes(x = dates, y = pct_temp_diff)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Percent Difference in Temperature by Day",
    x = "Date",
    y = "Percent Difference in Temperature"
  )
The percent difference was greatest on September 25th.
# Read the whitespace-delimited fish catch data from the JSE data archive.
# The file has no header row, so column names are supplied explicitly.
fish <- read.table(
  file = "https://ww2.amstat.org/publications/jse/datasets/fishcatch.dat.txt",
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned,
  # whereas FALSE is a reserved word.
  header = FALSE,
  sep = "",
  col.names = c(
    "obs", "species", "weight", "len1", "len2", "len3",
    "height.pct", "width.pct", "sex"
  ),
  na.strings = "NA"
)

# Recode the integer codes as labelled factors.
# sex: 0 -> "female", 1 -> "male"; any other value (including NA) stays NA.
fish$sex <- factor(fish$sex, levels = 0:1, labels = c("female", "male"))

# species: codes 1-7 map, in order, to the species names below.
fish$species <- factor(
  fish$species,
  levels = 1:7,
  labels = c(
    "Common Bream", "Whitefish", "Roach", "Silver Bream",
    "Smelt", "Pike", "Perch"
  )
)
# Keep only the rows with a recorded sex. For a missing sex the comparison
# sex != "NA" evaluates to NA, and filter() retains only rows where the
# condition is TRUE, so those rows are dropped. The first rows of the result
# are then rendered as a markdown table.
# Do not modify the following code:
fish.sub <- filter(fish, sex != "NA")
knitr::kable(head(fish.sub), format = "markdown")
| obs | species | weight | len1 | len2 | len3 | height.pct | width.pct | sex |
|---|---|---|---|---|---|---|---|---|
| 14 | Common Bream | NA | 29.5 | 32 | 37.3 | 37.3 | 13.6 | male |
| 15 | Common Bream | 600 | 29.4 | 32 | 37.2 | 40.2 | 13.9 | male |
| 17 | Common Bream | 700 | 30.4 | 33 | 38.3 | 38.8 | 13.8 | male |
| 21 | Common Bream | 575 | 31.3 | 34 | 39.5 | 38.3 | 14.1 | male |
| 26 | Common Bream | 725 | 31.8 | 35 | 40.9 | 40.0 | 14.8 | male |
| 30 | Common Bream | 1000 | 33.5 | 37 | 42.6 | 44.5 | 15.5 | female |
# Mean weight per species; missing weights are excluded from each mean.
mean.wt <- summarize(
  group_by(fish, species),
  mean_weight = mean(weight, na.rm = TRUE)
)
# Render the per-species mean weights as a markdown-formatted table.
# Do not modify the following code:
knitr::kable(mean.wt, format = "markdown")
| species | mean_weight |
|---|---|
| Common Bream | 626.00000 |
| Whitefish | 531.00000 |
| Roach | 152.05000 |
| Silver Bream | 154.81818 |
| Smelt | 11.17857 |
| Pike | 718.70588 |
| Perch | 382.23929 |
The species with the smallest mean weight is Smelt, with a mean weight of 11.17857 g.
# Bar chart with one bar per species; bar height is that species' mean weight.
ggplot(mean.wt, aes(x = species, y = mean_weight)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Mean Weight per Species",
    x = "Species",
    y = "Weight"
  )
# Load the 2014 Forbes Global 2000 CSV, keeping text columns as character.
Forbes <- read.csv(
  "~/MSDS_5043/2014 Forbes Global 2000.csv",
  stringsAsFactors = FALSE
)

# Keep only rows with a non-empty Sector and strictly positive Sales.
Forbes <- filter(Forbes, Sector != "", Sales > 0)

# Convert the categorical columns to factors after filtering, so the factor
# levels are built only from the rows that remain.
Forbes[["Sector"]] <- factor(Forbes[["Sector"]])
Forbes[["Industry"]] <- factor(Forbes[["Industry"]])
Forbes[["Continent"]] <- factor(Forbes[["Continent"]])
Forbes[["Country"]] <- factor(Forbes[["Country"]])
# Mosaic plot of company counts cross-tabulated by sector (first dimension)
# and continent (second dimension).
mosaicplot(
  table(Forbes$Sector, Forbes$Continent),
  main = "Sector by Continent",
  xlab = "Sector of Industry",
  ylab = "Continent",
  color = c("red", "green", "blue", "yellow", "brown", "purple")
)
The Financials sector contains the largest number of companies.
North America’s largest sector (in terms of the number of companies) is Financials.
# Add ProfMgn, the ratio of Profits to Sales for each company.
Forbes <- mutate(Forbes, ProfMgn = Profits / Sales)
# Render the first rows of Forbes as a markdown-formatted table.
# Do not modify the following code:
knitr::kable(head(Forbes), format = "markdown")
| Rank | Company | Sector | Industry | Continent | Country | Sales | Profits | Assets | Market_Value | ProfMgn |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | ICBC | Financials | Major Banks | Asia | China | 148.7 | 42.7 | 3124.9 | 215.6 | 0.2871553 |
| 2 | China Construction Bank | Financials | Regional Banks | Asia | China | 121.3 | 34.2 | 2449.5 | 174.4 | 0.2819456 |
| 3 | Agricultural Bank of China | Financials | Regional Banks | Asia | China | 136.4 | 27.0 | 2405.4 | 141.1 | 0.1979472 |
| 4 | JPMorgan Chase | Financials | Major Banks | North America | United States | 105.7 | 17.3 | 2435.3 | 229.7 | 0.1636708 |
| 5 | Berkshire Hathaway | Financials | Investment Services | North America | United States | 178.8 | 19.5 | 493.4 | 309.1 | 0.1090604 |
| 6 | Exxon Mobil | Energy | Oil & Gas Operations | North America | United States | 394.0 | 32.6 | 346.8 | 422.3 | 0.0827411 |
# Box plots of ProfMgn for each Sector. The error-bar layer is drawn first so
# the whiskers get end caps, and coord_flip() lays the boxes out horizontally.
ggplot(Forbes, aes(x = Sector, y = ProfMgn)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot() +
  coord_flip()
The sector that appears to have the greatest standard deviation is Financials.
# Standard deviation of ProfMgn within each Sector. The result column is
# named `sd(ProfMgn)`, the same name dplyr derives for the unnamed expression.
Forbes.SD <- summarise(
  group_by(Forbes, Sector),
  `sd(ProfMgn)` = sd(ProfMgn)
)
# Render the per-sector standard deviations as a markdown-formatted table.
# Do not modify the following code:
knitr::kable(Forbes.SD, format = "markdown")
| Sector | sd(ProfMgn) |
|---|---|
| Consumer Discretionary | 0.6289455 |
| Consumer Staples | 0.1000578 |
| Energy | 0.1058560 |
| Financials | 0.4052307 |
| Health Care | 0.1074421 |
| Industrials | 0.0993903 |
| Information Technology | 0.2345233 |
| Materials | 0.2154817 |
| Telecommunication Services | 0.1022383 |
| Utilities | 0.1623265 |
The sector that has the greatest standard deviation is Consumer Discretionary. This calculation does not match my initial guess that Financials had the greatest standard deviation. Originally, I thought that the wider spread of the box and the number of outliers on both ends would result in the largest standard deviation. While Financials did have the second-largest standard deviation, Consumer Discretionary’s outlier at 10 had a more significant impact than I expected and gave it the highest standard deviation.