# Load the fish catch data set from the JSE archive. The file has no header
# row, so the nine column names are supplied explicitly.
fish <- read_table(
  "http://www.amstat.org/publications/jse/datasets/fishcatch.dat.txt",
  col_names = c(
    "obs", "species", "weight",
    "length1", "length2", "length3",
    "height.pct", "width.pct", "sex"
  )
)
## Parsed with column specification:
## cols(
## obs = col_double(),
## species = col_double(),
## weight = col_double(),
## length1 = col_double(),
## length2 = col_double(),
## length3 = col_double(),
## height.pct = col_double(),
## width.pct = col_double(),
## sex = col_double()
## )
# Recode the numeric sex codes (0/1) as a labelled factor.
fish[["sex"]] <- factor(
  fish[["sex"]],
  levels = c(0, 1),
  labels = c("Female", "Male")
)
# Recode the numeric species codes (1 to 7) as a labelled factor.
fish[["species"]] <- factor(
  fish[["species"]],
  levels = 1:7,
  labels = c(
    "Common Bream", "Whitefish", "Roach", "Silver Bream",
    "Smelt", "Pike", "Perch"
  )
)
# Do not modify the following code:
# Build `fish.sub` from the rows of `fish` that pass the `sex` condition, then
# render its first rows as a markdown table.
# NOTE(review): `sex != "NA"` compares against the string "NA", not a missing
# value. It presumably works because the comparison yields NA for rows with a
# missing sex and filter() (assumed to be dplyr's) drops rows whose condition
# is NA -- confirm. `!is.na(sex)` would state the intent directly.
fish.sub <- filter(fish, sex != "NA")
knitr::kable(head(fish.sub), format = "markdown")
| obs | species | weight | length1 | length2 | length3 | height.pct | width.pct | sex |
|---|---|---|---|---|---|---|---|---|
| 14 | Common Bream | NA | 29.5 | 32 | 37.3 | 37.3 | 13.6 | Male |
| 15 | Common Bream | 600 | 29.4 | 32 | 37.2 | 40.2 | 13.9 | Male |
| 17 | Common Bream | 700 | 30.4 | 33 | 38.3 | 38.8 | 13.8 | Male |
| 21 | Common Bream | 575 | 31.3 | 34 | 39.5 | 38.3 | 14.1 | Male |
| 26 | Common Bream | 725 | 31.8 | 35 | 40.9 | 40.0 | 14.8 | Male |
| 30 | Common Bream | 1000 | 33.5 | 37 | 42.6 | 44.5 | 15.5 | Female |
# Mean weight for each species, ignoring missing weights.
mean.wt <- summarize(
  group_by(fish, species),
  mean_weight = mean(weight, na.rm = TRUE)
)
# Do not modify the following code:
# Render the per-species summary in `mean.wt` as a markdown table.
knitr::kable(mean.wt, format = "markdown")
| species | mean_weight |
|---|---|
| Common Bream | 626.00000 |
| Whitefish | 531.00000 |
| Roach | 152.05000 |
| Silver Bream | 154.81818 |
| Smelt | 11.17857 |
| Pike | 718.70588 |
| Perch | 382.23929 |
The species with the smallest mean weight is Smelt, with a mean weight of about 11.2 g.
# Bar chart of the mean weight for each species (one orange bar per species,
# outlined in black).
ggplot(mean.wt, aes(species, mean_weight)) +
  geom_col(fill = "orange", color = "black") +
  labs(
    x = "Species",
    y = "Weight",
    title = "Mean Weight per Species"
  )
# Load the Forbes Global 2000 (2014) data from a CSV file in the working
# directory.
Forbes <- read_csv(
  file = "2014 Forbes Global 2000.csv"
)
## Parsed with column specification:
## cols(
## Rank = col_double(),
## Company = col_character(),
## Sector = col_character(),
## Industry = col_character(),
## Continent = col_character(),
## Country = col_character(),
## Sales = col_double(),
## Profits = col_double(),
## Assets = col_double(),
## Market_Value = col_double()
## )
# Drop companies with a missing sector or zero sales, then convert the four
# categorical columns to factors.
Forbes <- Forbes %>%
  filter(!is.na(Sector), Sales != 0) %>%
  mutate(
    Sector = factor(Sector),
    Industry = factor(Industry),
    Continent = factor(Continent),
    Country = factor(Country)
  )
# Scatter plot of Sales against Market Value with a fitted linear trend,
# restricted to companies in Asia, Europe and North America.
# Every layer, including labs(), must be chained with `+`. Without the `+`
# after geom_smooth(), labs() is evaluated as a separate statement: it is
# printed to the console and the title and axis labels never reach the plot.
sales <- ggplot(
  subset(Forbes, Continent %in% c("Asia", "Europe", "North America")),
  aes(x = Sales, y = Market_Value)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Sales vs. Market Value", x = "Sales", y = "Market Value")
# Show one panel per continent, side by side.
sales + facet_grid(. ~ Continent)
Of the three continents shown, companies in North America appear to have the highest market values relative to their sales.
# Add the profit margin: profits as a fraction of sales.
Forbes <- Forbes %>%
  mutate(ProfMgn = Profits / Sales)
# Do not modify the following code:
# Render the first rows of `Forbes` (now including `ProfMgn`) as a markdown
# table.
knitr::kable(head(Forbes), format = "markdown")
| Rank | Company | Sector | Industry | Continent | Country | Sales | Profits | Assets | Market_Value | ProfMgn |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | ICBC | Financials | Major Banks | Asia | China | 148.7 | 42.7 | 3124.9 | 215.6 | 0.2871553 |
| 2 | China Construction Bank | Financials | Regional Banks | Asia | China | 121.3 | 34.2 | 2449.5 | 174.4 | 0.2819456 |
| 3 | Agricultural Bank of China | Financials | Regional Banks | Asia | China | 136.4 | 27.0 | 2405.4 | 141.1 | 0.1979472 |
| 4 | JPMorgan Chase | Financials | Major Banks | North America | United States | 105.7 | 17.3 | 2435.3 | 229.7 | 0.1636708 |
| 5 | Berkshire Hathaway | Financials | Investment Services | North America | United States | 178.8 | 19.5 | 493.4 | 309.1 | 0.1090604 |
| 6 | Exxon Mobil | Energy | Oil & Gas Operations | North America | United States | 394.0 | 32.6 | 346.8 | 422.3 | 0.0827411 |
# Box plots of profit margin by sector; the axes are flipped so the sector
# names run along the vertical axis.
ggplot(data = Forbes, mapping = aes(x = Sector, y = ProfMgn)) +
  geom_boxplot() +
  coord_flip()
The sector that appears to have the greatest standard deviation is Consumer Discretionary.
# Standard deviation of profit margin within each sector, ignoring missing
# values.
Forbes.SD <- summarize(
  group_by(Forbes, Sector),
  std_profit = sd(ProfMgn, na.rm = TRUE)
)
# Do not modify the following code:
# Render the per-sector summary in `Forbes.SD` as a markdown table.
knitr::kable(Forbes.SD, format = "markdown")
| Sector | std_profit |
|---|---|
| Consumer Discretionary | 0.6289455 |
| Consumer Staples | 0.1000578 |
| Energy | 0.1058560 |
| Financials | 0.4052307 |
| Health Care | 0.1074421 |
| Industrials | 0.0993903 |
| Information Technology | 0.2345233 |
| Materials | 0.2154817 |
| Telecommunication Services | 0.1022383 |
| Utilities | 0.1623265 |
The sector with the greatest standard deviation is Consumer Discretionary (about 0.63). This agrees with the boxplot, where the distance between its minimum and maximum points is larger than for any other sector, suggesting greater variability overall.