# Daily temperatures for 24-30 September 2016 in Nashville (nash_temp) and
# Toronto (tor_temp). The conversion below applies the Celsius-to-Fahrenheit
# formula to tor_temp only, so tor_temp is presumably recorded in Celsius and
# nash_temp in Fahrenheit.
date <- as.Date(paste0("9/", 24:30, "/2016"), format = "%m/%d/%Y")
nash_temp <- c(92, 94, 78, 79, 84, 67, 68)
tor_temp <- c(18, 16, 19, 21, 21, 19, 18)
temp.diff.df <- data.frame(date, nash_temp, tor_temp)
# View(temp.diff.df)

# Add three derived columns:
#   tor_temp_f    - Toronto temperature converted with 1.8 * x + 32
#   temp_diff     - Nashville minus converted Toronto temperature
#   pct_temp_diff - temp_diff as a fraction (not x100) of tor_temp_f
temp.diff.df <- temp.diff.df %>%
  mutate(
    tor_temp_f = 32 + 1.8 * tor_temp,
    temp_diff = nash_temp - tor_temp_f,
    pct_temp_diff = temp_diff / tor_temp_f
  )
# View(temp.diff.df)
# Do not modify the following code:
knitr::kable(temp.diff.df, format = "markdown")
| date | nash_temp | tor_temp | tor_temp_f | temp_diff | pct_temp_diff |
|---|---|---|---|---|---|
| 2016-09-24 | 92 | 18 | 64.4 | 27.6 | 0.4285714 |
| 2016-09-25 | 94 | 16 | 60.8 | 33.2 | 0.5460526 |
| 2016-09-26 | 78 | 19 | 66.2 | 11.8 | 0.1782477 |
| 2016-09-27 | 79 | 21 | 69.8 | 9.2 | 0.1318052 |
| 2016-09-28 | 84 | 21 | 69.8 | 14.2 | 0.2034384 |
| 2016-09-29 | 67 | 19 | 66.2 | 0.8 | 0.0120846 |
| 2016-09-30 | 68 | 18 | 64.4 | 3.6 | 0.0559006 |
# Mean of the fractional differences, reported as a percentage rounded to
# four decimal places.
avg <- round(100 * mean(temp.diff.df[["pct_temp_diff"]]), digits = 4)
avg
## [1] 22.23
The mean percent temperature difference between Nashville and Toronto was 22.23%.
# Bar chart of the daily Nashville-vs-Toronto difference. pct_temp_diff is
# stored as a fraction, so it is scaled by 100 here to match the "Percent"
# axis label (the same scaling used when the mean is reported as a percentage).
ggplot(temp.diff.df, aes(x = date, y = pct_temp_diff * 100)) +
  geom_bar(stat = "identity", color = "black", fill = "gray") +
  labs(x = "date", y = "Percent Difference",
       title = "Nashville vs Toronto Temperature")

# Largest fractional difference and the date(s) on which it occurs.
# highestDate is kept as a one-column data frame (column `date`); the equality
# test is safe because maxDiff is taken from the very column it is compared to.
maxDiff <- max(temp.diff.df$pct_temp_diff)
highestDate <- subset(temp.diff.df, pct_temp_diff == maxDiff, select = date)
The percent difference was greatest on 2016-09-25.
# Fish catch data: whitespace-delimited text with no header row, read
# directly from the URL below.
fish <- read.csv(
  "https://ww2.amstat.org/publications/jse/datasets/fishcatch.dat.txt",
  header = FALSE,
  sep = ""
)
colnames(fish) <- c(
  "obs", "species", "weight", "len1", "len2", "len3",
  "height.pct", "width.pct", "sex"
)
# View(fish)

# Recode the numeric codes as labelled factors. Levels are given explicitly so
# each label is tied to its code rather than to whichever codes happen to be
# present in the data (with `labels` alone, a missing code would shift or
# break the mapping).
# NOTE(review): assumes sex is coded 0 = female, 1 = male and species 1..7 in
# the order listed, as in the dataset's description - confirm.
fish$sex <- factor(fish$sex, levels = c(0, 1), labels = c("female", "male"))
fish$species <- factor(
  fish$species,
  levels = 1:7,
  labels = c(
    "Common Bream", "Whitefish", "Roach", "Silver Bream",
    "Smelt", "Pike", "Perch"
  )
)
# Do not modify the following code:
fish.sub <- filter(fish, sex != "NA")
knitr::kable(head(fish.sub), format = "markdown")
| obs | species | weight | len1 | len2 | len3 | height.pct | width.pct | sex |
|---|---|---|---|---|---|---|---|---|
| 14 | Common Bream | NA | 29.5 | 32 | 37.3 | 37.3 | 13.6 | male |
| 15 | Common Bream | 600 | 29.4 | 32 | 37.2 | 40.2 | 13.9 | male |
| 17 | Common Bream | 700 | 30.4 | 33 | 38.3 | 38.8 | 13.8 | male |
| 21 | Common Bream | 575 | 31.3 | 34 | 39.5 | 38.3 | 14.1 | male |
| 26 | Common Bream | 725 | 31.8 | 35 | 40.9 | 40.0 | 14.8 | male |
| 30 | Common Bream | 1000 | 33.5 | 37 | 42.6 | 44.5 | 15.5 | female |
# Mean weight per species, ignoring missing weights.
by_species <- fish %>% group_by(species)
mean.wt <- by_species %>%
  summarise(average.weight = mean(weight, na.rm = TRUE))

# Smallest mean weight, and the species row(s) that attain it (kept as a
# one-column table holding `species`).
minWeight <- min(mean.wt$average.weight)
minspecies <- mean.wt[which(mean.wt$average.weight == minWeight), 1]
# Do not modify the following code:
knitr::kable(mean.wt, format = "markdown")
| species | average.weight |
|---|---|
| Common Bream | 626.00000 |
| Whitefish | 531.00000 |
| Roach | 152.05000 |
| Silver Bream | 154.81818 |
| Smelt | 11.17857 |
| Pike | 718.70588 |
| Perch | 382.23929 |
The species with the smallest mean weight is Smelt, with a mean weight of about 11.18 g.
# Bar chart of the mean weight for each species.
ggplot(mean.wt, aes(x = species, y = average.weight)) +
  geom_bar(stat = "identity", color = "black", fill = "gray") +
  labs(x = "Species", y = "Weight",
       title = "Mean Weight per Species")
# The maxDiff / highestDate recalculation that used to follow the plot was a
# copy-paste from the temperature section; it recomputed values that section
# already defines and had nothing to do with the fish data, so it is dropped.

# Print the per-species summary table.
mean.wt
## # A tibble: 7 × 2
## species average.weight
## <fctr> <dbl>
## 1 Common Bream 626.00000
## 2 Whitefish 531.00000
## 3 Roach 152.05000
## 4 Silver Bream 154.81818
## 5 Smelt 11.17857
## 6 Pike 718.70588
## 7 Perch 382.23929
library(readr)
library(dplyr)
# Read the Forbes data from a CSV file located relative to the current
# working directory; read_csv() reports the column types it guessed.
Forbes <- read_csv("2014 Forbes Global 2000.csv")
## Parsed with column specification:
## cols(
## Rank = col_integer(),
## Company = col_character(),
## Sector = col_character(),
## Industry = col_character(),
## Continent = col_character(),
## Country = col_character(),
## Sales = col_double(),
## Profits = col_double(),
## Assets = col_double(),
## Market_Value = col_double()
## )
# Keep only companies with a known sector and non-zero sales, then treat the
# four categorical columns as factors.
Forbes <- Forbes %>%
  filter(!is.na(Sector), Sales != 0) %>%
  mutate(
    Sector = factor(Sector),
    Industry = factor(Industry),
    Continent = factor(Continent),
    Country = factor(Country)
  )
# ggplot(data = Forbes) +
# geom_mosaic(aes(x=Sector,y=Continent))

# Sector-by-continent contingency table, drawn as a base-graphics mosaic plot.
SectorContinent <- table(Forbes$Sector, Forbes$Continent)
mosaicplot(
  SectorContinent,
  color = c("red", "green", "blue", "yellow", "brown", "purple"),
  main = "Sector by Continent"
)
# Number of companies in each sector, and the sector(s) with the most.
Forbes.CC <- Forbes %>%
  group_by(Sector) %>%
  summarise(CompanyCount = n())
answer1 <- Forbes.CC[
  which(Forbes.CC$CompanyCount == max(Forbes.CC$CompanyCount)),
  1
]

# Same count restricted to North American companies.
Forbes.NACC <- Forbes %>%
  filter(Continent == "North America") %>%
  group_by(Sector) %>%
  summarise(CompanyCount = n())
answer2 <- Forbes.NACC[
  which(Forbes.NACC$CompanyCount == max(Forbes.NACC$CompanyCount)),
  1
]
The Financials sector contains the largest number of companies.
North America’s largest sector (in terms of the number of companies) is Financials.
# Profit margin: profits as a fraction of sales.
Forbes <- Forbes %>%
  mutate(ProfMgn = Profits / Sales)
# Do not modify the following code:
knitr::kable(head(Forbes), format = "markdown")
| Rank | Company | Sector | Industry | Continent | Country | Sales | Profits | Assets | Market_Value | ProfMgn |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | ICBC | Financials | Major Banks | Asia | China | 148.7 | 42.7 | 3124.9 | 215.6 | 0.2871553 |
| 2 | China Construction Bank | Financials | Regional Banks | Asia | China | 121.3 | 34.2 | 2449.5 | 174.4 | 0.2819456 |
| 3 | Agricultural Bank of China | Financials | Regional Banks | Asia | China | 136.4 | 27.0 | 2405.4 | 141.1 | 0.1979472 |
| 4 | JPMorgan Chase | Financials | Major Banks | North America | United States | 105.7 | 17.3 | 2435.3 | 229.7 | 0.1636708 |
| 5 | Berkshire Hathaway | Financials | Investment Services | North America | United States | 178.8 | 19.5 | 493.4 | 309.1 | 0.1090604 |
| 6 | Exxon Mobil | Energy | Oil & Gas Operations | North America | United States | 394.0 | 32.6 | 346.8 | 422.3 | 0.0827411 |
# Horizontal boxplots of profit margin, one box per sector.
p <- ggplot(data = Forbes, mapping = aes(x = Sector, y = ProfMgn))
p +
  geom_boxplot() +
  coord_flip()
The sector that appears to have the greatest standard deviation is Consumer Discretionary.
# Standard deviation of profit margin within each sector.
Forbes.SD <- Forbes %>%
  group_by(Sector) %>%
  summarise(StDev = sd(ProfMgn))

# Sector(s) with the largest standard deviation, kept as a one-column table.
SDanswer <- Forbes.SD[which(Forbes.SD$StDev == max(Forbes.SD$StDev)), 1]
# Do not modify the following code:
knitr::kable(Forbes.SD, format = "markdown")
| Sector | StDev |
|---|---|
| Consumer Discretionary | 0.6289455 |
| Consumer Staples | 0.1000578 |
| Energy | 0.1058560 |
| Financials | 0.4052307 |
| Health Care | 0.1074421 |
| Industrials | 0.0993903 |
| Information Technology | 0.2345233 |
| Materials | 0.2154817 |
| Telecommunication Services | 0.1022383 |
| Utilities | 0.1623265 |
The sector that has the greatest standard deviation is Consumer Discretionary. Consumer Discretionary appears to have one strong outlier that pulls its standard deviation above that of the Financials sector.