Warning: package 'dplyr' was built under R version 4.3.3
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
## Summarizing and arranging the data to find company types with the highest
## and lowest profits: mean profit and number of companies per category,
## sorted ascending so the least profitable categories come first.
profit_by_category <- forbes %>%
  group_by(category) %>%
  summarise(
    avg_profit = mean(profits, na.rm = TRUE),
    count = n()
  ) %>%
  arrange(avg_profit)

## Show the five categories with the lowest average profit.
profit_by_category %>%
  slice_head(n = 5)
## Result: telecommunication services is the lowest-profit company type.
## Finding countries with the highest profits: total and mean profit per
## country, sorted from the highest total profit downwards.
forbes %>%
  group_by(country) %>%
  summarise(
    total_profit = sum(profits, na.rm = TRUE),
    avg_profit = mean(profits, na.rm = TRUE)
  ) %>%
  arrange(desc(total_profit))
## Result: the United States generates the highest total profits.
# A tibble: 61 × 3
country total_profit avg_profit
<fct> <dbl> <dbl>
1 United States 487. 0.652
2 Canada 23.3 0.416
3 United Kingdom 21.7 0.160
4 Australia 18.1 0.502
5 South Korea 15.6 0.347
6 China 15.5 0.622
7 Russia 14.9 1.24
8 Switzerland 13.8 0.404
9 Spain 11.7 0.405
10 Netherlands/ United Kingdom 10.6 5.32
# ℹ 51 more rows
## Finding countries with the highest sales: total and mean sales per
## country, sorted from the highest total sales downwards.
forbes %>%
  group_by(country) %>%
  summarise(
    total_sales = sum(sales, na.rm = TRUE),
    avg_sales = mean(sales, na.rm = TRUE)
  ) %>%
  arrange(desc(total_sales))
## Result: the United States generates the highest total sales.
# A tibble: 61 × 3
country total_sales avg_sales
<fct> <dbl> <dbl>
1 United States 7554. 10.1
2 Japan 3220. 10.2
3 United Kingdom 1431. 10.4
4 Germany 1351. 20.8
5 France 1266. 20.1
6 Netherlands 477. 17.0
7 Switzerland 424. 12.5
8 Italy 419. 10.2
9 Canada 360. 6.43
10 South Korea 359. 7.97
# ℹ 51 more rows
## Using filter to find the better-ranked country between the USA and Japan:
## keep only those two countries and take the smallest rank value found in
## each (a smaller rank number is treated as the better rank).
forbes %>%
  filter(country %in% c("United States", "Japan")) %>%
  group_by(country) %>%
  summarise(best_rank = min(rank, na.rm = TRUE))
# A tibble: 2 × 2
country best_rank
<fct> <int>
1 Japan 8
2 United States 1
## The best-ranked US company is at rank 1, while the best-ranked Japanese
## company is at rank 8, so the USA has the higher-ranked company.
library(dplyr)
library(tidyr)
Warning: package 'tidyr' was built under R version 4.3.3
## Using filter and count to compare the most common company categories in
## both countries, starting with the United States: count companies per
## category and keep the five largest counts.
## NOTE: no pivot is applied; each country's top categories are printed as a
## separate table.
forbes %>%
  filter(country == "United States") %>%
  count(category, sort = TRUE) %>%
  slice_max(n, n = 5)
## Result: banking and diversified financials are the top 2 types.
category n
1 Banking 83
2 Diversified financials 60
3 Utilities 54
4 Health care equipment & services 53
5 Retailing 53
## Same comparison for Japan: count companies per category and keep the five
## largest counts.
forbes %>%
  filter(country == "Japan") %>%
  count(category, sort = TRUE) %>%
  slice_max(n, n = 5)
## Result: Japan also has banking and diversified financials as its top 2
## types.
category n
1 Banking 69
2 Diversified financials 24
3 Consumer durables 22
4 Transportation 20
5 Capital goods 19
##while the top 2 categories are the same in both countries, the usa has far more companies in other types such as retailing and utilities, and it also appears to have more companies in nearly every category that is common in japan
## Building a linear regression model of profits using assets, market value,
## and sales as predictors.
profit_model <- lm(profits ~ assets + marketvalue + sales, data = forbes)

## Looking at the summary of the linear regression.
summary(profit_model)
Call:
lm(formula = profits ~ assets + marketvalue + sales, data = forbes)
Residuals:
Min 1Q Median 3Q Max
-29.2169 -0.0189 0.1160 0.2107 8.9495
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.1186259 0.0380039 -3.121 0.00183 **
assets -0.0008395 0.0003781 -2.220 0.02651 *
marketvalue 0.0363340 0.0018183 19.982 < 2e-16 ***
sales 0.0098892 0.0024331 4.064 5e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.472 on 1991 degrees of freedom
(5 observations deleted due to missingness)
Multiple R-squared: 0.3059, Adjusted R-squared: 0.3049
F-statistic: 292.5 on 3 and 1991 DF, p-value: < 2.2e-16
## All variables have a small p-value, which suggests that there is a
## relationship between each of them and profits.

## Looking at the ANOVA table of the linear regression.
anova(profit_model)
## All variables have a tiny p-value, making them statistically significant,
## meaning there is a relationship between them and profits. Assets and market
## value have similarly small p-values; note that a p-value measures the
## strength of evidence for a relationship, not the size of the effect, so
## similar p-values do not by themselves imply equally large effects on
## profits.

## Building the distribution of the model residuals.
hist(residuals(profit_model), main = "Residuals", xlab = "Residuals")
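##histogram is left skewed (the residuals range from about -29 to about 9), suggesting the residuals are not normally distributed and that a few large negative outliers may be distorting the linear regression model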
## Filtering the data to build a model for the USA only.
forbes_usa <- filter(forbes, country == "United States")

## Linear regression model for the USA with assets, market value, and sales
## as predictors of profits.
model_usa <- lm(profits ~ assets + marketvalue + sales, data = forbes_usa)

## All variables have tiny p-values, making them significant predictors of
## profits.
anova(model_usa)
## Filtering the data to build a model for Japan only.
forbes_japan <- filter(forbes, country == "Japan")

## Linear regression model for Japan with the same predictors of profits
## (assets, market value, sales).
model_japan <- lm(profits ~ assets + marketvalue + sales, data = forbes_japan)

## Assets and market value have significant p-values; sales does not.
anova(model_japan)
## the main difference between the usa and japan is that assets, market value, and sales are all significantly related to profits in the usa, while only assets and market value are significantly related to profits in japan (sales is not)