専門演習課題－テストー２.knit

# ライブラリ読み込み
library(readxl)
library(dplyr)

## 
##  次のパッケージを付け加えます: 'dplyr'

##  以下のオブジェクトは 'package:stats' からマスクされています:
## 
##     filter, lag

##  以下のオブジェクトは 'package:base' からマスクされています:
## 
##     intersect, setdiff, setequal, union

library(DT)

# Path of the Excel workbook and the names of the sheets it contains.
file_path <- "バイク年式;燃費 .xlsx"
sheet_names <- excel_sheets(file_path)

# Read every sheet, store the sheet name in a メーカー column, and stack
# the per-sheet tables into one data frame.
all_sheets <- lapply(sheet_names, function(maker) {
  sheet_data <- read_excel(file_path, sheet = maker)
  sheet_data[["メーカー"]] <- maker
  sheet_data
})
all_bikes <- bind_rows(all_sheets)

# Rename the first six columns by position (adjust the positions if the
# workbook layout differs), then coerce the five specification columns to
# numeric.
all_bikes <- all_bikes %>%
  rename(
    車種 = 1, 年式 = 2, 燃費 = 3,
    タンク容量 = 4, 航続距離 = 5, 排気量 = 6
  ) %>%
  mutate(across(c(年式, 燃費, タンク容量, 航続距離, 排気量), as.numeric))

# Score each specification column.
#   - 年式, 燃費, タンク容量, 航続距離: 0-5, larger value -> higher score.
#   - 排気量: 1-7 by displacement class.
# A missing input produces an NA score (and therefore an NA 合計スコア)
# rather than being silently scored as 0, so that "unknown" is not confused
# with "worst". This applies the same rule to every metric; previously only
# 排気量 propagated NA.
all_bikes <- all_bikes %>%
  mutate(
    年式スコア = case_when(
      is.na(年式) ~ NA_real_,
      年式 >= 2020 ~ 5,
      年式 >= 2015 ~ 4,
      年式 >= 2010 ~ 3,
      年式 >= 2000 ~ 2,
      年式 >= 1990 ~ 1,
      TRUE ~ 0
    ),
    燃費スコア = case_when(
      is.na(燃費) ~ NA_real_,
      燃費 >= 40 ~ 5,
      燃費 >= 35 ~ 4,
      燃費 >= 30 ~ 3,
      燃費 >= 25 ~ 2,
      燃費 >= 20 ~ 1,
      TRUE ~ 0
    ),
    タンクスコア = case_when(
      is.na(タンク容量) ~ NA_real_,
      タンク容量 >= 20 ~ 5,
      タンク容量 >= 17 ~ 4,
      タンク容量 >= 14 ~ 3,
      タンク容量 >= 10 ~ 2,
      タンク容量 >= 5 ~ 1,
      TRUE ~ 0
    ),
    航続スコア = case_when(
      is.na(航続距離) ~ NA_real_,
      航続距離 >= 500 ~ 5,
      航続距離 >= 400 ~ 4,
      航続距離 >= 300 ~ 3,
      航続距離 >= 200 ~ 2,
      航続距離 >= 100 ~ 1,
      TRUE ~ 0
    ),
    排気量スコア = case_when(
      is.na(排気量) ~ NA_real_,
      排気量 <= 50 ~ 1,
      排気量 <= 125 ~ 2,
      排気量 <= 250 ~ 3,
      排気量 <= 400 ~ 4,
      排気量 <= 750 ~ 5,
      排気量 <= 1000 ~ 6,
      TRUE ~ 7
    ),
    # Sum of the five scores; NA whenever any component score is NA.
    合計スコア = 年式スコア + 燃費スコア + タンクスコア + 航続スコア + 排気量スコア
  )

# Interactive table of the specifications and their scores, 15 rows per page.
all_bikes %>%
  select(
    メーカー, 車種, 年式, 燃費, タンク容量, 航続距離, 排気量,
    年式スコア, 燃費スコア, タンクスコア, 航続スコア, 排気量スコア, 合計スコア
  ) %>%
  datatable(
    options = list(pageLength = 15),
    caption = "バイクのスペック評価（スコア一覧）"
  )

# 必要なパッケージ
library(readxl)
library(dplyr)

# Workbook path and the names of its sheets.
file_path <- "バイク年式;燃費 .xlsx"
sheet_names <- excel_sheets(file_path)

# Load each sheet and record the sheet name in a メーカー column.
all_sheets <- lapply(sheet_names, function(sheet_name) {
  bikes <- read_excel(file_path, sheet = sheet_name)
  bikes[["メーカー"]] <- sheet_name
  bikes
})

# Combine the per-sheet tables into a single data frame.
all_bikes <- bind_rows(all_sheets)


# Give the first six columns standard names (by position; adjust as needed)
# and convert the specification columns to numeric.
all_bikes <- all_bikes %>%
  rename(
    車種 = 1, 年式 = 2, 燃費 = 3,
    タンク容量 = 4, 航続距離 = 5, 排気量 = 6
  ) %>%
  mutate(across(c(年式, 燃費, タンク容量, 航続距離, 排気量), as.numeric))

library(ggplot2)

# Horizontal bar chart of mean 燃費 per メーカー, bars ordered by the mean.
all_bikes %>%
  group_by(メーカー) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE)) %>%
  ggplot(
    aes(x = reorder(メーカー, 平均燃費), y = 平均燃費, fill = メーカー)
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "メーカー別 平均燃費",
    x = "メーカー",
    y = "平均燃費（km/L）"
  ) +
  theme_minimal()

# Scatter plot of 航続距離 against 排気量, coloured by メーカー, with a
# linear-model fit line per メーカー and no confidence band.
all_bikes %>%
  ggplot(aes(x = 排気量, y = 航続距離, colour = メーカー)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "排気量と航続距離の関係",
    x = "排気量 (cc)",
    y = "航続距離 (km)"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

library(readxl)
library(dplyr)

# Workbook location and its sheet names.
file_path <- "バイク年式;燃費 .xlsx"
sheet_names <- excel_sheets(path = file_path)

# Read each sheet, tag its rows with the sheet name (メーカー), and bind
# everything into one table.
all_sheets <- lapply(sheet_names, function(nm) {
  one_sheet <- read_excel(path = file_path, sheet = nm)
  one_sheet[["メーカー"]] <- nm
  one_sheet
})
all_bikes <- bind_rows(all_sheets)

# Standardize the first six column names by position (adjust if the workbook
# layout differs) and coerce all five specification columns to numeric.
# タンク容量 and 航続距離 are converted as well, matching the earlier loading
# sections of this script, so the columns have the same types no matter which
# section rebuilt all_bikes.
all_bikes <- all_bikes %>%
  rename(
    車種 = 1,
    年式 = 2,
    燃費 = 3,
    タンク容量 = 4,
    航続距離 = 5,
    排気量 = 6
  ) %>%
  mutate(across(c(年式, 燃費, タンク容量, 航続距離, 排気量), as.numeric))

# Classify each bike into a displacement band.
# Rows with a missing 排気量 are labelled "不明" instead of falling through to
# "特大型", and the result is a factor whose levels run from the smallest
# band to the largest so plots and tables show the bands in that order.
all_bikes <- all_bikes %>%
  mutate(排気量帯 = factor(
    case_when(
      排気量 <= 125 ~ "原付・小型",
      排気量 <= 250 ~ "中型",
      排気量 <= 750 ~ "大型",
      排気量 > 750 ~ "特大型",
      TRUE ~ "不明"
    ),
    levels = c("原付・小型", "中型", "大型", "特大型", "不明")
  ))

# 排気量帯別平均燃費を可視化
library(ggplot2)

# Box plot of 燃費 for each 排気量帯.
all_bikes %>%
  ggplot(aes(x = 排気量帯, y = 燃費)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "排気量帯別 燃費比較",
    x = "排気量帯",
    y = "燃費（km/L）"
  ) +
  theme_minimal()

# Provisional engine-type flag: a 車種 whose name contains NSR, TZR or RZ
# (case-insensitive) is labelled "2スト"; everything else is "4スト".
# grepl() returns FALSE for missing names, so those are also "4スト".
all_bikes <- all_bikes %>%
  mutate(
    ストローク = if_else(
      grepl("NSR|TZR|RZ", 車種, ignore.case = TRUE),
      "2スト",
      "4スト"
    )
  )

# Box plot comparing 燃費 between the two ストローク groups.
all_bikes %>%
  ggplot(aes(x = ストローク, y = 燃費, fill = ストローク)) +
  geom_boxplot() +
  labs(
    title = "2スト / 4スト 燃費比較",
    x = "エンジンタイプ",
    y = "燃費（km/L）"
  ) +
  theme_minimal()

# Line chart (with point markers) of mean 燃費 for each 年式.
all_bikes %>%
  group_by(年式) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = 年式, y = 平均燃費)) +
  geom_line(colour = "darkgreen") +
  geom_point() +
  labs(
    title = "年式別 平均燃費の推移",
    x = "年式",
    y = "平均燃費（km/L）"
  ) +
  theme_minimal()

# Mean 燃費 per メーカー, highest first.
all_bikes %>%
  group_by(メーカー) %>%
  summarise(
    平均燃費 = mean(燃費, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(平均燃費))

## # A tibble: 8 × 2
##   メーカー     平均燃費
##   <chr>           <dbl>
## 1 ホンダ           39.0
## 2 ヤマハ           39.0
## 3 スズキ           38.0
## 4 アプリリア       30.3
## 5 カワサキ         26.6
## 6 KTM              23.1
## 7 BMW              22.5
## 8 トライアンフ     21.3

# Re-classify 排気量 into finer bands.
# The labels are stored as a factor whose levels are listed in ascending
# displacement order ("不明" last). Sorting or grouping by 排気量帯 then
# follows engine size; plain character labels would sort lexically
# (e.g. "1001cc〜" before "126〜250cc").
all_bikes <- all_bikes %>%
  mutate(
    排気量帯 = factor(
      case_when(
        排気量 <= 50 ~ "〜50cc",
        排気量 <= 125 ~ "51〜125cc",
        排気量 <= 250 ~ "126〜250cc",
        排気量 <= 400 ~ "251〜400cc",
        排気量 <= 750 ~ "401〜750cc",
        排気量 <= 1000 ~ "751〜1000cc",
        排気量 > 1000 ~ "1001cc〜",
        TRUE ~ "不明"
      ),
      levels = c(
        "〜50cc", "51〜125cc", "126〜250cc", "251〜400cc",
        "401〜750cc", "751〜1000cc", "1001cc〜", "不明"
      )
    )
  )

# Mean 燃費 per 排気量帯, sorted by the band column.
all_bikes %>%
  group_by(排気量帯) %>%
  summarise(
    平均燃費 = mean(燃費, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(排気量帯)

## # A tibble: 8 × 2
##   排気量帯    平均燃費
##   <chr>          <dbl>
## 1 1001cc〜        19.9
## 2 126〜250cc      32.4
## 3 251〜400cc      27.4
## 4 401〜750cc      23.3
## 5 51〜125cc       51.3
## 6 751〜1000cc     20.3
## 7 〜50cc          66.5
## 8 不明            19.6

# Mean 燃費 per 年式, newest model year first.
all_bikes %>%
  group_by(年式) %>%
  summarise(
    平均燃費 = mean(燃費, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(年式))

## # A tibble: 44 × 2
##     年式 平均燃費
##    <dbl>    <dbl>
##  1  2025     26.5
##  2  2024     35.3
##  3  2023     39  
##  4  2022     23.8
##  5  2021     29.0
##  6  2020     32.2
##  7  2019     24.3
##  8  2018     43.7
##  9  2017     39  
## 10  2016     32.3
## # ℹ 34 more rows

library(DT)

# Interactive table of mean 燃費 per メーカー (rounded to one decimal),
# highest first.
all_bikes %>%
  group_by(メーカー) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE), .groups = "drop") %>%
  mutate(平均燃費 = round(平均燃費, 1)) %>%
  arrange(desc(平均燃費)) %>%
  datatable(caption = "メーカー別 平均燃費")

library(dplyr)
library(tidyr)
library(DT)

# Label each bike with the decade of its 年式.
# findInterval() counts how many decade start years are <= 年式 (0 for years
# before 1980, NA for a missing 年式); coalesce() maps NA to 0, so both of
# those cases pick the first label, "その他".
all_bikes <- all_bikes %>%
  mutate(
    年式帯 = c(
      "その他", "1980年代", "1990年代", "2000年代", "2010年代", "2020年代"
    )[
      coalesce(findInterval(年式, c(1980, 1990, 2000, 2010, 2020)), 0L) + 1L
    ]
  )

# Cross table: mean 燃費 (one decimal) for every メーカー x 排気量帯 pair,
# with one column per 排気量帯.
table1 <- all_bikes %>%
  group_by(メーカー, 排気量帯) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE), .groups = "drop") %>%
  mutate(平均燃費 = round(平均燃費, 1)) %>%
  pivot_wider(names_from = 排気量帯, values_from = 平均燃費)

datatable(
  table1,
  caption = "📊 メーカー × 排気量帯ごとの平均燃費",
  options = list(pageLength = 20)
)

# Cross table: mean 燃費 (one decimal) for every 年式帯 x 排気量帯 pair,
# with one column per 排気量帯.
table2 <- all_bikes %>%
  group_by(年式帯, 排気量帯) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE), .groups = "drop") %>%
  mutate(平均燃費 = round(平均燃費, 1)) %>%
  pivot_wider(names_from = 排気量帯, values_from = 平均燃費)

datatable(
  table2,
  caption = "📊 年式帯 × 排気量帯ごとの平均燃費",
  options = list(pageLength = 10)
)

# Mean 燃費 per メーカー, rounded to one decimal and sorted highest first.
bar_data <- all_bikes %>%
  group_by(メーカー) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE), .groups = "drop") %>%
  mutate(平均燃費 = round(平均燃費, 1)) %>%
  arrange(desc(平均燃費))

datatable(
  bar_data,
  caption = "📊 メーカー別 平均燃費（棒グラフ用）"
)

# Mean 燃費 per 年式帯, rounded to one decimal and sorted by the band label.
line_data <- all_bikes %>%
  group_by(年式帯) %>%
  summarise(平均燃費 = mean(燃費, na.rm = TRUE), .groups = "drop") %>%
  mutate(平均燃費 = round(平均燃費, 1)) %>%
  arrange(年式帯)

datatable(
  line_data,
  caption = "📈 年式帯別 平均燃費（折れ線グラフ用）"
)