期末試験

-

d <- read.csv("https://stats.dip.jp/01_ds/data/ad_sales.csv")

str(d)

## 'data.frame':    200 obs. of  4 variables:
##  $ テレビ: int  2301 445 172 1515 1808 87 575 1202 86 1998 ...
##  $ ラジオ: int  378 393 459 413 108 489 328 196 21 26 ...
##  $ 新聞  : int  692 451 693 585 584 750 235 116 10 212 ...
##  $ 売上  : int  221000 104000 93000 185000 129000 72000 118000 132000 48000 106000 ...

library(DT)
datatable(d, caption = 'ある製品の広告媒体別費用と売上（単位：万円）')

COL <- c(rgb(255,   0,   0,  105, max = 255), 
         rgb(  0,   0, 255,  105, max = 255), 
         rgb(  0, 155,   0,  105, max = 255))

boxplot(d$テレビ, d$ラジオ, d$新聞,
        col = COL,
        ylim = c(0, 3000),
        names = c('テレビ', 'ラジオ', '新聞'),
        main = '広告媒体別費用')

abline(h = seq(0, 3000, 500), lty = 2, col = gray(.5, .25))

library(psych)

pairs.panels(d)

(uv <- cor.test(d$テレビ, d$ラジオ))

## 
##  Pearson's product-moment correlation
## 
## data:  d$テレビ and d$ラジオ
## t = 0.77239, df = 198, p-value = 0.4408
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08457548  0.19208899
## sample estimates:
##        cor 
## 0.05480866

ifelse(uv$p.value < 0.05, '有意', '有意でない')

## [1] "有意でない"

(uw <- cor.test(d$テレビ, d$新聞))

## 
##  Pearson's product-moment correlation
## 
## data:  d$テレビ and d$新聞
## t = 0.79839, df = 198, p-value = 0.4256
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08274345  0.19386523
## sample estimates:
##        cor 
## 0.05664787

ifelse(uw$p.value < 0.05, '有意', '有意でない')

## [1] "有意でない"

(vw <- cor.test(d$ラジオ, d$新聞))

## 
##  Pearson's product-moment correlation
## 
## data:  d$ラジオ and d$新聞
## t = 5.3279, df = 198, p-value = 2.689e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2264899 0.4697658
## sample estimates:
##       cor 
## 0.3541038

ifelse(vw$p.value < 0.05, '有意', '有意でない')

## [1] "有意"

library(Hmisc)

## 
##  次のパッケージを付け加えます: 'Hmisc'

##  以下のオブジェクトは 'package:psych' からマスクされています:
## 
##     describe

##  以下のオブジェクトは 'package:base' からマスクされています:
## 
##     format.pval, units

rcorr(as.matrix(d))

##        テレビ ラジオ 新聞 売上
## テレビ   1.00   0.05 0.06 0.78
## ラジオ   0.05   1.00 0.35 0.58
## 新聞     0.06   0.35 1.00 0.23
## 売上     0.78   0.58 0.23 1.00
## 
## n= 200 
## 
## 
## P
##        テレビ ラジオ 新聞   売上  
## テレビ        0.4408 0.4256 0.0000
## ラジオ 0.4408        0.0000 0.0000
## 新聞   0.4256 0.0000        0.0011
## 売上   0.0000 0.0000 0.0011

library(ggcorrplot)

##  要求されたパッケージ ggplot2 をロード中です

## 
##  次のパッケージを付け加えます: 'ggplot2'

##  以下のオブジェクトは 'package:psych' からマスクされています:
## 
##     %+%, alpha

library(plotly)

## 
##  次のパッケージを付け加えます: 'plotly'

##  以下のオブジェクトは 'package:ggplot2' からマスクされています:
## 
##     last_plot

##  以下のオブジェクトは 'package:Hmisc' からマスクされています:
## 
##     subplot

##  以下のオブジェクトは 'package:stats' からマスクされています:
## 
##     filter

##  以下のオブジェクトは 'package:graphics' からマスクされています:
## 
##     layout

cor(d) |> ggcorrplot(lab = T, hc.order = T, outline.color = "white", p.mat = cor_pmat(d)) |> ggplotly() |>
layout(font  = list(size = 11, color = 'blue', family = 'UD Digi Kyokasho NK-R'),
       title = '広告媒体別費用の無相関検定')

library(sjPlot)
fit <- lm(売上 ~ テレビ + ラジオ + 新聞, data = d)
tab_model(fit, show.stat = T, show.aic = T)

	売上
Predictors	Estimates	CI	Statistic	p
(Intercept)	29388.89	23237.62 – 35540.16	9.42	<0.001
テレビ	45.76	43.01 – 48.52	32.81	<0.001
ラジオ	188.53	171.55 – 205.51	21.89	<0.001
新聞	-1.04	-12.62 – 10.54	-0.18	0.860
Observations	200
R² / R² adjusted	0.897 / 0.896
AIC	4466.498

fit1 <- lm(売上 ~ テレビ + ラジオ, data = d)　#新聞なしの場合
tab_model(fit1, show.stat = T, show.aic = T)

	売上
Predictors	Estimates	CI	Statistic	p
(Intercept)	29211.00	23403.43 – 35018.57	9.92	<0.001
テレビ	45.75	43.01 – 48.50	32.91	<0.001
ラジオ	187.99	172.14 – 203.85	23.38	<0.001
Observations	200
R² / R² adjusted	0.897 / 0.896
AIC	4464.530

plot_model(fit1, show.values = T, show.intercept = T, width = 0.1)

COL <- c(rgb(255,   0,   0,  105, max = 255), 
         rgb(  0,   0, 255,  105, max = 255), 
         rgb(  0, 155,   0,  105, max = 255), 
         rgb(140, 140, 140,  105, max = 255), 
         rgb(180, 180, 180,  105, max = 255))

fit2 <- lm(売上 ~ poly(テレビ, 2), data = d)

# Create a sequence of new values for predictions
tp.p <- seq(min(d$テレビ), max(d$テレビ), length.out = 100)

# Generate predictions with confidence intervals
pred <- predict(fit2, newdata = data.frame(テレビ = tp.p), interval = "predict")
conf <- predict(fit2, newdata = data.frame(テレビ = tp.p), interval = "confidence")

# Plotting
matplot(x = d$テレビ, y = d$売上,
        type = 'p', pch = 16, col = COL[1], ylim = c(min(d$売上)-5000, max(d$売上)+5000),
        main = '２次多項式回帰モデルを使った売上予測', 
        xlab = '広告費用（テレビ）', ylab = '売上(万円)')

gray.area <- function(x, lwr, upr, col) {
  polygon(c(x, rev(x)), c(lwr, rev(upr)), col = col, border = NA)
}

gray.area(tp.p, conf[, 'lwr'], conf[, 'upr'], col = COL[4])
gray.area(tp.p, pred[, 'lwr'], pred[, 'upr'], col = COL[5])

matlines(x = tp.p, y = conf[, 'fit'], col = COL[2], lwd = 3)

legend('topright',
       pch = c(16, NA, NA, NA), col = COL[-3],
       lty = c(NA, 1, NA, NA), lwd = c(NA, 3, NA, NA), 
       fill = c(NA, NA, COL[4], COL[5]), border = F,
       legend = c('テレビ', '売上', '95％信頼区間', '95％予測区間'))

期末試験

2024-07-02

-

ビジネスアナリティクス

学籍番号：23150110，氏名：大石拓真

-

期末試験

2024-07-02

-

ビジネス アナリティクス

学籍番号：23150110，氏名：大石拓真

-

ビジネスアナリティクス