library(readxl)
library(ggplot2)
library(RColorBrewer)
library(magrittr)
library(tidyverse)
# Load the coin workbook and keep only its first four columns.
dataset <- read_excel(path = "Coins.xlsx")
df <- dataset[1:4]
# Preview the first four rows of the trimmed table.
head(df, n = 4)
## # A tibble: 4 x 4
## Coin Condition Mintage Value
## <chr> <chr> <dbl> <dbl>
## 1 1878, 8 feathers VF-20 749500 85
## 2 1878, 8 feathers EF-40 749500 100
## 3 1878, 8 feathers AU-50 749500 120
## 4 1878, 8 feathers MS-60 749500 200
# Build the modelling table: add log2-transformed Value and Mintage columns,
# then drop every row whose Condition is "PF-63".
df3 <- df %>%
  mutate(
    log_val = log2(Value),
    log_Mintage = log2(Mintage)
  ) %>%
  filter(Condition != "PF-63")

# Condition as a factor; `val` keeps the original level ordering, while
# Condition2 uses "VF-20" as the reference level for the model contrasts.
Condition1 <- as.factor(df3$Condition)
val <- levels(Condition1)
Condition2 <- relevel(Condition1, ref = "VF-20")

# Attach df3 exactly once. A second attach() only stacks a duplicate copy on
# the search path and triggers "objects are masked" messages.
# NOTE(review): attach() is kept solely for downstream code that refers to
# columns by bare name; prefer `data = df3`, `df3$col` or with(df3, ...).
attach(df3)
# Linear model of log_val on Condition2, log_Mintage and their interaction.
# log_val and log_Mintage come from df3; Condition2 is not a df3 column, so
# lm() resolves it from the environment the formula was created in.
model3 <- lm(
  formula = log_val ~ Condition2 + log_Mintage + Condition2:log_Mintage,
  data = df3
)
summary(model3)
##
## Call:
## lm(formula = log_val ~ Condition2 + log_Mintage + Condition2:log_Mintage,
## data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6194 -1.0392 -0.2207 0.5179 8.9794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.27817 2.24486 9.033 < 2e-16 ***
## Condition2AU-50 3.97420 3.17471 1.252 0.211096
## Condition2EF-40 1.83729 3.17471 0.579 0.562980
## Condition2MS-60 9.21522 3.17471 2.903 0.003829 **
## Condition2MS-63 10.01337 3.17471 3.154 0.001687 **
## Condition2MS-64 10.42315 3.17471 3.283 0.001083 **
## Condition2MS-65 11.23284 3.17471 3.538 0.000432 ***
## log_Mintage -0.65650 0.10273 -6.391 3.21e-10 ***
## Condition2AU-50:log_Mintage -0.14348 0.14528 -0.988 0.323723
## Condition2EF-40:log_Mintage -0.06909 0.14528 -0.476 0.634549
## Condition2MS-60:log_Mintage -0.33433 0.14528 -2.301 0.021697 *
## Condition2MS-63:log_Mintage -0.34123 0.14528 -2.349 0.019139 *
## Condition2MS-64:log_Mintage -0.32933 0.14528 -2.267 0.023735 *
## Condition2MS-65:log_Mintage -0.29861 0.14528 -2.055 0.040244 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.903 on 630 degrees of freedom
## Multiple R-squared: 0.6041, Adjusted R-squared: 0.5959
## F-statistic: 73.95 on 13 and 630 DF, p-value: < 2.2e-16
It can be seen that the p-value of the F-statistic is < 2.2e-16, which is highly significant. This means that at least one of the predictor variables is significantly related to the outcome variable.
# Histograms
# Draw the two log2-scale histograms side by side. par() returns the previous
# settings, which are restored afterwards so later base-graphics plots are
# not forced into the 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
# Look the columns up inside df3 with with() instead of relying on a prior
# attach(df3); the default titles and axis labels still read "log_val" and
# "log_Mintage".
with(df3, hist(log_val, col = "yellow"))
with(df3, hist(log_Mintage, col = "cyan"))
par(old_par)
# Scatterplot
# log_val against log_Mintage, coloured by Condition.
scat <- ggplot(
  data = df3,
  mapping = aes(x = log_Mintage, y = log_val, colour = factor(Condition))
) +
  geom_point()
# One panel per Condition laid out as columns, each with free axis scales.
scat + facet_grid(cols = vars(Condition), scales = "free")
# Boxplot
# Distribution of log_val within each Condition.
ggplot(data = df3, mapping = aes(x = as.factor(Condition), y = log_val)) +
  geom_boxplot(fill = "slateblue ", alpha = 0.2) +
  labs(x = "Condition")