## Warning: package 'kableExtra' was built under R version 4.0.2
room <- read.csv("room.csv", fileEncoding = "cp932")
population_h26 <- read.csv("population_h26.csv", fileEncoding = "cp932")
death <- read.csv("death.csv", fileEncoding = "cp932")
Salary <- read.csv("Salary.csv", fileEncoding = "cp932")
room %>% DT::datatable()
population_h26 %>% DT::datatable()
death %>% DT::datatable()
Salary %>% DT::datatable()
###
par(family= "HiraKakuProN-W3")
a<-hist(room$家賃, breaks=seq(60000,160000,10000), xlab="家賃", ylab="度数",
main="", col="gray")
bind_cols(階級=a$mids, 頻度=a$counts) %>% as.data.frame()
## 階級 頻度
## 1 65000 4
## 2 75000 10
## 3 85000 35
## 4 95000 35
## 5 105000 16
## 6 115000 14
## 7 125000 11
## 8 135000 8
## 9 145000 7
## 10 155000 0
par(family= "HiraKakuProN-W3")
a<-hist(room$家賃, breaks=seq(60000,160000,5000), xlab="家賃", ylab="度数",
main="", col="gray")
bind_cols(階級=a$mids, 頻度=a$counts) %>% as.data.frame()
## 階級 頻度
## 1 62500 0
## 2 67500 4
## 3 72500 1
## 4 77500 9
## 5 82500 15
## 6 87500 20
## 7 92500 10
## 8 97500 25
## 9 102500 13
## 10 107500 3
## 11 112500 10
## 12 117500 4
## 13 122500 9
## 14 127500 2
## 15 132500 6
## 16 137500 2
## 17 142500 4
## 18 147500 3
## 19 152500 0
## 20 157500 0
par(family= "HiraKakuProN-W3")
a<-hist(room$家賃, breaks=seq(60000,160000,20000), xlab="家賃", ylab="度数",
main="",col="gray")
bind_cols(階級=a$mids, 頻度=a$counts) %>% as.data.frame()
## 階級 頻度
## 1 70000 14
## 2 90000 70
## 3 110000 30
## 4 130000 19
## 5 150000 7
par(family= "HiraKakuProN-W3")
curve(dnorm(x),-4,4, xlab="", ylab="", yaxs="i", ylim=c(0,0.45))
par(family= "HiraKakuProN-W3")
curve(dbeta(x,1.8,5), 0, 1, xlab="", ylab="", yaxs="i", ylim=c(0,3))
par(family= "HiraKakuProN-W3")
curve(dbeta(x,5,1.8), 0, 1, xlab="", ylab="", yaxs="i", ylim=c(0,3))
curve(dunif(x), -0.5, 1.5, xlab="", ylab="", yaxs="i", ylim=c(0,1.05), n=40000)
stem(room$家賃, scale=0.5)
##
## The decimal point is 4 digit(s) to the right of the |
##
## 6 | 889
## 7 | 02778999
## 8 | 000122233344445555666666778889999
## 9 | 0000012223344456777788889999999999
## 10 | 000000112344444555589
## 11 | 01123345555
## 12 | 00002234444558
## 13 | 014555599
## 14 | 255588
## 15 | 0
Fn <- ecdf(room$家賃)
plot(Fn, do.point=F, verticals=T, xlab="", ylab="", main="")
curve(pnorm(x,5),0,10, xlab="", ylab="", yaxs="i", ylim=c(0,1))
par(new=T)
curve(pnorm(x,3),0,10, xlab="", ylab="", yaxs="i", ylim=c(0,1), lty=2)
par(new=T)
curve(pnorm(x,7),0,10, xlab="", ylab="", yaxs="i", ylim=c(0,1), lty=3)
text(2.5,0.5,"A")
text(4.5,0.5,"B")
text(6.5,0.5,"C")
x <- c(60000,70000,80000,90000,100000,110000,120000,130000,140000,150000)
y <- c(0,0.029,0.1,0.35,0.6,0.714,0.814,0.893,0.95,1)
plot(x, y, type="l", xlab="", ylab="", yaxs="i", main="")
par(family= "HiraKakuProN-W3")
x <- c(0,0.2,0.4,0.6,0.8,1.0)
y1 <- c(0,200,200,200,200,200)
y1fn <- cumsum(y1)/sum(y1)
y2 <- c(0,100,100,200,300,300)
y2fn <- cumsum(y2)/sum(y2)
y3 <- c(0,0,0,100,100,300)
y3fn <- cumsum(y3)/sum(y3)
op<-par()
par(pty="s")
plot(x*100,y1fn*100, type="l", bty="n", xlab="従業員の累積相対度数 (%)",
ylab="給与の累積相対度数 (%)", lwd=3)
par(new=T)
plot(x*100,y2fn*100, type="l", bty="n", xlab="", ylab="", lty=2, lwd=3)
par(new=T)
plot(x*100,y3fn*100, type="l", bty="n", xlab="", ylab="", lty=3, lwd=3)
polygon(c(0,100,100,0), c(0,0,100,100))
labels <- c("会社1","会社2","会社3")
legend(5,95, legend=labels, lty=1:3, lwd=rep(2,3))
par(pty=op$pty)
par(family= "HiraKakuProN-W3")
x <- c(0,0.2,0.4,0.6,0.8,1.0)
y <- c(0,239100,342552,422916,546313,791970)
yfn <- cumsum(y)/sum(y)
op<-par()
par(pty="s")
plot(x*100, yfn*100, type="l", bty="n",
xlab="世帯数の累積相対度数 (%)", ylab="年間収入の累積相対度数 (%)")
segments(0,0,100,100,lty=2)
polygon(c(0,100,100,0), c(0,0,100,100))
par(pty=op$pty)
par(family= "HiraKakuProN-W3")
boxplot(家賃~近さ, xlab="近さ", ylab="家賃(円)", range=0, data=room)
population_h26 %>% DT::datatable()
par(family= "HiraKakuProN-W3")
hist(population_h26$人口, breaks=seq(0,15000000,by=1000000), main="", xaxt="n",
xlab="人口(人)", ylab="度数", col="gray")
axis(side=1, at=seq(0,12500000,by=2500000),
labels=c(0,"2500000","5000000","7500000","10000000","12500000"), las=1)
par(family= "HiraKakuProN-W3")
op <- par()
par(mgp=c(5,1,0), mar=c(3.1,7.1,4.1,2.1))
boxplot(population_h26$人口, yaxt="n", xlab="", ylab="人口", col="gray")
axis(side=2, at=seq(0,12500000,by=2500000),
labels <- c(0,"2500000","5000000","7500000","10000000","12500000"), las=1)
par(mgp=op$mgp, mar=op$mar)
table(room$間取り)
##
## 1DK 1K 1LDK 1R 1SLDK 2DK 2K 2LDK
## 6 105 8 9 1 9 1 1
par(family= "HiraKakuProN-W3")
barplot(table(room$間取り), xlab="間取り", ylab="度数")
par(family= "HiraKakuProN-W3")
data <- c(23,33,37,14,21,6,1,5)
names(data) <- c("東","南東","南","南西","西","北西","北","北東")
pie(data, labels=names(data), main="方角", clockwise=TRUE)
par(family= "HiraKakuProN-W3")
plot(room$大きさ, room$家賃, pch=c(1, 2)[unclass(room$近さ)],
xlab="大きさ", ylab="家賃")
Labels <- c("A:近い","B:遠い")
legend(45,100000, legend=Labels, pch=1:2)
plot(c(1,1,2,2,3,3,4,4,5,5),c(1,2,2,3,3,4,3,4,4,5),
xlab="x", ylab="y", xlim=c(0,6), ylim=c(0,6))
plot(c(1,1,2,2,3,3,4,4,5,5),c(3,4,2,3,2,3,3,4,2,4),
xlab="x", ylab="y", xlim=c(0,6), ylim=c(0,6))
plot(c(1,1,2,2,3,3,4,4,5,5),c(4,5,3,4,3,4,2,3,1,2),
xlab="x", ylab="y", xlim=c(0,6), ylim=c(0,6))
plot(c(1,1,2,2,3,3,4,5,5),c(1,1.5,2,2.5,3,3.5,4,4.5,5),
xlab="x", ylab="y", xlim=c(0,6), ylim=c(0,6))
plot(c(1,1,2,2,3,3,4,4,5,5),c(1,2,2,3,3,4,3,4,4,5),
xlab="x", ylab="y", xlim=c(0,6), ylim=c(0,6))
plot(c(1,1,2,2,3,3,4,4,5,5),c(1,3,2,3,2,3,3,4,3,4),
xlab="x", ylab="y", xlim=c(0,6), ylim=c(0,6))
par(family= "HiraKakuProN-W3")
LabelsX <- c("X15歳以下","X16.24歳","X25.29歳","X30.39歳",
"X40.49歳","X50.59歳","X60.64歳","X65.74歳","X75歳以上")
Labels <- c("15歳以下","16~24歳","25~29歳","30~39歳","40~49歳",
"50~59歳","60~64歳","65~74歳","75歳以上")
matplot(death$平成暦年, death[, LabelsX], type="b", lty=1:9,
pch = 1:9, col=1:9, xlab="平成(年)", ylab="")
legend(20.3,1500,legend=Labels, lty=1:9, pch = 1:9, col=1:9, cex = 0.7)
par(family= "HiraKakuProN-W3")
mean2000<-apply(Salary[1:12,2:3],2,mean)
SalaryIndex.ts<-ts(sweep(Salary[,2:3], 2, mean2000, FUN="/"),
start=c(2000,1), frequency=12)
plot(SalaryIndex.ts, xlab="年月", main="")
par(family= "HiraKakuProN-W3")
SalaryofTokyo.ts<-ts(Salary[,3]/10000,start=c(2000,1),frequency=12)
plot(decompose(SalaryofTokyo.ts), xlab="年月")
par(family= "HiraKakuProN-W3")
acf(Salary$Tokyo,main="コレログラム:東京",xlab="月差")
label <- c("60,001~70,000", "70,001~80,000", "80,001~90,000", "90,001~100,000",
"100,001~110,000", "110,001~120,000", "120,001~130,000", "130,001~140,000",
"140,001~150,000")
table(cut(room$家賃,seq(60000,150000,by=10000),labels=label))
##
## 60,001~70,000 70,001~80,000 80,001~90,000 90,001~100,000
## 4 10 35 35
## 100,001~110,000 110,001~120,000 120,001~130,000 130,001~140,000
## 16 14 11 8
## 140,001~150,000
## 7
table(cut(room$家賃,seq(60000,150000,by=10000),labels=label))/length(room$家賃)
##
## 60,001~70,000 70,001~80,000 80,001~90,000 90,001~100,000
## 0.02857143 0.07142857 0.25000000 0.25000000
## 100,001~110,000 110,001~120,000 120,001~130,000 130,001~140,000
## 0.11428571 0.10000000 0.07857143 0.05714286
## 140,001~150,000
## 0.05000000
-(Rで分散、標準偏差を計算する関数は、不偏分散(n-1で割る分散)に基づいて計算する)
x <- c(room$家賃)
y <- c(room$大きさ)
z <- c(room$築年数)
n <- length(x)
mean(x);median(x);names(which.max(table(x)));
## [1] 101612.9
## [1] 98750
## [1] "99000"
var(x)*(n-1)/n;sd(x)*sqrt((n-1)/n);quantile(x);IQR(x)
## [1] 368989692
## [1] 19209.1
## 0% 25% 50% 75% 100%
## 68000 86750 98750 113250 150000
## [1] 26500
mean(y);median(y);names(which.max(table(y)));
## [1] 26.9
## [1] 25
## [1] "21"
var(y)*(n-1)/n;sd(y)*sqrt((n-1)/n);quantile(y);IQR(y)
## [1] 60.63286
## [1] 7.78671
## 0% 25% 50% 75% 100%
## 15.00 21.75 25.00 29.00 60.00
## [1] 7.25
mean(z);median(z);names(which.max(table(z)));
## [1] 9.557143
## [1] 9
## [1] "6"
var(z)*(n-1)/n;sd(z)*sqrt((n-1)/n);quantile(z);IQR(z)
## [1] 30.58959
## [1] 5.530786
## 0% 25% 50% 75% 100%
## 0 6 9 13 28
## [1] 7
table(room$間取り)
##
## 1DK 1K 1LDK 1R 1SLDK 2DK 2K 2LDK
## 6 105 8 9 1 9 1 1
table(room$間取り)/length(room$間取り)
##
## 1DK 1K 1LDK 1R 1SLDK 2DK
## 0.042857143 0.750000000 0.057142857 0.064285714 0.007142857 0.064285714
## 2K 2LDK
## 0.007142857 0.007142857
table(room$方角)[c("東","南東","南","南西","西","北西","北","北東")]
##
## 東 南東 南 南西 西 北西 北 北東
## 23 33 37 14 21 6 1 5
table(room$方角)[c("東","南東","南","南西","西","北西","北","北東")]/length(room$方角)
##
## 東 南東 南 南西 西 北西
## 0.164285714 0.235714286 0.264285714 0.100000000 0.150000000 0.042857143
## 北 北東
## 0.007142857 0.035714286
label1 <- c("~15以下", "15~20", "20~25", "25~30", "30~35", "35~40",
"40~45", "45~50", "50~55", "55~60")
label2 <- c("~7以下", "7~8", "8~9", "9~10", "10~11", "11~12",
"12~13", "13~14", "14~15")
table(cut(room$大きさ, seq(10,60,by=5), labels=label1),
cut(room$家賃, seq(60000,150000,by=10000), labels=label2)) %>% knitr::kable()
| ~7以下 | 7~8 | 8~9 | 9~10 | 10~11 | 11~12 | 12~13 | 13~14 | 14~15 | |
|---|---|---|---|---|---|---|---|---|---|
| ~15以下 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 15~20 | 4 | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 0 |
| 20~25 | 0 | 5 | 27 | 17 | 8 | 2 | 1 | 0 | 0 |
| 25~30 | 0 | 0 | 3 | 17 | 7 | 3 | 5 | 0 | 1 |
| 30~35 | 0 | 0 | 0 | 0 | 1 | 5 | 3 | 0 | 0 |
| 35~40 | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 3 | 1 |
| 40~45 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 4 |
| 45~50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 |
| 50~55 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 55~60 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
x <- c(room$家賃)
y <- c(room$大きさ)
z <- c(room$築年数)
cor(x,y);cor(y,z);cor(x,z)
## [1] 0.8411926
## [1] 0.5159447
## [1] 0.2454515
cov(room$家賃,room$大きさ)/var(room$大きさ)
## [1] 2075.145
mean(room$家賃)-cov(room$家賃,room$大きさ)/var(room$大きさ)*mean(room$大きさ)
## [1] 45791.44
cor(room$家賃,room$大きさ)
## [1] 0.8411926
summary(lm(room$家賃~room$大きさ))$r.squared
## [1] 0.707605
table(cut(room$大きさ, c(0, 25, 100), right=FALSE, labels=c("狭い", "広い")),
cut(room$家賃, c(0, 100000, 200000), right=FALSE, labels=c("安い", "高い")))
##
## 安い 高い
## 狭い 57 8
## 広い 22 53
table(room$近さ,
cut(room$家賃, c(0, 100000, 200000), right=FALSE, labels=c("安い", "高い")))
##
## 安い 高い
## A 36 33
## B 43 28