期末報告

1.台灣確診人數長條圖及邊界地圖(5/16)
2.台灣加權指數與台積電之plotly雙y軸及蠟燭圖觀察(1/1~5/19)

yujen
2022-05-24

前言-確診人數圖

台灣近3年來飽受疫情所苦,所以期末主題想製作與疫情相關的題目,透過ggplot繪製長條圖,以及透過邊界地圖,了解台灣各地區的確診情況。

前言-plotly雙y軸及蠟燭圖觀察台積電與台股加權指數關係

透過plotly雙y軸圖及蠟燭圖等方式,了解台股加權指數與台積電的關係。

確診人數長條圖及邊界地圖

套件

library(readxl)
library(ggplot2)
library(leaflet)
library(maptools)
library(rgdal)

設定、匯入資料

df = read_excel("C:/Users/yujen/Desktop/AS/AScovid19.xlsx",col_names=F) 
  #匯入資料
colnames(df) = c("縣市", "人數","經度","緯度") 
  #設定變數名稱

ggplot繪圖

ggplot(df, aes(x=reorder(縣市,人數), y=人數/10000, fill=人數)) + 
  #把xy軸翻轉並設定參數
geom_bar(stat = "identity") +
  #設定長條圖
labs(title="台灣確診人數", x="縣市", y="人數", 
     subtitle="單位:(萬)",caption="資料來源:衛福部")+  
  #增加標題、軸的名稱
coord_flip() + 
theme(plot.title=element_text
     (face="bold", size = 14),
      axis.title=element_text(
      size=12), plot.subtitle = element_text(size = 10))

  #設定字體、樣式

確診人數地理分布

匯入、整理資料

path = "C:/Users/yujen/Desktop/AS/leaflet-rgdal-台灣縣市人口數量 (台灣邊界地圖)/"
shp_file = paste0(path, "mapdata202003270418/TOWN_MOI_1090324.shp")
  #設定路徑
Taiwan_shp = rgdal::readOGR(shp_file, layer="TOWN_MOI_1090324")
  #匯入資料

查看資料內容並轉檔

names(Taiwan_shp)
  #觀察資料屬性
Taiwan_shp$COUNTYNAME
Taiwan_shp$COUNTY = iconv(Taiwan_shp$COUNTYNAME, from="UTF-8")
  #因為原本是亂碼 需要轉檔 就不會有亂碼
print(Taiwan_shp$COUNTY)
  #檢查是否還是亂碼
mydata = data.frame(name=Taiwan_shp$COUNTY)
  #設定data frame

建立迴圈

df$popup = paste0(df$縣市,": ",df$人數,"")
  #設定後面需要popup的部分
mydata$num = NA
mydata$popup = NA
  #開出欄位 迴圈才能做
for(i in 1:nrow(mydata)){
  mydata$num[i] = df$人數[which(df$縣市 == mydata$name[i])]/10000
  mydata$popup[i] = paste0(mydata$name[i], "<br>", mydata$name[i], " 萬人")}
  #比對名稱後設定確診人數的地圖邊界

設定顏色

palette = colorRampPalette(colors=c("white","gray75","gray50","gray25","gray1"),
                           space="Lab")(30)
  #設定五個階層的顏色
pal = leaflet::colorNumeric(palette=palette, domain=mydata$num)

透過leaflet繪圖

map = leaflet(Taiwan_shp) %>%
  setView(120.9738819, 23.97565, 6) %>%
  #設定地圖中心 (經度,緯度,大小)
  addTiles(group = "map1") %>%
  #基礎地圖
  addProviderTiles(providers$Esri.NatGeoWorldMap,
                   group = "map2") %>%
  addProviderTiles(providers$CartoDB.Positron,
                   group = "map3") %>%
  #新增其他種地圖
  addPolygons(stroke=FALSE,
              opacity=0.5,
              fillOpacity=1, 
              smoothFactor=0.5, 
              fillColor=~pal(mydata$num),
              color="black",
              weight=1,
              popup=mydata$popup,
              highlightOptions=highlightOptions
              (color="white",
               weight=1,bringToFront=TRUE),
               group="open") %>%
  #多邊形的函數 把邊界地圖畫出
  addLegend(pal=pal, 
            values=mydata$num,
            title="確診人數(萬)") %>%
  #設定Legends
  addMiniMap(width = 100,
             height = 100,
             collapsedWidth = 19,
             collapsedHeight = 19,
             toggleDisplay = TRUE,
             position = "bottomleft") %>%
  #設定左下角的小地圖 範圍及顯示大小
  addMarkers(data=df,
             lng=~經度, 
             lat=~緯度,
             popup=~popup,
             group="popup") %>%
  #增加popup顯示各地區確診人數
  addLayersControl(
    baseGroups = c("map1", "map2" , "map3"),
    overlayGroups = c("open","popup"),
    options = layersControlOptions(collapsed = FALSE))
  #設定group 可以做成勾選的效果 顯示我要的部分
map

結論1

從地圖中可以看到確診者大多集中在人口稠密的北部地區,與直方圖呈現的結果一致。

台灣加權指數與台積電之plotly雙y軸及蠟燭圖

套件

library(quantmod)
library(plotly)
library(reshape2)
library(ggplot2)
library(ggpmisc)
library(lawstat)
library(car) 
library(nortest)
library("MASS")
library(lmtest)

取得及整理台積電與台灣加權指數資料

ticker1 = "^TWII"   
  #台灣股票加權指數
ticker2 = "2330.TW"
  #台積電
start = "2022-01-01"    
  #開始的時間
end = Sys.Date()    
  #電腦上的時間
index = quantmod::getSymbols(ticker1, 
                     src="yahoo",
                     from=start,
                     to=end,
                     auto.assign=F)
  #獲取資料
tsmc = quantmod::getSymbols(ticker2, 
                     src="yahoo",
                     from=start,
                     to=end,
                     auto.assign=F)
  #獲取資料
tsmc$return = quantmod::periodReturn(tsmc$`2330.TW.Close`,
                                      period="daily", 
                                      type="arithmetic")
  #計算報酬率
index$return = quantmod::periodReturn(index$TWII.Close,
                                     period="daily", 
                                     type="arithmetic")
  #計算報酬率

tsmc1 = tsmc[-1] 
index1 = index[-1]
  #刪除第一筆資料 因為報酬率第一筆為0
df1 = merge(index1$return,tsmc1$return)
  #合併我要的資料
colnames(df1) = c("index","tsmc")
  #取名
df2 = data.frame(df1)
  #轉換格式
df3 = data.frame(Date=index(df1),df1)
  #把時間變成變數
rownames(df3) = NULL 
  #刪除列的名字
df4 = merge(index1$TWII.Close,tsmc1$X2330.TW.Close)
  #取得需要資料
df5 = data.frame(Date=index(df4),df4)
  #轉換格式
rownames(df5) = NULL 
  #刪除列的名字

蠟燭圖

#台灣加權指數蠟燭圖
indexplot = data.frame(Date=index(index), coredata(index))
fig1 = indexplot %>% 
  # A%>% B (先做A在做B)
  plot_ly(x=~Date, 
  #~是取自資料框的資料而非記憶體
          type="candlestick", 
          open=~TWII.Open,   
          close=~TWII.Close,
          high=~TWII.High, 
          low=~TWII.Low) %>% 
  layout(title="Candlestick Chart")
  #輸入必要資訊及劃出蠟燭圖
fig1
#台積電蠟燭圖
tsmcplot = data.frame(Date=index(tsmc), coredata(tsmc))
fig2 = tsmcplot %>% 
  plot_ly(x=~Date, 
          type="candlestick", 
          open=~X2330.TW.Open,  
          close=~X2330.TW.Close,
          high=~X2330.TW.High, 
          low=~X2330.TW.Low) %>% 
  layout(title="Candlestick Chart")
fig2

雙y軸plotly-報酬率

gra1 = plot_ly()
  #設定空白畫布
gra1 = gra1 %>% 
  add_trace(x =df3$Date , y =df3$index*100, name = "index", mode = "lines+markers", type = "scatter")
  #設定X及Y1參數、模式、如何作圖
ay = list(
  tickfont = list(color = "red"),
  overlaying = "y",
  side = "right",
  title = "<b>tsmc</b>")
  #設定Y2座標數字顏色、位置、標題

gra1 = gra1 %>% add_trace(x = df3$Date, y = df3$tsmc*100 , name = "tsmc", yaxis = "y2", mode = "lines+markers", type = "scatter")
  #設定X及Y2參數、模式、如何作圖
gra1 = gra1 %>% 
  layout(
    title = "報酬率", yaxis2 = ay,
    xaxis = list(title="<b>date</b>"),
    yaxis = list(title="<b>index</b>"))%>%
  #設定標題、座標名稱
  layout(plot_bgcolor='#e5ecf6',
    xaxis = list(
    zerolinecolor = '#ffff',
    zerolinewidth = 2,
    gridcolor = 'ffff'),
    yaxis = list(
    zerolinecolor = '#ffff',
    zerolinewidth = 2,
    gridcolor = 'ffff'))
  #設定軸線的顏色
gra1

雙y軸plotly-收盤價

gra2 = plot_ly()
gra2 = gra2 %>% 
  add_trace(x =df5$Date ,
            y =df5$TWII.Close, 
            name = "index", 
            mode = "lines+markers", 
            type = "scatter")
#設定X及Y1參數、模式、如何作圖
ay2 = list(
  tickfont = list(color = "red"),
  overlaying = "y",
  side = "right",
  title = "<b>tsmc</b>")

gra2 = gra2 %>% add_trace(x = df5$Date, 
                          y = df5$X2330.TW.Close , 
                          name = "tsmc", yaxis = "y2", 
                          mode = "lines+markers", 
                          type = "scatter")
gra2 = gra2 %>% layout(
  title = "收盤價", yaxis2 = ay2,
  xaxis = list(title="<b>date</b>"),
  yaxis = list(title="<b>index</b>"))%>%
  layout(plot_bgcolor='#e5ecf6',
         xaxis = list(
           zerolinecolor = '#ffff',
           zerolinewidth = 2,
           gridcolor = 'ffff'),
         yaxis = list(
           zerolinecolor = '#ffff',
           zerolinewidth = 2,
           gridcolor = 'ffff'))
gra2

迴歸報表(算得beta值)

lmfit = lm(formula = index~tsmc,data = df2)
a = summary(lmfit) 
a

Call:
lm(formula = index ~ tsmc, data = df2)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.012372 -0.003694  0.001035  0.003335  0.014102 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.0001620  0.0005884  -0.275    0.784    
tsmc         0.6179152  0.0353076  17.501   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.005516 on 87 degrees of freedom
Multiple R-squared:  0.7788,    Adjusted R-squared:  0.7762 
F-statistic: 306.3 on 1 and 87 DF,  p-value: < 2.2e-16

ggplot報酬率折線圖-延伸

#ggplot報酬率折線圖
ggplotline = data.frame(Date=index(df1),df1)
rownames(ggplotline) = NULL 
ggplotline1 = melt(ggplotline,id= "Date")
colnames(ggplotline1) = c("date","type","return")
ggplot(ggplotline1, aes(x=date,y=return*100),color=type) +
  geom_line(aes(linetype = type))+
  labs(x="date", y="return%",
        title="折線圖",size =2) +
  theme(plot.title=element_text(color = 'black', size = 12)) + 
  theme(axis.title=element_text(color='steel blue', size=12,)) + 
  theme(axis.line = element_line(colour="black", size = 0.5))

##散布圖-延伸

ggplot(df2, aes(x=tsmc*100,y=index*100)) + 
  geom_point(color = "steel blue") +
  labs( x="tsmc%", y="index%",
        title="散布圖",subtitle="紅色為迴歸線",size =2) +
  theme(plot.title=element_text(color = 'black', size = 12)) + 
  theme(axis.title=element_text(color='steel blue', size=12,)) + 
  theme(axis.line = element_line(colour="black", size = 0.5)) 

殘差圖-延伸

df1$fitted = fitted.values(lmfit)
df1$residuals =residuals(lmfit)
plot(lmfit)

檢定-延伸

虛無假設H0:殘差項符合常態分配
對立假設H1:殘差項不符合常態分配

#Kolmogorov-Smirnov Test
lillie.test(df1$residuals)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  df1$residuals
D = 0.87559, p-value < 2.2e-16

p-value > α=0.05,
因此拒絕H1,接受H0,代表殘差項符合常態分配
—————————————————————-

虛無假設H0:殘差項變異數具有同質性
對立假設H1:殘差項變異數不具有同質性

#bp Test
bptest(lmfit)

    studentized Breusch-Pagan test

data:  lmfit
BP = 0.74431, df = 1, p-value = 0.3883

p-value > α=0.05,
因此拒絕H1,接受H0,代表殘差項間變異數具有同質性
—————————————————————-

虛無假設H0:殘差項間相互獨立
對立假設H1:殘差項間相互不獨立

#durbinWatson Test
durbinWatsonTest(lmfit)
 lag Autocorrelation D-W Statistic p-value
   1      0.05686737      1.842171    0.47
 Alternative hypothesis: rho != 0

p-value > α=0.05,
因此拒絕H1,接受H0,代表殘差項間相互獨立

##迴歸線散布圖-延伸

ggplot(df2, aes(x=tsmc*100,y=index*100)) + 
  geom_point(color = "steel blue") +
  labs( x="tsmc%", y="index%",
        title="散布圖",subtitle="紅色為迴歸線",size =2) +
  theme(plot.title=element_text(color = 'black', size = 12)) + 
  theme(axis.title=element_text(color='steel blue', size=12,)) + 
  theme(axis.line = element_line(colour="black", size = 0.5)) +
  geom_smooth(method ='lm',formula =y~x,se=FALSE, color='red') +
  stat_poly_eq(formula = y~x, eq.with.lhs = "italic(hat(y))~`=`~",
               aes(label = paste(..eq.label.., ..rr.label.., sep = "*plain (\",\")~")),
               parse=TRUE) 

結論2

透過plotly雙y軸圖及蠟燭圖等方式,了解到台股加權指數與台積電的關係呈現高度相關
為了加以證明,我額外自己去做了簡單線性迴歸分析,我們可以發現其r square
很高約有0.77,表示高度正相關,其後透過模型檢定,
發現線性、常態性、變異數同質性及獨立性皆符合簡單線性迴歸假設,因此模型成立,
也意謂著大盤的走勢與台積電的關係密不可分。