# First, naive look at the raw AAUP file with read.csv(). As the printed rows
# below show, each record arrives as one unsplit string, and header = TRUE
# turns the first record into the column name.
dta <- read.csv(
  file = "C:/Nicole/Rstudio IDE code/R input output/aaup2.txt",
  header = TRUE
)
head(dta)
## X1061.Alaska.Pacific.University......AK.IIB..454.382.362.382..567.485.471..487...6..11...9...4...32
## 1 1063 Univ.Alaska-Fairbanks AK I 686 560 432 508 914 753 572 677 74 125 118 40 404
## 2 1065 Univ.Alaska-Southeast AK IIA 533 494 329 415 716 663 442 559 9 26 20 9 70
## 3 11462 Univ.Alaska-Anchorage AK IIA 612 507 414 498 825 681 557 670 115 124 101 21 392
## 4 1002 Alabama Agri.&Mech. Univ. AL IIA 442 369 310 350 530 444 376 423 59 77 102 24 262
## 5 1004 University of Montevallo AL IIA 441 385 310 388 542 473 383 477 57 33 35 2 127
## 6 1008 Athens State College AL IIB 466 394 351 396 558 476 427 478 20 18 30 0 68
# Let readr guess the fixed-width column boundaries of the same file.
readr::fwf_empty(
  file = "C:/Nicole/Rstudio IDE code/R input output/aaup2.txt"
)
## $begin
## [1] 0 6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
##
## $end
## [1] 5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
##
## $col_names
## [1] "X1" "X2" "X3" "X4" "X5" "X6" "X7" "X8" "X9" "X10" "X11" "X12"
## [13] "X13" "X14" "X15" "X16"
View(dta)
# fwf_empty() reports 16 fields, but the state abbreviation and the type each
# get their own column here, for 17 columns in total.
# Entries written as "*" are read as missing values.
dta <- readr::read_fwf(
  file = "C:/Nicole/Rstudio IDE code/R input output/aaup2.txt",
  col_positions = readr::fwf_cols(
    X1 = 6, X2 = 31, X3 = 2, X4 = 4, X5 = 5, X6 = 4,
    X7 = 4, X8 = 4, X9 = 5, X10 = 4, X11 = 4, X12 = 5,
    X13 = 4, X14 = 4, X15 = 4, X16 = 5, X17 = 5
  ),
  na = "*"
)
## Rows: 1161 Columns: 17
## -- Column specification --------------------------------------------------------
##
## chr (3): X2, X3, X4
## dbl (14): X1, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows as a plain data frame so every column is printed.
head(as.data.frame(dta))
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13
## 1 1061 Alaska Pacific University AK IIB 454 382 362 382 567 485 471 487 6
## 2 1063 Univ.Alaska-Fairbanks AK I 686 560 432 508 914 753 572 677 74
## 3 1065 Univ.Alaska-Southeast AK IIA 533 494 329 415 716 663 442 559 9
## 4 11462 Univ.Alaska-Anchorage AK IIA 612 507 414 498 825 681 557 670 115
## 5 1002 Alabama Agri.&Mech. Univ. AL IIA 442 369 310 350 530 444 376 423 59
## 6 1004 University of Montevallo AL IIA 441 385 310 388 542 473 383 477 57
## X14 X15 X16 X17
## 1 11 9 4 32
## 2 125 118 40 404
## 3 26 20 9 70
## 4 124 101 21 392
## 5 77 102 24 262
## 6 33 35 2 127
# Check the data: one line per column with its type and first values.
dta |> dplyr::glimpse()
## Rows: 1,161
## Columns: 17
## $ X1 <dbl> 1061, 1063, 1065, 11462, 1002, 1004, 1008, 1009, 1012, 1016, 1019,~
## $ X2 <chr> "Alaska Pacific University", "Univ.Alaska-Fairbanks", "Univ.Alaska~
## $ X3 <chr> "AK", "AK", "AK", "AK", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ~
## $ X4 <chr> "IIB", "I", "IIA", "IIA", "IIA", "IIA", "IIB", "I", "IIB", "IIB", ~
## $ X5 <dbl> 454, 686, 533, 612, 442, 441, 466, 580, 498, 506, 339, 461, 360, 3~
## $ X6 <dbl> 382, 560, 494, 507, 369, 385, 394, 437, 379, 412, 303, 389, 304, 3~
## $ X7 <dbl> 362, 432, 329, 414, 310, 310, 351, 374, 322, 359, 287, 338, 258, 2~
## $ X8 <dbl> 382, 508, 415, 498, 350, 388, 396, 455, 401, 411, 301, 386, 300, 2~
## $ X9 <dbl> 567, 914, 716, 825, 530, 542, 558, 692, 655, 607, 421, 585, 433, 4~
## $ X10 <dbl> 485, 753, 663, 681, 444, 473, 476, 527, 501, 508, 371, 496, 369, 4~
## $ X11 <dbl> 471, 572, 442, 557, 376, 383, 427, 451, 404, 445, 347, 436, 313, 3~
## $ X12 <dbl> 487, 677, 559, 670, 423, 477, 478, 546, 523, 503, 366, 493, 363, 3~
## $ X13 <dbl> 6, 74, 9, 115, 59, 57, 20, 366, 34, 67, 8, 106, 27, 17, 18, 83, 23~
## $ X14 <dbl> 11, 125, 26, 124, 77, 33, 18, 354, 25, 40, 15, 42, 25, 19, 28, 46,~
## $ X15 <dbl> 9, 118, 20, 101, 102, 35, 30, 301, 27, 66, 19, 66, 33, 31, 28, 77,~
## $ X16 <dbl> 4, 40, 9, 21, 24, 2, 0, 66, 3, 27, 2, 58, 4, 19, 3, 9, 1, 10, 19, ~
## $ X17 <dbl> 32, 404, 70, 392, 262, 127, 68, 1109, 89, 200, 44, 272, 89, 86, 77~
# Look at the table in the viewer to double-check the data once more.
View(x = dta)
# Read the Big5-encoded class roster, with strings converted to factors.
roster <- read.csv(
  file = "C:/Nicole/Rstudio IDE code/R input output/ncku_roster.csv",
  header = TRUE,
  stringsAsFactors = TRUE,
  fileEncoding = "big5"
)
# Check the data: first rows rendered as a table.
knitr::kable(head(roster))
## 座號 | 系.年.班 | 開課系序號 | 學號 | 姓名 | 成績 | 選課時間 |
## ---|---|---|---|---|---|---|
## 教師:U3023 許清芳 | 上課時間: 一[6-8];開課號:U3006 U7031 | 科目:資料管理 | NA | |||
## 1 | 心理系 3 | U7031 | D840239 | 蘇 | NA | 02/17/2016 09:17:40 |
## 2 | 心理系 3 | U7031 | D840057 | 吳 | NA | 02/17/2016 09:17:28 |
## 3 | 心理系 4 | U7031 | D841311 | 余 | NA | 02/17/2016 09:09:10 |
## 4 | 心理系 4 | U7031 | D840140 | 王 | NA | 02/17/2016 09:09:34 |
## 5 | 教育所 1 碩 | U3006 | U360098 | 劉 | NA | 01/18/2016 14:56:35 |
# Re-read the roster as plain strings, then drop the first data row (the
# teacher / course banner seen in the preview above) and keep only
# columns 2, 4 and 5.
roster1 <- read.csv(
  file = "C:/Nicole/Rstudio IDE code/R input output/ncku_roster.csv",
  header = TRUE,
  fileEncoding = "big5"
)
roster1 <- roster1[-1, c(2, 4, 5)]
# Author's note: 'colnames' cannot be set on an object with fewer than two
# dimensions, hence the as.data.frame(). The conversion is not assigned, so it
# only affects this preview; roster1 is already a data frame.
head(as.data.frame(roster1))
## 系.年.班 學號 姓名
## 2 心理系 3 D840239 蘇
## 3 心理系 3 D840057 吳
## 4 心理系 4 D841311 余
## 5 心理系 4 D840140 王
## 6 教育所 1 碩 U360098 劉
## 7 教育所 1 博 U380416 陳
# Give the three kept columns English names, then inspect the structure.
names(roster1) <- c("Major", "ID", "Name")
str(roster1)
## 'data.frame': 15 obs. of 3 variables:
## $ Major: chr "心理系 3 " "心理系 3 " "心理系 4 " "心理系 4 " ...
## $ ID : chr "D840239" "D840057" "D841311" "D840140" ...
## $ Name : chr "蘇" "吳" "余" "王" ...
View(roster1)
# Keep only the first three characters of the first column, which drops the
# year / class suffix that follows the department name.
roster1[[1]] <- substr(roster1[[1]], start = 1, stop = 3)
# Author's note: table(roster1$Major) failed with an '"X" is a list' error, so
# the one-column data frame is tabulated instead.
table(roster1[1])
##
## 心理系 心理所 教育所
## 4 7 4
# Use the readxl package to read the Excel file (sheet "Data").
library(readxl)
normtemp <- read_excel(
  path = "C:/Nicole/Rstudio IDE code/R input output/normtemp.xls",
  sheet = "Data"
)
head(normtemp)
## # A tibble: 6 x 3
## Temp Sex Beats
## <dbl> <dbl> <dbl>
## 1 96.3 1 70
## 2 96.7 1 71
## 3 96.9 1 74
## 4 97 1 80
## 5 97.1 1 73
## 6 97.1 1 75
# Body temperature and heart rate: regress Temp on Beats.
normtemp.lm <- lm(formula = Temp ~ Beats, data = normtemp)
# Show the coefficient table and fit statistics.
summary(normtemp.lm)
##
## Call:
## lm(formula = Temp ~ Beats, data = normtemp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85017 -0.39999 0.01033 0.43915 2.46549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.306754 0.657703 146.429 < 2e-16 ***
## Beats 0.026335 0.008876 2.967 0.00359 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.712 on 128 degrees of freedom
## Multiple R-squared: 0.06434, Adjusted R-squared: 0.05703
## F-statistic: 8.802 on 1 and 128 DF, p-value: 0.003591
# R-squared = 0.06434, p = 0.00359: a higher heart rate goes with a higher
# body temperature.
plot(Temp ~ Beats, data = normtemp)
# Overlay the fitted regression line (dashed) from the model estimated above.
abline(normtemp.lm, lty = 2)
title(main = "Temperature vs Heart rate")
# Gender difference in mean body temperature.
# One-way ANOVA can test two or more groups. Temp is the response and Sex the
# grouping factor; the previous call had them swapped (Sex ~ Temp), which
# models the sex code as a function of temperature instead of comparing group
# mean temperatures. With a single two-level predictor the F statistic and
# p-value are identical in both directions, so the knitted table below keeps
# its F = 5.223 and p = 0.0239, but its term label and sums of squares were
# produced by the swapped model.
summary(aov(Temp ~ factor(Sex), data = normtemp))
## Df Sum Sq Mean Sq F value Pr(>F)
## normtemp$Temp 1 1.274 1.274 5.223 0.0239 *
## Residuals 128 31.226 0.244
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mean body temperature differs between men and women (p = 0.0239);
# show the two distributions side by side.
boxplot(Temp ~ Sex, data = normtemp, xlab = "Sex", ylab = "Temperature")
# Load the built-in `women` data set (height and weight) and preview it.
data("women")
head(women)
## height weight
## 1 58 115
## 2 59 117
## 3 60 120
## 4 61 123
## 5 62 126
## 6 63 129
# Work on a copy so the original `women` data stay untouched.
women1 <- women
plot(x = women)
# Create a deliberately broken version: data.entry() opens an interactive
# editor on the copy. The printout below shows women1 coming back as a list.
data.entry(women1)
women1
## $height
## [1] 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
##
## $weight
## [1] 115 117 120 123 126 129 132 135 139 142 146 150 154 159 164
class(women1)
## [1] "list"
# women1 is a list here, so pull out x and y explicitly for the plot.
plot(x = women1$height, y = women1$weight)
# Import Excel files: download the workbook to a temporary .xls file first.
pacman::p_load(readxl, httr)
fL <- "https://www.ris.gov.tw/documents/data/en/3/History-Table-8-2020.xls"
# Create the temp-file path on its own line instead of inside the GET() call.
tf <- tempfile(fileext = ".xls")
resp <- GET(fL, write_disk(tf))
# Stop here with the HTTP status if the download failed, rather than letting
# read_excel() fail later on an error page or an incomplete file.
stop_for_status(resp)
# Print the response summary (URL, status, size, on-disk path).
resp
## Response [https://www.ris.gov.tw/documents/data/en/3/History-Table-8-2020.xls]
## Date: 2021-10-31 11:00
## Status: 200
## Content-Type: application/vnd.ms-excel
## Size: 34.3 kB
## <ON DISK> C:\Users\USER\AppData\Local\Temp\Rtmpy25ujZ\fileb2017cd77e2.xls
# Read the first sheet of the downloaded workbook.
Marriage <- read_excel(path = tf, sheet = 1)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
# Inspect the imported columns and their types.
Marriage |> dplyr::glimpse()
## Rows: 55
## Columns: 5
## $ `Table 8. Couples of Marriages, Divorces, Crude Marriage Rate and Crude Divorce Rate` <chr> ~
## $ ...2 <chr> ~
## $ ...3 <chr> ~
## $ ...4 <chr> ~
## $ ...5 <chr> ~
# Confirm in the viewer which rows and columns should be removed.
View(Marriage)
# Drop rows 1-3 and 50-55, keep columns 1 and 5, and convert the result to a
# plain data frame.
Marriage1 <- as.data.frame(Marriage[-c(1:3, 50:55), c(1, 5)])
head(Marriage1)
## Table 8. Couples of Marriages, Divorces, Crude Marriage Rate and Crude Divorce Rate
## 1 1975
## 2 1976
## 3 1977
## 4 1978
## 5 1979
## 6 1980
## ...5
## 1 0.46000000000000002
## 2 0.5
## 3 0.56000000000000005
## 4 0.64000000000000001
## 5 0.72999999999999998
## 6 0.77000000000000002
View(Marriage1)
# Name the two columns; the year label previously carried a trailing space
# ("Year "), which makes the column awkward to reference by name.
colnames(Marriage1) <- c("Year", "Crude Divorce Rate")
# read_excel() delivered both columns as character strings (see the glimpse
# and head output above); convert them to numbers so the plot gets numeric
# axes instead of relying on implicit coercion of text.
Marriage1[] <- lapply(Marriage1, as.numeric)
# Plot: the trend of the crude divorce rate over the years.
plot(Marriage1, xlab = "Year", ylab = "Crude Divorce Rate")