20211031 in class SCL

In class 1

# First attempt: read the AAUP file as comma-separated text. The file is
# fixed-width and has no header row, so the printout below shows each record
# collapsed into a single column, with the first record consumed as the
# column name. The fixed-width read further down is the usable import.
dta <- read.csv(
  "C:/Nicole/Rstudio IDE code/R input output/aaup2.txt",
  header = TRUE
)

head(dta)

##   X1061.Alaska.Pacific.University......AK.IIB..454.382.362.382..567.485.471..487...6..11...9...4...32
## 1  1063 Univ.Alaska-Fairbanks          AK I    686 560 432 508  914 753 572  677  74 125 118  40  404
## 2  1065 Univ.Alaska-Southeast          AK IIA  533 494 329 415  716 663 442  559   9  26  20   9   70
## 3 11462 Univ.Alaska-Anchorage          AK IIA  612 507 414 498  825 681 557  670 115 124 101  21  392
## 4  1002 Alabama Agri.&Mech. Univ.      AL IIA  442 369 310 350  530 444 376  423  59  77 102  24  262
## 5  1004 University of Montevallo       AL IIA  441 385 310 388  542 473 383  477  57  33  35   2  127
## 6  1008 Athens State College           AL IIB  466 394 351 396  558 476 427  478  20  18  30   0   68

# Let readr guess the fixed-width field boundaries from the blank columns.
readr::fwf_empty(
  file = "C:/Nicole/Rstudio IDE code/R input output/aaup2.txt"
)

## $begin
##  [1]  0  6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
## 
## $end
##  [1]  5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
## 
## $col_names
##  [1] "X1"  "X2"  "X3"  "X4"  "X5"  "X6"  "X7"  "X8"  "X9"  "X10" "X11" "X12"
## [13] "X13" "X14" "X15" "X16"

View(dta)

# fwf_empty() reports 16 fields, but the state abbreviation and the type code
# each need their own column, which gives 17 columns in total.
dta <- readr::read_fwf(
  file = "C:/Nicole/Rstudio IDE code/R input output/aaup2.txt",
  col_positions = readr::fwf_cols(
    X1 = 6, X2 = 31, X3 = 2, X4 = 4, X5 = 5, X6 = 4, X7 = 4, X8 = 4, X9 = 5,
    X10 = 4, X11 = 4, X12 = 5, X13 = 4, X14 = 4, X15 = 4, X16 = 5, X17 = 5
  ),
  # Fields containing "*" are read as NA.
  na = "*"
)

## Rows: 1161 Columns: 17

## -- Column specification --------------------------------------------------------
## 
## chr  (3): X2, X3, X4
## dbl (14): X1, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print the first rows as a plain data frame.
head(as.data.frame(dta))

##      X1                        X2 X3  X4  X5  X6  X7  X8  X9 X10 X11 X12 X13
## 1  1061 Alaska Pacific University AK IIB 454 382 362 382 567 485 471 487   6
## 2  1063     Univ.Alaska-Fairbanks AK   I 686 560 432 508 914 753 572 677  74
## 3  1065     Univ.Alaska-Southeast AK IIA 533 494 329 415 716 663 442 559   9
## 4 11462     Univ.Alaska-Anchorage AK IIA 612 507 414 498 825 681 557 670 115
## 5  1002 Alabama Agri.&Mech. Univ. AL IIA 442 369 310 350 530 444 376 423  59
## 6  1004  University of Montevallo AL IIA 441 385 310 388 542 473 383 477  57
##   X14 X15 X16 X17
## 1  11   9   4  32
## 2 125 118  40 404
## 3  26  20   9  70
## 4 124 101  21 392
## 5  77 102  24 262
## 6  33  35   2 127

# Check the imported data: column types and leading values.
dta |> dplyr::glimpse()

## Rows: 1,161
## Columns: 17
## $ X1  <dbl> 1061, 1063, 1065, 11462, 1002, 1004, 1008, 1009, 1012, 1016, 1019,~
## $ X2  <chr> "Alaska Pacific University", "Univ.Alaska-Fairbanks", "Univ.Alaska~
## $ X3  <chr> "AK", "AK", "AK", "AK", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ~
## $ X4  <chr> "IIB", "I", "IIA", "IIA", "IIA", "IIA", "IIB", "I", "IIB", "IIB", ~
## $ X5  <dbl> 454, 686, 533, 612, 442, 441, 466, 580, 498, 506, 339, 461, 360, 3~
## $ X6  <dbl> 382, 560, 494, 507, 369, 385, 394, 437, 379, 412, 303, 389, 304, 3~
## $ X7  <dbl> 362, 432, 329, 414, 310, 310, 351, 374, 322, 359, 287, 338, 258, 2~
## $ X8  <dbl> 382, 508, 415, 498, 350, 388, 396, 455, 401, 411, 301, 386, 300, 2~
## $ X9  <dbl> 567, 914, 716, 825, 530, 542, 558, 692, 655, 607, 421, 585, 433, 4~
## $ X10 <dbl> 485, 753, 663, 681, 444, 473, 476, 527, 501, 508, 371, 496, 369, 4~
## $ X11 <dbl> 471, 572, 442, 557, 376, 383, 427, 451, 404, 445, 347, 436, 313, 3~
## $ X12 <dbl> 487, 677, 559, 670, 423, 477, 478, 546, 523, 503, 366, 493, 363, 3~
## $ X13 <dbl> 6, 74, 9, 115, 59, 57, 20, 366, 34, 67, 8, 106, 27, 17, 18, 83, 23~
## $ X14 <dbl> 11, 125, 26, 124, 77, 33, 18, 354, 25, 40, 15, 42, 25, 19, 28, 46,~
## $ X15 <dbl> 9, 118, 20, 101, 102, 35, 30, 301, 27, 66, 19, 66, 33, 31, 28, 77,~
## $ X16 <dbl> 4, 40, 9, 21, 24, 2, 0, 66, 3, 27, 2, 58, 4, 19, 3, 9, 1, 10, 19, ~
## $ X17 <dbl> 32, 404, 70, 392, 262, 127, 68, 1109, 89, 200, 44, 272, 89, 86, 77~

# Look at the table in the viewer to double-check the data once more.

View(dta)

In class 2

# Read the Big5-encoded class roster; text columns become factors here.
roster <- read.csv(
  "C:/Nicole/Rstudio IDE code/R input output/ncku_roster.csv",
  header = TRUE,
  stringsAsFactors = TRUE,
  fileEncoding = "big5"
)

# Check the data.
head(roster) |> knitr::kable()

座號	系.年.班	開課系序號	學號	姓名	成績	選課時間
教師:U3023 許清芳			上課時間: 一[6-8];開課號:U3006 U7031	科目:資料管理	NA
1	心理系 3	U7031	D840239	蘇	NA	02/17/2016 09:17:40
2	心理系 3	U7031	D840057	吳	NA	02/17/2016 09:17:28
3	心理系 4	U7031	D841311	余	NA	02/17/2016 09:09:10
4	心理系 4	U7031	D840140	王	NA	02/17/2016 09:09:34
5	教育所 1 碩	U3006	U360098	劉	NA	01/18/2016 14:56:35

# Re-read the roster as plain text, drop the first row (the instructor/course
# information line) and keep columns 2, 4 and 5: department/year/class,
# student ID and name.
roster1 <- read.csv(
  "C:/Nicole/Rstudio IDE code/R input output/ncku_roster.csv",
  header = TRUE,
  fileEncoding = "big5"
)[-1, c(2, 4, 5)]

# `colnames<-` fails on objects with fewer than two dimensions. Selecting
# three columns keeps roster1 a data frame, so it can be printed and renamed
# without an extra as.data.frame() conversion.
head(roster1)

##                                            系.年.班    學號 姓名
## 2 心理系           3                                D840239   蘇
## 3 心理系           3                                D840057   吳
## 4 心理系           4                                D841311   余
## 5 心理系           4                                D840140   王
## 6 教育所           1 碩                             U360098   劉
## 7 教育所           1 博                             U380416   陳

# Give the three retained columns English names, then inspect the structure.
names(roster1) <- c("Major", "ID", "Name")

str(roster1)

## 'data.frame':    15 obs. of  3 variables:
##  $ Major: chr  "心理系           3                               " "心理系           3                               " "心理系           4                               " "心理系           4                               " ...
##  $ ID   : chr  "D840239" "D840057" "D841311" "D840140" ...
##  $ Name : chr  "蘇" "吳" "余" "王" ...

View(roster1)

# Keep only the first three characters of the first column (the department
# name), dropping the year/class part and the padding.
roster1[[1]] <- substr(roster1[[1]], start = 1, stop = 3)

# Count students per department. table(roster1$Major) reportedly failed with
# an "X is a list" error, so the one-column data frame is tabulated instead.
table(roster1[1])

## 
## 心理系 心理所 教育所 
##      4      7      4

In class 3

# Read the Excel workbook with the readxl package.
library(readxl)

normtemp <- read_excel(
  path = "C:/Nicole/Rstudio IDE code/R input output/normtemp.xls",
  sheet = "Data"
)

head(normtemp)

## # A tibble: 6 x 3
##    Temp   Sex Beats
##   <dbl> <dbl> <dbl>
## 1  96.3     1    70
## 2  96.7     1    71
## 3  96.9     1    74
## 4  97       1    80
## 5  97.1     1    73
## 6  97.1     1    75

# Regress body temperature on heart rate.
normtemp.lm <- lm(formula = Temp ~ Beats, data = normtemp)

# Show the coefficient table and fit statistics.
summary(normtemp.lm)

## 
## Call:
## lm(formula = Temp ~ Beats, data = normtemp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85017 -0.39999  0.01033  0.43915  2.46549 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 96.306754   0.657703 146.429  < 2e-16 ***
## Beats        0.026335   0.008876   2.967  0.00359 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.712 on 128 degrees of freedom
## Multiple R-squared:  0.06434,    Adjusted R-squared:  0.05703 
## F-statistic: 8.802 on 1 and 128 DF,  p-value: 0.003591

# R-squared = 0.06434, p = 0.00359: temperature tends to rise with heart rate.
plot(Temp ~ Beats, data = normtemp)
# Overlay the fitted regression line (dashed) from the model fitted above.
abline(reg = normtemp.lm, lty = 2)
title(main = "Temperature vs  Heart rate")

# Gender difference in mean temperature.
# ANOVA can compare two or more groups. Temperature is the response and sex
# is the grouping factor (not Sex ~ Temp, which models sex as the outcome).
# With one predictor and two groups the F statistic and p-value are the same
# in either direction; only the Sum Sq / Mean Sq rows differ, and they now
# describe variation in temperature.
summary(aov(Temp ~ factor(Sex), data = normtemp))

##                Df Sum Sq Mean Sq F value Pr(>F)  
## normtemp$Temp   1  1.274   1.274   5.223 0.0239 *
## Residuals     128 31.226   0.244                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Mean temperature differs between men and women (p = 0.0239).
boxplot(Temp ~ Sex, data = normtemp, xlab = "Sex", ylab = "Temperature")

In class 4

# Load the built-in women data set and preview its first rows.
data("women")
head(women)

##   height weight
## 1     58    115
## 2     59    117
## 3     60    120
## 4     61    123
## 5     62    126
## 6     63    129

plot(women)

# Make a copy to edit by hand in the spreadsheet editor, producing a
# deliberately altered data set. The printout below shows that the edited
# copy comes back as a plain list.
women1 <- women
data.entry(women1)

print(women1)

## $height
##  [1] 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 
## $weight
##  [1] 115 117 120 123 126 129 132 135 139 142 146 150 154 159 164

# Check the class of the edited copy.
women1 |> class()

## [1] "list"

# Pick the x and y vectors explicitly for the plot.
plot(x = women1$height, y = women1$weight)

In class 5

# Import an Excel file from the web.
pacman::p_load(readxl, httr)

fL <- "https://www.ris.gov.tw/documents/data/en/3/History-Table-8-2020.xls"
# Download to a temporary .xls file; tf keeps its path for read_excel().
tf <- tempfile(fileext = ".xls")
GET(url = fL, write_disk(path = tf))

## Response [https://www.ris.gov.tw/documents/data/en/3/History-Table-8-2020.xls]
##   Date: 2021-10-31 11:00
##   Status: 200
##   Content-Type: application/vnd.ms-excel
##   Size: 34.3 kB
## <ON DISK>  C:\Users\USER\AppData\Local\Temp\Rtmpy25ujZ\fileb2017cd77e2.xls

# Read the first sheet of the downloaded workbook.
Marriage <- read_excel(path = tf, sheet = 1)

## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5

# Check the column types and leading values of the raw sheet.
Marriage |> dplyr::glimpse()

## Rows: 55
## Columns: 5
## $ `Table 8. Couples of Marriages, Divorces, Crude Marriage Rate and Crude Divorce Rate` <chr> ~
## $ ...2                                                                                  <chr> ~
## $ ...3                                                                                  <chr> ~
## $ ...4                                                                                  <chr> ~
## $ ...5                                                                                  <chr> ~

# Inspect the sheet to confirm which rows and columns to drop.
View(Marriage)

# Drop rows 1-3 and 50-55, keep columns 1 and 5 (year and crude divorce
# rate, per the names assigned below), and convert to a plain data frame.
Marriage1 <- as.data.frame(Marriage[-c(1:3, 50:55), c(1, 5)])

head(Marriage1)

##   Table 8. Couples of Marriages, Divorces, Crude Marriage Rate and Crude Divorce Rate
## 1                                                                                1975
## 2                                                                                1976
## 3                                                                                1977
## 4                                                                                1978
## 5                                                                                1979
## 6                                                                                1980
##                  ...5
## 1 0.46000000000000002
## 2                 0.5
## 3 0.56000000000000005
## 4 0.64000000000000001
## 5 0.72999999999999998
## 6 0.77000000000000002

View(Marriage1)

# Name the two columns. "Year" is written without a trailing space so the
# column can be referenced by its plain name.
colnames(Marriage1) <- c("Year", "Crude Divorce Rate")

# read_excel() returned both columns as text (see the glimpse() and head()
# printouts); convert them to numbers explicitly instead of relying on
# plot() to coerce the strings.
Marriage1[] <- lapply(Marriage1, as.numeric)

# Plot the trend of the crude divorce rate over the years.
plot(Marriage1, xlab = "Year", ylab = "Crude Divorce Rate")