#
# An R script for IQ_Beh data set
#

# Read the whitespace-delimited data file. The first line holds the column
# names and the first column supplies the row names.
# stringsAsFactors = TRUE keeps Dep a factor on R >= 4.0 (where the default
# became FALSE), matching the str() output recorded below and the later use of
# Dep as a plotting colour.
# NOTE(review): the absolute path is machine-specific -- adjust it before
# running this script elsewhere.
dta <- read.table("C:/Users/5A88/Desktop/IQ_Beh.txt", header = TRUE,
                  row.names = 1, stringsAsFactors = TRUE)

# Compact overview of the object: its dimensions and the type of each column
str(dta)
## 'data.frame':    94 obs. of  3 variables:
##  $ Dep: Factor w/ 2 levels "D","N": 2 2 2 2 1 2 2 2 2 2 ...
##  $ IQ : int  103 124 124 104 96 92 124 99 92 116 ...
##  $ BP : int  4 12 9 3 3 3 6 4 3 9 ...
# Peek at the first six rows (six is also head()'s default)
head(dta, n = 6L)
##   Dep  IQ BP
## 1   N 103  4
## 2   N 124 12
## 3   N 124  9
## 4   N 104  3
## 5   D  96  3
## 6   N  92  3
# Class of the object (e.g. data frame, matrix, factor, ...)
class(dta)
## [1] "data.frame"
# Dimensions of the data frame: number of rows, then number of columns
dim(dta)
## [1] 94  3
# Names of the variables (columns)
colnames(dta)
## [1] "Dep" "IQ"  "BP"
# Is the variable "BP" a plain vector? is.vector() is TRUE only when the object
# carries no attributes other than names.
is.vector(dta[["BP"]])
## [1] TRUE
# Extract the first row, keeping every column
dta[1L, ]
##   Dep  IQ BP
## 1   N 103  4
# Extract the IQ values of rows 1 to 3
dta$IQ[1:3]
## [1] 103 124 124
# Sort the rows by BP in ascending order and show the last six,
# i.e. the rows with the highest behavior problem scores
tail(dta[order(dta[["BP"]]), ], n = 6L)
##    Dep  IQ BP
## 16   N  89 11
## 58   N 117 11
## 66   N 126 11
## 2    N 124 12
## 73   D  99 13
## 12   D  22 17
# Sort the rows by BP in descending order and show the last four,
# i.e. the four rows with the lowest behavior problem scores
tail(dta[order(dta[["BP"]], decreasing = TRUE), ], n = 4L)
##    Dep  IQ BP
## 77   N 124  1
## 80   N 121  1
## 24   N 106  0
## 75   N 122  0
# Histogram of the IQ scores: IQ on the x-axis, frequency on the y-axis,
# with the main title suppressed
hist(dta$IQ, xlab = "IQ", main = "")

# Boxplots of the behavior problem score, one box per level of Dep
boxplot(BP ~ Dep, data = dta,
        ylab = "Behavior problem score",
        xlab = "Depression")

# Scatter plot of IQ (y-axis) against behavior problem score (x-axis), drawn
# with small solid dots (pch = 20) and coloured by the Dep group.
# Dep is converted to integer level codes explicitly: a factor yields the same
# codes as before, while a character column (the default result of
# read.table() on R >= 4.0) would otherwise be rejected as an invalid colour
# name.
plot(IQ ~ BP, data = dta, pch = 20, col = as.integer(as.factor(dta$Dep)),
     xlab = "Behavior problem score", ylab = "IQ")
# Overlay reference grid lines on the current plot
grid()

# Add trend lines to the scatter plot.
# Set up empty axes first (type = "n" draws no points) so that each
# observation can be marked with its Dep label instead of a plotting symbol.
plot(BP ~ IQ, data = dta, type = "n",
     xlab = "IQ", ylab = "Behavior problem score")
with(dta, text(IQ, BP, labels = Dep, cex = 0.5))
# Least-squares line of BP on IQ fitted to the "D" rows only (default line type)
abline(reg = lm(BP ~ IQ, data = dta, subset = Dep == "D"))
# The same fit for the "N" rows, drawn dashed (lty = 2) to tell the groups apart
abline(reg = lm(BP ~ IQ, data = dta, subset = Dep == "N"), lty = 2)

## end