元素(Ingredients)

1. 變數

R

c(class(5), class(5L), class(5 + 3i),class(T),class("ANDY"))
## [1] "numeric"   "integer"   "complex"   "logical"   "character"
c(typeof(5),typeof(5L),typeof(5 + 3i),typeof(T),typeof("ANDY")) #determines the internal type
## [1] "double"    "integer"   "complex"   "logical"   "character"

python

print(type(5),type(5.5),type(5 + 3j),type(True),type("ANDY"))
## <class 'int'> <class 'float'> <class 'complex'> <class 'bool'> <class 'str'>

2. 變數運算

R

c( 3 + T * 2 - F , paste0("莊","小良")) # T=1 and F=0 but "莊" + " 小良" = Error
## [1] "5"                        "<U+838A><U+5C0F><U+826F>"

python

print(1.0 == True) # True
print(0 == False) # True
print(1.2 + True) # 2.2
print(3 + True * 2) # 5
print("莊" +"小良") #
## True
## True
## 2.2
## 5
## <U+838A><U+5C0F><U+826F>

3. 指派給物件的運算子

R

money <- 30 
spend <- 20 
money1 = money - spend 
money2 = money + spend 
money3 = money * spend 
money4 = money / spend 
money5 = money %% spend 
c(money1, money2, money3, money4, money5)
## [1]  10.0  50.0 600.0   1.5  10.0

python

money1 = 30;money2 = 30;money3 = 30;money4 = 30;money5 = 30
spend = 20 
money1 -= spend
money2 += spend
money3 *= spend
money4 /= spend
money5 %= spend
print(money1, money2, money3, money4, money5)
## 10 50 600 1.5 10

4. 轉換物件性質

R

c(
as.numeric(T),
as.integer(T),
as.complex(T),
as.logical(T),
as.character(T),
paste0("我有",4,"顆蘋果")
)
## [1] "1"                                        
## [2] "1"                                        
## [3] "1+0i"                                     
## [4] "TRUE"                                     
## [5] "TRUE"                                     
## [6] "<U+6211><U+6709>4<U+9846><U+860B><U+679C>"

python

print(type(float(True)))
print(type(int(True)))
print(type(complex(True)))
print(type(bool(True)))
print(type(str(True)))
print("我有 " + str(4) +" 顆蘋果")
## <class 'float'>
## <class 'int'>
## <class 'complex'>
## <class 'bool'>
## <class 'str'>
## <U+6211><U+6709> 4 <U+9846><U+860B><U+679C>

集合 (Collections)

1. R向量vector

R

my_vector = c("1.vector","2.factor","3.matrix","4.data frame","5.list")
my_vector 
## [1] "1.vector"     "2.factor"     "3.matrix"     "4.data frame"
## [5] "5.list"
my_vector[1]
## [1] "1.vector"

2. R因子factor

R

my_factor = factor(c("1.vector","2.factor","3.matrix","4.data frame","5.list"))
my_factor_ordinal = factor(c("1.vector","2.factor","3.matrix","4.data frame","5.list"), ordered = T, levels = c("1.vector","2.factor","3.matrix","4.data frame","5.list"))
my_factor
## [1] 1.vector     2.factor     3.matrix     4.data frame 5.list      
## Levels: 1.vector 2.factor 3.matrix 4.data frame 5.list
my_factor[1]
## [1] 1.vector
## Levels: 1.vector 2.factor 3.matrix 4.data frame 5.list
my_factor_ordinal
## [1] 1.vector     2.factor     3.matrix     4.data frame 5.list      
## Levels: 1.vector < 2.factor < 3.matrix < 4.data frame < 5.list

3. R矩陣matrix

R

my_matrix = matrix(c("1.vector","2.factor","3.matrix","4.data frame","5.list"), nrow=5, ncol=3)
my_matrix
##      [,1]           [,2]           [,3]          
## [1,] "1.vector"     "1.vector"     "1.vector"    
## [2,] "2.factor"     "2.factor"     "2.factor"    
## [3,] "3.matrix"     "3.matrix"     "3.matrix"    
## [4,] "4.data frame" "4.data frame" "4.data frame"
## [5,] "5.list"       "5.list"       "5.list"
my_matrix[1,]
## [1] "1.vector" "1.vector" "1.vector"
my_matrix[,1]
## [1] "1.vector"     "2.factor"     "3.matrix"     "4.data frame"
## [5] "5.list"
my_matrix[1,1]
## [1] "1.vector"

4. R資料框data.frame

R

# Build a data frame whose columns hold different types; every column must
# have the same length. stringsAsFactors = TRUE is spelled out because the
# default became FALSE in R 4.0.0, and the factor output shown below
# ("Levels: ...") relies on character columns being converted to factors.
my_data_frame <- data.frame(
  type = c("1.vector", "2.factor", "3.matrix", "4.data frame", "5.list"),
  dim = c(1, 1, 2, 2, "unlimited"),
  level = c(FALSE, TRUE, FALSE, FALSE, FALSE),
  order = 1:5,
  stringsAsFactors = TRUE
)

my_data_frame # every column vector must have the same length
##           type       dim level order
## 1     1.vector         1 FALSE     1
## 2     2.factor         1  TRUE     2
## 3     3.matrix         2 FALSE     3
## 4 4.data frame         2 FALSE     4
## 5       5.list unlimited FALSE     5
my_data_frame$type
## [1] 1.vector     2.factor     3.matrix     4.data frame 5.list      
## Levels: 1.vector 2.factor 3.matrix 4.data frame 5.list
my_data_frame[,1]
## [1] 1.vector     2.factor     3.matrix     4.data frame 5.list      
## Levels: 1.vector 2.factor 3.matrix 4.data frame 5.list
my_data_frame$dim[5]
## [1] unlimited
## Levels: 1 2 unlimited
my_data_frame[5,2]
## [1] unlimited
## Levels: 1 2 unlimited
my_data_frame["type"]
##           type
## 1     1.vector
## 2     2.factor
## 3     3.matrix
## 4 4.data frame
## 5       5.list
sapply(my_data_frame, FUN = class) # 每一欄可以有不同屬性的向量
##      type       dim     level     order 
##  "factor"  "factor" "logical" "integer"
my_data_frame[my_data_frame$dim==2,] #超強的邏輯"列"篩選
##           type dim level order
## 3     3.matrix   2 FALSE     3
## 4 4.data frame   2 FALSE     4
my_data_frame[,c("type","dim")] #超強的邏輯"欄"篩選
##           type       dim
## 1     1.vector         1
## 2     2.factor         1
## 3     3.matrix         2
## 4 4.data frame         2
## 5       5.list unlimited
dim(my_data_frame) # 回傳列欄
## [1] 5 4
str(my_data_frame) # 回傳結構
## 'data.frame':    5 obs. of  4 variables:
##  $ type : Factor w/ 5 levels "1.vector","2.factor",..: 1 2 3 4 5
##  $ dim  : Factor w/ 3 levels "1","2","unlimited": 1 1 2 2 3
##  $ level: logi  FALSE TRUE FALSE FALSE FALSE
##  $ order: int  1 2 3 4 5

5. R清單(list)

R

all_type = c("1.vector","2.factor","3.matrix","4.data frame","5.list")
my_list = list(my_vector = all_type,
                     my_factor = factor(all_type),
                     my_matrix = matrix(all_type, nrow=5, ncol=3),
                     my_data_frame = data.frame(all_type,dim = c(1,1,2,2,"unlimited")))
my_list#可以裝不同集合的資料
## $my_vector
## [1] "1.vector"     "2.factor"     "3.matrix"     "4.data frame"
## [5] "5.list"      
## 
## $my_factor
## [1] 1.vector     2.factor     3.matrix     4.data frame 5.list      
## Levels: 1.vector 2.factor 3.matrix 4.data frame 5.list
## 
## $my_matrix
##      [,1]           [,2]           [,3]          
## [1,] "1.vector"     "1.vector"     "1.vector"    
## [2,] "2.factor"     "2.factor"     "2.factor"    
## [3,] "3.matrix"     "3.matrix"     "3.matrix"    
## [4,] "4.data frame" "4.data frame" "4.data frame"
## [5,] "5.list"       "5.list"       "5.list"      
## 
## $my_data_frame
##       all_type       dim
## 1     1.vector         1
## 2     2.factor         1
## 3     3.matrix         2
## 4 4.data frame         2
## 5       5.list unlimited
my_list$my_vector
## [1] "1.vector"     "2.factor"     "3.matrix"     "4.data frame"
## [5] "5.list"
my_list[["my_factor"]]
## [1] 1.vector     2.factor     3.matrix     4.data frame 5.list      
## Levels: 1.vector 2.factor 3.matrix 4.data frame 5.list
my_list[[1]]
## [1] "1.vector"     "2.factor"     "3.matrix"     "4.data frame"
## [5] "5.list"

6. R邏輯(logical)

R

# Compare the elementwise and the scalar logical operators on two vectors.
andy_hansome <- c(TRUE, FALSE)
andy_fat <- c(TRUE, FALSE)
t <- c(1, 2, 3)
c(
  andy_hansome | andy_fat, # elementwise (inclusive) OR: one result per position
  xor(andy_hansome, andy_fat), # elementwise exclusive OR
  # `||` is the scalar operator. R < 4.3 silently used only the first element
  # of each side; R >= 4.3 raises an error for longer vectors, so the first
  # elements are selected explicitly to keep the result shown below.
  andy_hansome[1] || andy_fat[1],
  is.numeric(t)
)
## [1]  TRUE FALSE FALSE FALSE  TRUE  TRUE

1. python彈性清單list

python

# Build a nested list: every element of my_list is itself a list.
my_str = ["list","tuple","dictionary"]
my_float = [4]
my_bool = [True]
my_list = [my_str,my_float,my_bool]

print(my_list)
print(my_list[0])# indexing starts at 0; this is the inner list of strings
print(type(my_list[1]))# the inner list holding the number, so the type is list
print(type(my_list[2]))# the inner list holding the boolean, so the type is list
print(my_list[0][1])# second element (index 1) of the first inner list
my_list.insert(3,"new") # insert "new" at index 3, i.e. as the fourth element
print(my_list)
## [['list', 'tuple', 'dictionary'], [4], [True]]
## ['list', 'tuple', 'dictionary']
## <class 'list'>
## <class 'list'>
## tuple
## [['list', 'tuple', 'dictionary'], [4], [True], 'new']

2. python固定清單tuple

python

# Convert a nested list into a tuple, which cannot be extended afterwards.
my_str = ["list","tuple","dictionary"]
my_float = [4]
my_bool = [True]
my_list = [my_str,my_float,my_bool]

my_list_tuple = tuple(my_list) # tuple() takes at most 1 argument (an iterable)
# The failing call is only printed as text here, it is not executed.
print("my_list_tuple.insert(3,'new')" + "  (AttributeError : tuple物件不能新增)")
## my_list_tuple.insert(3,'new')  (AttributeError : tuple<U+7269><U+4EF6><U+4E0D><U+80FD><U+65B0><U+589E>)

3. python 字典dictionary

python

my_str = ["list","tuple","dictionary"]
my_float = [4]
my_bool = [True]
my_dictionary = {
"my_str" : my_str,
"my_float" : my_float,
"my_bool" : my_bool
}
print(my_dictionary)
print(my_dictionary["my_str"])
print("my_dictionary[0]" + " (KeyError: 無法用位置呼叫)")
## {'my_str': ['list', 'tuple', 'dictionary'], 'my_float': [4], 'my_bool': [True]}
## ['list', 'tuple', 'dictionary']
## my_dictionary[0] (KeyError: <U+7121><U+6CD5><U+7528><U+4F4D><U+7F6E><U+547C><U+53EB>)

4. python 資料框pandas


import pandas as pd

my_str = ["list","tuple","dictionary"]
my_float = [4,5,6]
my_bool = [True,True,False]
my_dictionary = {
    "my_str" : my_str,
    "my_float" : my_float,
    "my_bool" : my_bool
}

# Pin the column order explicitly. Old pandas sorted dict keys alphabetically
# (the output below was produced that way); current pandas keeps insertion
# order, which would otherwise change every positional lookup in this demo.
pd_my_dictionary = pd.DataFrame(my_dictionary, columns = ["my_bool","my_float","my_str"])
print(type(pd_my_dictionary))
print(pd_my_dictionary)
print("------------------------------------------------1")
# Rename through a mapping so each new label is tied to the column it
# describes instead of to whatever happens to sit at that position.
pd_my_dictionary = pd_my_dictionary.rename(
    columns = {"my_bool" : "bool", "my_float" : "float", "my_str" : "str"}
)
pd_my_dictionary.index = ["a","b","c"]
print(pd_my_dictionary)
print("------------------------------------------------2")
print(pd_my_dictionary["bool"])            # one column as a Series
print("------------------------------------------------3")
print(pd_my_dictionary[["bool","str"]])    # several columns as a DataFrame
print("------------------------------------------------4")
print(pd_my_dictionary.iloc[:,0])          # all rows of the first column
print("------------------------------------------------5")
print(pd_my_dictionary.iloc[0:1,1])        # first row of the second column
print("------------------------------------------------6")
print(pd_my_dictionary.shape)              # (rows, columns)
print("------------------------------------------------7")
print(pd_my_dictionary.columns)
print("------------------------------------------------8")
print(pd_my_dictionary.index)
print("------------------------------------------------9")
print(pd_my_dictionary.head(2))            # first two rows
print("------------------------------------------------10")
print(pd_my_dictionary.describe())         # summary statistics of numeric columns
## <class 'pandas.core.frame.DataFrame'>
##    my_bool  my_float      my_str
## 0     True         4        list
## 1     True         5       tuple
## 2    False         6  dictionary
## ------------------------------------------------1
##     bool  float         str
## a   True      4        list
## b   True      5       tuple
## c  False      6  dictionary
## ------------------------------------------------2
## a     True
## b     True
## c    False
## Name: bool, dtype: bool
## ------------------------------------------------3
##     bool         str
## a   True        list
## b   True       tuple
## c  False  dictionary
## ------------------------------------------------4
## a     True
## b     True
## c    False
## Name: bool, dtype: bool
## ------------------------------------------------5
## a    4
## Name: float, dtype: int64
## ------------------------------------------------6
## (3, 3)
## ------------------------------------------------7
## Index(['bool', 'float', 'str'], dtype='object')
## ------------------------------------------------8
## Index(['a', 'b', 'c'], dtype='object')
## ------------------------------------------------9
##    bool  float    str
## a  True      4   list
## b  True      5  tuple
## ------------------------------------------------10
##        float
## count    3.0
## mean     5.0
## std      1.0
## min      4.0
## 25%      4.5
## 50%      5.0
## 75%      5.5
## max      6.0

5. python 因子pandas

import pandas as pd 
python_type = pd.Categorical(["tuple","[ ] as list", "dictionary","pandas as data.frame", "numpy"])
python_type_Catergories = pd.Categorical(python_type, categories = ["tuple","[ ] as list", "dictionary","pandas as data.frame", "numpy"], ordered = True)
print(type(python_type))
print(python_type)
print(python_type_Catergories)
## <class 'pandas.core.categorical.Categorical'>
## [tuple, [ ] as list, dictionary, pandas as data.frame, numpy]
## Categories (5, object): [[ ] as list, dictionary, numpy, pandas as data.frame, tuple]
## [tuple, [ ] as list, dictionary, pandas as data.frame, numpy]
## Categories (5, object): [tuple < [ ] as list < dictionary < pandas as data.frame < numpy]

運算 (Operation)

1. 向量與矩陣計算

R

my_vector = c(5,10,15,30)
my_vector30 = my_vector * 30
my_matrix = matrix(my_vector,ncol=2,nrow=2)
list(my_vector = my_vector,my_vector30=my_vector30, my_matrix = my_matrix,my_matrix30 = my_matrix*30)
## $my_vector
## [1]  5 10 15 30
## 
## $my_vector30
## [1] 150 300 450 900
## 
## $my_matrix
##      [,1] [,2]
## [1,]    5   15
## [2,]   10   30
## 
## $my_matrix30
##      [,1] [,2]
## [1,]  150  450
## [2,]  300  900

python

import numpy as np 

my_list_vector = [5,10,15,30]
my_list_vector_np = np.array([5,10,15,30]) 

my_list_vector_np_30 = my_list_vector_np * 30
print(my_list_vector_np)
print(my_list_vector_np_30)


my_list_matrix = [[5,15],
                   [10,30]]
my_list_matrix_np = np.array([[5,15],
                              [10,30]])

my_list_matrix_np_30 = my_list_matrix_np *30
print(my_list_matrix_np)
print(my_list_matrix_np_30)
## [ 5 10 15 30]
## [150 300 450 900]
## [[ 5 15]
##  [10 30]]
## [[150 450]
##  [300 900]]

2. 邏輯篩選


向量


R

my_vector = c(5,10,15,30)
names(my_vector) = c("small", "tall", "grande","venti")
list(my_vector = my_vector, 
     my_SIZE_1 = my_vector[my_vector<15],
     my_SIZE_2 = my_vector[names(my_vector)=="small"|names(my_vector)=="tall"],
     my_SIZE_names = names(my_vector[my_vector<15])
     )
## $my_vector
##  small   tall grande  venti 
##      5     10     15     30 
## 
## $my_SIZE_1
## small  tall 
##     5    10 
## 
## $my_SIZE_2
## small  tall 
##     5    10 
## 
## $my_SIZE_names
## [1] "small" "tall"

python

import numpy as np
import pandas as pd
my_vector_py =  [5,10,15,20] 
my_vector_py_np = np.array(my_vector_py)
my_vector_py_pd = pd.Series([5,10,15,20])#same


print(my_vector_py_np)
print(my_vector_py_pd)
print(my_vector_py_np[[0,1]])
print(my_vector_py_np[my_vector_py_np < 15])
## [ 5 10 15 20]
## 0     5
## 1    10
## 2    15
## 3    20
## dtype: int64
## [ 5 10]
## [ 5 10]


矩陣


R

my_matrix = matrix(c(5,10,15,30,40,50),2,3)
matrix_frame = data.frame(my_matrix)
names(matrix_frame) = c("cheap","normal","expensive")

list(length = length(my_matrix),
     dimension = dim(my_matrix),
     my_matrix = my_matrix, 
     my_SIZE_1 = my_matrix[my_matrix<15],
     matrix_frame = matrix_frame,
     cheapest = matrix_frame[,c("cheap")]<10,
     cheapest = matrix_frame[matrix_frame$cheap<10,]
     )
## $length
## [1] 6
## 
## $dimension
## [1] 2 3
## 
## $my_matrix
##      [,1] [,2] [,3]
## [1,]    5   15   40
## [2,]   10   30   50
## 
## $my_SIZE_1
## [1]  5 10
## 
## $matrix_frame
##   cheap normal expensive
## 1     5     15        40
## 2    10     30        50
## 
## $cheapest
## [1]  TRUE FALSE
## 
## $cheapest
##   cheap normal expensive
## 1     5     15        40

python

import numpy as np
import pandas as pd
my_matrix_py = [[5,15,40],
                 [10,30,20]]
my_matrix_py_np = np.array(my_matrix_py)



matrix_frame = pd.DataFrame(my_matrix_py_np, columns = ["cheap","normal","expensive"], index = ["a","b"] )
print(my_matrix_py_np[my_matrix_py_np < 15])
print("------------------------------------------------1")
print(my_matrix_py_np.size)
print("------------------------------------------------2")
print(my_matrix_py_np.shape)
print("------------------------------------------------3")
print(matrix_frame)
print("------------------------------------------------4")
print(matrix_frame["cheap"][matrix_frame.loc[:,"cheap"]<10])
print("------------------------------------------------5")
print(matrix_frame[matrix_frame.loc[:,"cheap"]<10])
print("------------------------------------------------6")
## [ 5 10]
## ------------------------------------------------1
## 6
## ------------------------------------------------2
## (2, 3)
## ------------------------------------------------3
##    cheap  normal  expensive
## a      5      15         40
## b     10      30         20
## ------------------------------------------------4
## a    5
## Name: cheap, dtype: int64
## ------------------------------------------------5
##    cheap  normal  expensive
## a      5      15         40
## ------------------------------------------------6

3. 迴圈

R

loop_frame = data.frame(
order = c(1:4),
pool = c(5:8),
loop = c("for", "while", "repeat", "apply")
)

wide = data.frame(mean(loop_frame[,1]), mean(loop_frame[,2]))
names(wide) = c(names(loop_frame)[1:2])
print(wide)
##   order pool
## 1   2.5  6.5
wide=c()
for(i in 1:2){
  wide = c(wide,mean(loop_frame[,i]))
  names(wide) = c(names(loop_frame)[1:i])
  };wide
## order  pool 
##   2.5   6.5
wide = c(); i=1
while(i < 3){
wide = c(wide,mean(loop_frame[,i]))
names(wide) = c(names(loop_frame)[1:i])
i = i+1
};wide
## order  pool 
##   2.5   6.5
wide = c()
i=1
repeat{
wide = c(wide,mean(loop_frame[,i]))
names(wide) = c(names(loop_frame)[1:i]) 
i = i+1 
if(i>2)break
};wide
## order  pool 
##   2.5   6.5
(wide = sapply(loop_frame[,1:2],mean))
## order  pool 
##   2.5   6.5
class(wide)
## [1] "numeric"
(wide = apply(loop_frame[,1:2],2,mean))
## order  pool 
##   2.5   6.5
class(wide)
## [1] "numeric"
(wide = lapply(loop_frame[,1:2],mean))
## $order
## [1] 2.5
## 
## $pool
## [1] 6.5
class(wide)
## [1] "list"
# Row-wise mean of the first two columns, accumulated one row at a time.
# seq_along() is much better than 1:length() or 1:ncol() when the data is
# empty: it then yields a zero-length sequence instead of c(1, 0).
wide <- c()
for (i in seq_along(loop_frame$order)) {
  wide <- c(wide, sum(loop_frame[i, 1], loop_frame[i, 2]) / 2)
}
wide
## [1] 3 4 5 6
(long = apply(loop_frame[,1:2],1,mean))
## [1] 3 4 5 6
class(long)
## [1] "numeric"

python

import numpy as np
import pandas as pd

loop_frame = {
    "order" : [1,2,3,4],
    "pool" : [5,6,7,8],
    "loop" : ["for", "while", "pd", "np"]
}

# Address the numeric columns by name. Selecting them by position only worked
# while pandas sorted dict keys alphabetically; with insertion order kept,
# position 2 is the text column "loop" and averaging it fails.
numeric_columns = ["order","pool"]

pd_loop_frame = pd.DataFrame(loop_frame)
print(pd_loop_frame[numeric_columns])
print("---------------------------------0")

print(np.array(pd_loop_frame[numeric_columns]))
print("---------------------------------1")

print(np.mean(np.array(pd_loop_frame[["order"]])))
print(np.mean(np.array(pd_loop_frame[["pool"]])))

print("---------------------------------2")
# Mean of each column after converting it to a numpy array (prints a scalar).
for column in numeric_columns:
  print(np.mean(np.array(pd_loop_frame[[column]])))

print("---------------------------------3")
# DataFrame.mean() averages column-wise and returns a one-element Series.
for column in numeric_columns:
  print(pd_loop_frame[[column]].mean())

print("---------------------------------4")
print(pd_loop_frame[numeric_columns][ 0:4 ].mean())

print("---------------------------------5")
data = pd_loop_frame[numeric_columns][ 0:4 ].mean()

# .iloc makes the positional access explicit; plain data[i] on a Series that
# is labelled with strings is ambiguous.
for i in range(len(data)):
  print(data.iloc[i])

print("---------------------------------6")

i = 0
while i < len(data):
  print(data.iloc[i])
  i += 1
##    order  pool
## 0      1     5
## 1      2     6
## 2      3     7
## 3      4     8
## ---------------------------------0
## [[1 5]
##  [2 6]
##  [3 7]
##  [4 8]]
## ---------------------------------1
## 2.5
## 6.5
## ---------------------------------2
## 2.5
## 6.5
## ---------------------------------3
## order    2.5
## dtype: float64
## pool    6.5
## dtype: float64
## ---------------------------------4
## order    2.5
## pool     6.5
## dtype: float64
## ---------------------------------5
## 2.5
## 6.5
## ---------------------------------6
## 2.5
## 6.5

4. 條件迴圈

R

loop_frame = data.frame(
order = c(1:4),
pool = c(5:8),
loop = c("for", "while", "repeat", "apply")
)

for(i in 1:2){
  if( (t = mean(loop_frame[,i]) ) < 5){
    print(paste0(t, " is order"))}else if (t > 5){
      print(paste(t," is pool",sep = "") )
    }else{
      print(paste(t," is non",sep = "") )
    } 
}
## [1] "2.5 is order"
## [1] "6.5 is pool"
for(i in 1:2){
  a = as.character( (t = mean(loop_frame[,i])) %% 2.5)
  switch(a,
         "0" = print(paste0(t, " is order")),
         "1.5" = print(paste0(t, " is pool")))
}
## [1] "2.5 is order"
## [1] "6.5 is pool"

python

import numpy as np
import pandas as pd

loop_frame = {
    "order" : [1,2,3,4],
    "pool" : [5,6,7,8],
    "loop" : ["for", "while", "pd", "np"]
}

pd_loop_frame = pd.DataFrame(loop_frame)

# DataFrame.mean() averages each selected column and returns a Series
# labelled by column name.
data = pd_loop_frame[["order","pool"]].mean()
# Classify every column mean against the threshold 5. .iloc makes the
# positional access explicit because the Series is labelled with strings.
for i in range(len(data)):
  if data.iloc[i] < 5:
    print(str(data.iloc[i]) + " is order")
  elif data.iloc[i] > 5:
    print(str(data.iloc[i]) + " is pool")
  else:
    print(str(data.iloc[i]) + " is non")
## 2.5 is order
## 6.5 is pool

5. 流程中止或跳過

R

for(i in seq(1,10,2)){
  if(i == 5) {
    next
  }else if(i > 7){
    break
  }else{
    print(i)
}
}
## [1] 1
## [1] 3
## [1] 7

python


for i in list(range(1,10,2)):
  if(i == 5):
    continue
  elif(i > 7):
    break
  else:
    print(i)
## 1
## 3
## 7

6. 內建函數

查詢R內建排序函數

help(sort) #or you can put the mouse and press F1 or F2 before the first round brackets
?sort

查詢python內建排序函數

help(sorted)
## Help on built-in function sorted in module builtins:
## 
## sorted(iterable, /, *, key=None, reverse=False)
##     Return a new list containing all items from the iterable in ascending order.
##     
##     A custom key function can be supplied to customize the sort order, and the
##     reverse flag can be set to request the result in descending order.

使用R內建排序函數

(tg= data.frame(
t = c(6,2,3,1,23,4,5,7),
g = c("t6","a2","b3","t1","gg23","tt4","c5","u7")
))
##    t    g
## 1  6   t6
## 2  2   a2
## 3  3   b3
## 4  1   t1
## 5 23 gg23
## 6  4  tt4
## 7  5   c5
## 8  7   u7
sort(tg$t, decreasing = T) # decreasing sort value
## [1] 23  7  6  5  4  3  2  1
(torder = order(tg$t, decreasing = T)) #decreaing sort postion
## [1] 5 8 1 7 6 3 2 4
tg[torder,] #sorted the data.frame
##    t    g
## 5 23 gg23
## 8  7   u7
## 1  6   t6
## 7  5   c5
## 6  4  tt4
## 3  3   b3
## 2  2   a2
## 4  1   t1

使用python內建排序函數

import numpy as np
import pandas as pd
n = {
"t" : [6,2,3,1,23,4,5,7],
"g" : ["t6","a2","b3","t1","gg23","tt4","c5","u7"]}

print("----------------1.dictionary")
print(n)
print("----------------2.pd.DataFrame")
print(pd.DataFrame(n))
print("----------------3.np.array")
print(np.array(pd.DataFrame(n)))
print("----------------4.sorted number")
print(sorted(n["t"], reverse = True))
print("----------------5.sorted position")
print(np.argsort(n["t"]))
print("----------------6.sorted pd.DataFrame")
print(np.array(pd.DataFrame(n))[np.argsort(n["t"])[::-1]])#[::-1]reverse 
## ----------------1.dictionary
## {'t': [6, 2, 3, 1, 23, 4, 5, 7], 'g': ['t6', 'a2', 'b3', 't1', 'gg23', 'tt4', 'c5', 'u7']}
## ----------------2.pd.DataFrame
##       g   t
## 0    t6   6
## 1    a2   2
## 2    b3   3
## 3    t1   1
## 4  gg23  23
## 5   tt4   4
## 6    c5   5
## 7    u7   7
## ----------------3.np.array
## [['t6' 6]
##  ['a2' 2]
##  ['b3' 3]
##  ['t1' 1]
##  ['gg23' 23]
##  ['tt4' 4]
##  ['c5' 5]
##  ['u7' 7]]
## ----------------4.sorted number
## [23, 7, 6, 5, 4, 3, 2, 1]
## ----------------5.sorted position
## [3 1 2 5 6 0 7 4]
## ----------------6.sorted pd.DataFrame
## [['gg23' 23]
##  ['u7' 7]
##  ['t6' 6]
##  ['c5' 5]
##  ['tt4' 4]
##  ['b3' 3]
##  ['a2' 2]
##  ['t1' 1]]

7. 自訂函數

R arguments_body_Environment

# Skeleton of a function definition: a name, an argument list and a body.
# NOTE(review): `grg2` looks like a typo for `arg2` - confirm before renaming.
# `body` stands in for real statements; as written the symbol resolves to
# whatever `body` is bound to (base R's body() function unless redefined).
fun = function(arg1,grg2){
  body
}

#' Add two values.
#'
#' @param x First addend.
#' @param y Second addend; defaults to 1.
#' @return `x + y`.
Anatomy <- function(x, y = 1) {
  x + y
}

formals(Anatomy)
## $x
## 
## 
## $y
## [1] 1
body(Anatomy)
## {
##     x + y
## }
environment(Anatomy) 
## <environment: R_GlobalEnv>
#function are object!!
mean2 = mean
(function(x, y = 1){mean2(x+y)})(1)
## [1] 2

R自訂函數

#' Add pi to a number and describe the result as text.
#'
#' @param x A numeric value.
#' @return A character string of the form "result  = <x + pi>".
plus_pi <- function(x) {
  paste0("result  = ", x + pi)
}
plus_pi(1)
## [1] "result  = 4.14159265358979"

python自訂函數

import math #for pi

def plus_pi(x):
    """Return the text "result = <x + pi>".

    The sum is stored in a new local instead of using ``x += math.pi``:
    the in-place form would modify a mutable argument (for example a numpy
    array) that still belongs to the caller.
    """
    total = x + math.pi
    return "result = " + str(total)

print(plus_pi(1))
## result = 4.141592653589793

練習-自訂R排序函數

#' Sort a vector with a hand-written insertion sort (practice version of
#' base sort() / order()).
#'
#' @param x Vector of mutually comparable values without NA.
#' @param decreasing If TRUE, sort from largest to smallest.
#' @param order If TRUE, return the original positions of the sorted values
#'   (like order()); otherwise return the sorted values (like sort()).
#' @return The sorted values, or the integer positions they came from. Equal
#'   values keep their original relative order.
sort_order <- function(x, decreasing = FALSE, order = FALSE) {
  # 1. keep the values and their original positions side by side
  values <- unname(x)
  position <- seq_along(x)

  # An earlier element has to move right when it belongs after the key.
  must_shift <- if (isTRUE(as.logical(decreasing))) {
    function(earlier, key) earlier < key
  } else {
    function(earlier, key) earlier > key
  }

  # 2. insertion sort. seq_along(x)[-1] is empty for inputs of length 0 or 1,
  # so they are returned untouched (2:length(x) would count down and fail).
  for (i in seq_along(x)[-1]) {
    key_value <- values[i]
    key_position <- position[i]
    j <- i - 1L
    while (j >= 1L && must_shift(values[j], key_value)) {
      values[j + 1L] <- values[j]
      position[j + 1L] <- position[j]
      j <- j - 1L
    }
    values[j + 1L] <- key_value
    position[j + 1L] <- key_position
  }

  # 3. return positions or values
  if (isTRUE(as.logical(order))) {
    position
  } else {
    values
  }
}

#create a new data
tg= data.frame(
t = c(6,2,3,1,23,4,5,7),
g = c("t6","a2","b3","t1","gg23","tt4","c5","u7")
)

#test function
sort_order(tg$t)
## [1]  1  2  3  4  5  6  7 23
#decreasing sort
sort_order(tg$t, decreasing = T,order = F)
## [1] 23  7  6  5  4  3  2  1
#decreasing sort position
(od = sort_order(tg$t, decreasing = T,order = T))
## [1] 5 8 1 7 6 3 2 4
#decreasing sort data.frame
tg[od,]
##    t    g
## 5 23 gg23
## 8  7   u7
## 1  6   t6
## 7  5   c5
## 6  4  tt4
## 3  3   b3
## 2  2   a2
## 4  1   t1

練習-自訂python排序函數

  
def sort_order(x, reverse = False, order = False):
  """Sort a sequence with a hand-written insertion sort.

  x       -- sequence of mutually comparable values (it is not modified)
  reverse -- if true, sort from largest to smallest
  order   -- if true, return the original 0-based positions of the sorted
             values (like numpy.argsort) instead of the values themselves

  Returns a numpy array. Equal values keep their original relative order.

  Values and positions are kept in two plain lists. The earlier version read
  them back from a DataFrame by column number, which only worked while
  pandas sorted dict keys alphabetically.
  """
  import numpy as np
  #1.make primary order
  values = list(x)
  positions = list(range(len(values)))
  #2.insertion sort: move each element left until it is in place
  for i in range(1, len(values)):
    key_value, key_position = values[i], positions[i]
    j = i - 1
    # Strict comparisons keep the sort stable.
    while j >= 0 and (values[j] < key_value if reverse else values[j] > key_value):
      values[j + 1] = values[j]
      positions[j + 1] = positions[j]
      j -= 1
    values[j + 1] = key_value
    positions[j + 1] = key_position
  #3.return position or value
  return np.array(positions if order else values)
#test function: each separator names the result printed right after it
# (the labels used to be swapped: "normal" preceded the reverse=True result)
x = [6,2,3,1,23,4,5,7]
print(x)
print("------------------------------------reverse")
print(sort_order(x,reverse = True, order = False))
print("------------------------------------normal")
print(sort_order(x,reverse = False, order = False))
print("------------------------------normal & return_position")
print(sort_order(x,reverse = False, order = True))
## [6, 2, 3, 1, 23, 4, 5, 7]
## ------------------------------------reverse
## [23  7  6  5  4  3  2  1]
## ------------------------------------normal
## [ 1  2  3  4  5  6  7 23]
## ------------------------------normal & return_position
## [3 1 2 5 6 0 7 4]

8. 函式回傳封包(多筆資料)

R list-function return

#' Return several results at once by packing them into a list.
#'
#' @param x A vector of values.
#' @return A list with `name` (a fixed label) and `file` (a data frame that
#'   pairs each value with its 1-based position).
sort_order <- function(x) {
  # 1. make primary order
  name <- "andy_practice"
  # seq_along() gives integer(0) for empty input; 1:length(x) would give
  # c(1, 0) and make data.frame() fail on the length mismatch.
  t <- data.frame(data = x, position = seq_along(x))
  # 2. return both pieces in one list
  list(name = name, file = t)
}
#create a vector data
g = c(6,2,3,1,23,4,5,7)

#test function
(muitle_return = sort_order(g))
## $name
## [1] "andy_practice"
## 
## $file
##   data position
## 1    6        1
## 2    2        2
## 3    3        3
## 4    1        4
## 5   23        5
## 6    4        6
## 7    5        7
## 8    7        8
#string (name)
muitle_return$name
## [1] "andy_practice"
#data.frame
muitle_return["file"]
## $file
##   data position
## 1    6        1
## 2    2        2
## 3    3        3
## 4    1        4
## 5   23        5
## 6    4        6
## 7    5        7
## 8    7        8
#class 
class(muitle_return)
## [1] "list"

python tuple-function return

  
def sort_order(x, reverse = False, order = False):
  """Return several results at once as a tuple.

  x              -- sequence of values
  reverse, order -- accepted for signature compatibility; not used here

  Returns (name, file): name is a one-element list with a fixed label, file
  is a 2-column numpy array whose rows are [position, value].
  """
  import numpy as np
  import pandas as pd
#1.make primary order
  name = ["andy_practice"]
  data = { "o" : range(0,len(x)), "t" : x }
  # Pin the column order so the array is [position, value] on every pandas
  # version (old releases sorted dict keys, current ones keep insertion order).
  file = np.array(pd.DataFrame(data, columns = ["o", "t"]))
  return name,file
#test function
x = [6,2,3,1,23,4,5,7]
name,file = sort_order(x)

print("name:",name,"\n","data:",file )
print("---------------------------")
print(type(sort_order(x)))
## name: ['andy_practice'] 
##  data: [[ 0  6]
##  [ 1  2]
##  [ 2  3]
##  [ 3  1]
##  [ 4 23]
##  [ 5  4]
##  [ 6  5]
##  [ 7  7]]
## ---------------------------
## <class 'tuple'>

9. 函式數值、巢型以及錯誤排除

R scope(local/global), nested_function, Error_Handling

external <- 1 # global variable

#' Demonstrate scope, a nested function and error handling in one place.
#'
#' @param x Value added to `external`; expected to be numeric.
#' @return `x + external` on success; on error the message is printed and
#'   returned.
S_N_E <- function(x) {
  # Error handler: report which input could not be used in the addition.
  report_failure <- function(e) {
    print(paste0(x, " sould be a numeric variable"))
  }
  tryCatch(
    {
      internal <- 0 # local variable, visible only inside this call
      # nested function: it can read `internal` from the enclosing call
      add_internal <- function(y) {
        y + internal
      }
      x + add_internal(external)
    },
    error = report_failure
  )
}

#test S_N_E function 
S_N_E(external)
## [1] 2
tryCatch(internal,error = function(e) {print("the internal vriable didn't exist")})
## [1] "the internal vriable didn't exist"
S_N_E("1")
## [1] "1 sould be a numeric variable"

python scope(local/global), nested_function, Error_Handling

external = 1 #global variable

def S_N_E(x):
  """Practice the concepts of scope, nested function and error handling.

  Returns x + external. When x cannot be added to a number, a message is
  printed and None is returned.
  """
  try:
    internal = 0 #local variable
    def t(y):  #nested function: reads `internal` from the enclosing call
        return y + internal
    return x + t(external)

  # Catch only the failure this demo is about (adding a non-number). A bare
  # `except:` would also hide unrelated errors and even KeyboardInterrupt.
  except TypeError: #error handling
      print(str(x) + " sould be a numeric variable")
  


#test S_N_E function 
print(S_N_E(1))

try:
  S_N_E(internal)
except:  
  print("the internal vriable didn't exist")
  
S_N_E("external")
## 2
## the internal vriable didn't exist
## external sould be a numeric variable

10. 彈性參數flexible arguments

R

#' Print every element of `x` increased by one, one element per line.
#'
#' @param x A vector (or list) of numbers.
andy <- function(x) {
  for (element in x) {
    print(element + 1)
  }
}
andy(c(1,2,3,4))
## [1] 2
## [1] 3
## [1] 4
## [1] 5
#' Print each named element of `x` as "name:value".
#'
#' @param x A named container such as a one-row data frame.
andytry <- function(x) {
  for (field in names(x)) {
    # x[field] keeps the container type; paste0() converts it to text
    label <- paste0(field, ":", x[field])
    # drop the first space, if the converted value contains one
    print(sub(" ", "", label))
  }
}
andytry(data.frame(a = 1, b = 2,c = 3, d = 4))
## [1] "a:1"
## [1] "b:2"
## [1] "c:3"
## [1] "d:4"

python

def andy(x):
  """Print every element of the iterable x increased by one."""
  for element in x:
    incremented = element + 1
    print(incremented)

def andyeazy(*x):
  """Print every positional argument increased by one."""
  for argument in x:
    incremented = argument + 1
    print(incremented)

def andytry(**x):
  """Print every keyword argument as "name:<value + 1>"."""
  for key, value in x.items():
    print(key + ":" + str(value + 1))
    
andy( [1,2,3,4] )
print("----------")
andyeazy(1,2,3,4)
print("----------")
andytry(a = 1, b = 2, c = 3, d = 4)
## 2
## 3
## 4
## 5
## ----------
## 2
## 3
## 4
## 5
## ----------
## a:2
## b:3
## c:4
## d:5

物件(Object)

1. R_系統內建物件

R_base_type(builtin)

#Base type
c(typeof(sum), is.primitive(sum))
## [1] "builtin" "TRUE"

2. R_S3物件

3. S4物件

r_strict_type(S4)

# setClass() defines a formal S4 class with typed slots
setClass("andy3", slots = list(type = "character", check = "character", test = "numeric"))

# generic function: setGeneric() turns the existing function into a generic
# and keeps it as the default method
count_O <- function(obj) {
  sum(obj@test)
}
setGeneric("count_O")
## [1] "count_O"
# build an "andy3" object with new()
Oo <- new("andy3",
  type = c("Base", "S3", "S4", "RC"),
  check = c("typeof", "is.prmitive", "pryr", "isS4"),
  test = c(1:8))

# use setMethod() to register the count_O method for class "andy3".
# The argument is named `obj` to match the generic; naming it `object`
# made R warn and rename the argument itself.
setMethod("count_O", "andy3", function(obj) {
  sum(obj@test)
})
## [1] "count_O"
isS4(Oo)
## [1] TRUE
# test the S4 object magic
count_O(Oo)
## [1] 36

4. RC物件

r_like_python_type(RC)

# setRefClass() creates a Reference Class (RC) generator. Fields and methods
# are declared together, and methods read the fields directly by name.
andy <- setRefClass(
  "andy",
  fields = list(
    type = "character",
    check = "character",
    test = "numeric"
  ),
  methods = list(
    # Sum of the object's `test` field.
    count_O = function() {
      sum(test)
    }
  )
)


#building objec
Oo <- andy(type = c("Base", "S3", "S4", "RC"),
  check = c("typeof", "is.prmitive", "pryr", "isS4"),
  test = c(1:8)
  )

Oo$count_O()
## [1] 36

5. 物件導向的python

python_object constructor

# class, self, __init__
class andy:
  '''it anmes andy class''' #Doc string 
  def __init__(self, attribute, test):
    # Store both constructor arguments on the new instance unchanged.
    self.attribute, self.test = attribute, test

print(andy)
## <class '__main__.andy'>

python_building a “andy” object

class andy:
  '''it anmes andy class''' #Doc string 
  def __init__(self, attribute, test):
    self.attribute = attribute
    self.test = test

talk = andy("andy_clone", 30)

print(talk)
print("my attribute is " + talk.attribute + "\n" + 
      "my test is " + str(talk.test)  + "\n" + 
      "my class is " + talk.__doc__)

#print all atribute by dir()
print(dir(talk))
## <__main__.andy object at 0x1018d4438>
## my attribute is andy_clone
## my test is 30
## my class is it anmes andy class
## ['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'attribute', 'test']

python_more_function

class andy:
  '''andy class'''  # docstring
  # The instance parameter is called `you` here instead of the conventional
  # `self`; Python binds it by position, so the name is free to choose.
  def __init__(you, attribute, test):
      you.attribute, you.test = attribute, test

  def get_info(you):
      # print both fields on one line, separated by a single space
      sentence = you.attribute + " " + you.test
      print(sentence)
    
    
# create an instance whose two fields are both strings
talk = andy("andy_clone is a","superman")

# prints the two fields joined by a space
talk.get_info()


# Inheritance: kavin derives from andy and adds a print_info method
class kavin(andy):
  '''inheritance'''  # docstring
  def print_info(you):
      # NOTE(review): the literal has no leading space, so the printed text
      # runs the `test` value and "is" together
      suffix = "is andy_clone"
      print(you.test + suffix)

talk2 = kavin("andy_clone is a","superman")
talk2.print_info()  # method defined on kavin
talk2.get_info()  # kavin defines no get_info of its own


# Override: Marry derives from kavin and supplies its own print_info
class Marry(kavin):
  '''Override'''  # docstring
  def print_info(you):
      # same shape as the parent's method, with a different suffix
      suffix = "is andy_clone_inheritance_and_Override"
      print(you.test + suffix)

talk3 = Marry("andy_clone is a","superman")
talk3.print_info()  # Marry's own print_info
talk3.get_info()  # Marry defines no get_info of its own
## andy_clone is a superman
## supermanis andy_clone
## andy_clone is a superman
## supermanis andy_clone_inheritance_and_Override
## andy_clone is a superman

環境物件函數-星巴克測試

R_DrinkCostClass

#' Create a beverage-cost tracker for one store and year.
#'
#' The tracker is an environment of class "DrinkCostClass", so the functions
#' stored in it update the menu and the cost records by reference.
#'
#' @param store Store name, kept in the `SubjectName` field.
#' @param year Year the tracker covers, kept in the `year` field.
#' @return An environment of class "DrinkCostClass" holding the fields
#'   `SubjectName`, `year`, `cost`, `My_love_menu` and the functions `get`,
#'   `delet_cost`, `delet_cost_kind`, `getMy_love_menu`,
#'   `getNumMy_love_menu`, `addMy_love_menu` and `addCost`.
Drinkcost <- function(store, year) {
  # Object composition. The closures below look up `my_beverages` in this
  # function's frame only when they are called; by then it is the environment
  # created by list2env() at the end, so every update is shared.
  my_beverages <- list(
    SubjectName = store,
    year = year,
    # Fetch any field or function of the object by name.
    get = function(x) my_beverages[[x]],
    # Remove row(s) `order` from the purchase records of drink `kind`.
    delet_cost = function(kind, order) {
      records <- my_beverages$cost
      records[[kind]] <- records[[kind]][-order, ]
      assign("cost", records, envir = my_beverages)
    },
    # Remove every purchase record of drink `kind`.
    delet_cost_kind = function(kind) {
      records <- my_beverages$cost
      # A logical mask is used instead of -which(): when nothing matches,
      # -which() yields an empty index and would drop every record.
      keep <- !(names(records) %in% kind)
      assign("cost", records[keep], envir = my_beverages)
    },
    cost = list(),
    My_love_menu = list(),
    getMy_love_menu = function() my_beverages$My_love_menu,
    getNumMy_love_menu = function() length(my_beverages$My_love_menu)
  )

  # Add (or overwrite) the menu entry `name` with price `value`.
  my_beverages$addMy_love_menu <- function(name, value) {
    menu <- my_beverages$My_love_menu
    menu[[name]] <- value
    assign("My_love_menu", menu, envir = my_beverages)
  }

  # Record one purchase of `DrinkName` at its menu price, stamped with the
  # current time. Prints a notice and leaves the records untouched when the
  # menu is empty or the drink is not on it.
  my_beverages$addCost <- function(DrinkName) {
    if (length(my_beverages$My_love_menu) == 0) {
      print("The My_love_menu was not yet to build in object")
      # `break` is only valid inside a loop; leave the function instead
      return(invisible(NULL))
    }

    # look the price up before building the record: data.frame() fails on a
    # zero-length cost column
    money <- my_beverages$My_love_menu[[DrinkName]]
    if (is.null(money)) {
      print("no found, please add the drink to menu")
      return(invisible(NULL))
    }

    detail <- data.frame(
      cost = as.numeric(money),
      time = as.character(Sys.time())
    )
    records <- my_beverages$cost
    if (DrinkName %in% names(records)) {
      records[[DrinkName]] <- rbind(records[[DrinkName]], detail)
    } else {
      records[[DrinkName]] <- detail
    }
    assign("cost", records, envir = my_beverages)
  }

  # turn the list into an environment and tag it with its S3 class
  my_beverages <- list2env(my_beverages)
  class(my_beverages) <- "DrinkCostClass"
  my_beverages
}
#' Print method for "DrinkCostClass" objects (S3 method for print()).
#'
#' Writes "<SubjectName>_<year>'s cost" followed by a newline.
#'
#' @param x A "DrinkCostClass" object, as returned by Drinkcost().
#' @param ... Ignored; accepted for compatibility with the print() generic.
#' @return `x`, invisibly.
print.DrinkCostClass <- function(x, ...) {
  # inherits() also accepts objects that carry additional classes, where
  # comparing class(x) with != would produce a condition of length > 1.
  if (!inherits(x, "DrinkCostClass")) {
    stop("`x` must be a DrinkCostClass object", call. = FALSE)
  }
  label <- paste0(x$get("SubjectName"), "_", x$get("year"), "'s cost")
  cat(label, "\n", sep = "")
  invisible(x)
}

# use the Drinkcost() function to create my "starbuck_cost" object
(starbuck_cost = Drinkcost("starbuck",2017))
## starbuck_2017's cost
# Add a Starbucks-specific helper function to the "starbuck_cost" object.
#
# It registers one menu entry per (drink, size) pair, named "<kind>_<size>".
#   kind       character vector of drink names
#   kind_cost  numeric vector of base (tall) prices, parallel to `kind`
#   size_cost  price step added once for "grada" and twice for "venti"
# Returns NULL invisibly; the menu is updated through addMy_love_menu().
starbuck_cost$starbuck_list_creator <- function(kind, kind_cost, size_cost) {
  sizes <- c("tall", "grada", "venti")
  # every drink at the first size, then every drink at the second, and so on
  size <- rep(sizes, each = length(kind))
  drink_names <- paste(kind, size, sep = "_")
  prices <- c(kind_cost, kind_cost + size_cost, kind_cost + size_cost * 2)
  # seq_along() keeps the loop empty when no drinks are given, whereas
  # 1:length() would iterate over c(1, 0)
  for (i in seq_along(drink_names)) {
    starbuck_cost$addMy_love_menu(drink_names[i], prices[i])
  }
  invisible(NULL)
}

#DrinkCostClass object
class(starbuck_cost)
## [1] "DrinkCostClass"
#my_love_menu is empty
starbuck_cost$My_love_menu
## list()
# I use the specialized starbuck_list_creator function to add my favorite menu list
starbuck_cost$starbuck_list_creator(
kind = c("caffee_latte","kenya_extra_shot_latte","caffee_misto"), # my favorite kinds
kind_cost = c(125,145,95), # the base (tall) cost of each kind
size_cost = 15 
# size_cost is passed in because the cost may change in the future.
)

# let's check the menu now
starbuck_cost$get("My_love_menu")
## $caffee_latte_tall
## [1] 125
## 
## $kenya_extra_shot_latte_tall
## [1] 145
## 
## $caffee_misto_tall
## [1] 95
## 
## $caffee_latte_grada
## [1] 140
## 
## $kenya_extra_shot_latte_grada
## [1] 160
## 
## $caffee_misto_grada
## [1] 110
## 
## $caffee_latte_venti
## [1] 155
## 
## $kenya_extra_shot_latte_venti
## [1] 175
## 
## $caffee_misto_venti
## [1] 125
#I drink a cup of caffee_latte_tall  
starbuck_cost$addCost("caffee_latte_tall")
starbuck_cost$get("cost")
## $caffee_latte_tall
##   cost                time
## 1  125 2017-10-23 02:08:25
# I drink a cup of caffee_latte_tall again
starbuck_cost$addCost("caffee_latte_tall")
starbuck_cost$get("cost")
## $caffee_latte_tall
##   cost                time
## 1  125 2017-10-23 02:08:25
## 2  125 2017-10-23 02:08:25
#I drink a cup of expensive coffee today
starbuck_cost$addCost("kenya_extra_shot_latte_venti")
starbuck_cost$get("cost")
## $caffee_latte_tall
##   cost                time
## 1  125 2017-10-23 02:08:25
## 2  125 2017-10-23 02:08:25
## 
## $kenya_extra_shot_latte_venti
##   cost                time
## 1  175 2017-10-23 02:08:25
# I want to delete all the mistaken records of one beverage kind
starbuck_cost$delet_cost_kind("kenya_extra_shot_latte_venti")
starbuck_cost$get("cost")
## $caffee_latte_tall
##   cost                time
## 1  125 2017-10-23 02:08:25
## 2  125 2017-10-23 02:08:25
# I want to delete a single mistaken record of one beverage kind
starbuck_cost$delet_cost(kind = "caffee_latte_tall", order = 2)
starbuck_cost$get("cost")
## $caffee_latte_tall
##   cost                time
## 1  125 2017-10-23 02:08:25