一、python資料屬性及方法(attributes_methods)

1. python變數的基本屬性或方法

python_float

# float is registered with the abstract base class numbers.Real.
# The value is bound to `my_float` so the built-in `float` type is not shadowed.
my_float = 3.2
# Methods
print(my_float.as_integer_ratio())  # pair of integers whose ratio is exactly the stored float
## (3602879701896397, 1125899906842624)
print(my_float.is_integer())  # boolean: True only when there is no fractional part
## False
print(my_float.hex())  # hexadecimal string representation
## 0x1.999999999999ap+1
print(float.fromhex(my_float.hex()))  # parse the hexadecimal string back into a float
## 3.2

python_int

# int is registered with the abstract base class numbers.Integral.
my_int = 87
# Methods
print(my_int.bit_length())  # number of bits needed to represent the value
## 7
# Encode once as 10 big-endian bytes and reuse the result for the round trip.
packed_int = my_int.to_bytes(length=10, byteorder="big")
print(packed_int)
## b'\x00\x00\x00\x00\x00\x00\x00\x00\x00W'
print(int.from_bytes(packed_int, byteorder="big"))  # decode the bytes back to the integer
## 87

python_complex

# complex is registered with the abstract base class numbers.Complex.
my_complex = complex(8, 7)
# The real and imaginary parts are exposed as float attributes.
real_part, imag_part = my_complex.real, my_complex.imag
print(real_part)
## 8.0
print(imag_part)
## 7.0
print(my_complex.conjugate())  # same real part, imaginary part negated
## (8-7j)

python_str

andy_str = "there are many motheds we can use !! "
# Prefix / suffix tests return booleans.
print(andy_str.startswith("there are"))
## True
print(andy_str.endswith("can use ??"))
## False
# find() gives the index of the first match, count() the number of matches.
print(andy_str.find("ma"))
## 10
print(andy_str.count("!"))
## 2
# Case-changing methods, applied in turn; each returns a new string.
for transform in (str.capitalize, str.title, str.upper, str.lower, str.swapcase):
    print(transform(andy_str))
## There are many motheds we can use !!
## There Are Many Motheds We Can Use !!
## THERE ARE MANY MOTHEDS WE CAN USE !!
## there are many motheds we can use !!
## THERE ARE MANY MOTHEDS WE CAN USE !!
# replace() substitutes every occurrence of the first argument with the second.
print(andy_str.replace("we", "you"))
## there are many motheds you can use !!

python_boolean

# bool exposes the same integer methods that the int example uses.
boolean = True
print(boolean.bit_length())
## 1
# Encode once as 3 big-endian bytes and reuse the result for the round trip.
encoded_boolean = boolean.to_bytes(length=3, byteorder="big")
print(encoded_boolean)
## b'\x00\x00\x01'
print(bool.from_bytes(encoded_boolean, byteorder="big"))  # decoded back as a bool
## True

2. python清單的基本屬性或方法

python_list

andy_list = [21, 33, 32, 45, 67, 78]
andy_list.append(55)  # add one element at the end
print(andy_list)
## [21, 33, 32, 45, 67, 78, 55]
andy_list.pop()  # with no argument, pop() removes the last element
print(andy_list)
## [21, 33, 32, 45, 67, 78]
andy_list.pop()
print(andy_list)
## [21, 33, 32, 45, 67]
andy_list.insert(1, 88)  # place 88 at index 1, shifting later elements right
print(andy_list)
## [21, 88, 33, 32, 45, 67]
andy_list.remove(88)  # delete the first element equal to 88
print(andy_list)
## [21, 33, 32, 45, 67]
print(andy_list.index(45))  # position of the first 45
## 3
andy_list.extend([2, 2])  # add two more elements at the end
print(andy_list)
## [21, 33, 32, 45, 67, 2, 2]
print(andy_list.count(2))  # how many elements equal 2
## 2
andy_list.sort()  # in-place ascending sort
print(andy_list)
## [2, 2, 21, 32, 33, 45, 67]
andy_list.reverse()  # in-place reversal
print(andy_list)
## [67, 45, 33, 32, 21, 2, 2]

3. python固定清單的基本屬性或方法

python_tuple

# Only the two read-only tuple methods index() and count() are shown here.
my_tuple = 3, 4, 6, 7, 88, 1, 1, 1
position_of_88 = my_tuple.index(88)  # index of the first element equal to 88
print(position_of_88)
## 4
number_of_ones = my_tuple.count(1)  # how many elements equal 1
print(number_of_ones)
## 3

4. python字典的基本屬性或方法

python_dictionary

andy_dictionary = {
    "var": ["float", "int", "boolean", "str"],
    "type": ["varialbe", "list", "tuple", "dictionary"],
    "numbers": [1, 2, 3, 7, 7, 7, 7, 88],
}
# Divider printed between the demonstrations (the trailing newline adds a blank line).
separator = "--------------------------------\n"
print(andy_dictionary.get("var"))  # value stored under the key "var"
## ['float', 'int', 'boolean', 'str']
print(separator)
## --------------------------------
print(andy_dictionary.keys())  # view of the keys
## dict_keys(['var', 'type', 'numbers'])
print(separator)
## --------------------------------
print(andy_dictionary.values())  # view of the values
## dict_values([['float', 'int', 'boolean', 'str'], ['varialbe', 'list', 'tuple', 'dictionary'], [1, 2, 3, 7, 7, 7, 7, 88]])
print(separator)
## --------------------------------
print(andy_dictionary.items())  # view of the (key, value) pairs
## dict_items([('var', ['float', 'int', 'boolean', 'str']), ('type', ['varialbe', 'list', 'tuple', 'dictionary']), ('numbers', [1, 2, 3, 7, 7, 7, 7, 88])])
print(dir(andy_dictionary))  # names of every attribute and method of the dict
## ['__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values']

二、其他屬性及方法(python numpy pandas)

1. (numpy) ndim、shape & dtype

python-np.array

import numpy as np
# Build the plain list and its ndarray counterpart once, then inspect them.
vector = [3, 4, 6, 7, 8, 30]
vector_nparray = np.array(vector)
print("vector =", vector)
## vector = [3, 4, 6, 7, 8, 30]
print("vector_nparray =", vector_nparray)
## vector_nparray = [ 3  4  6  7  8 30]
print("\n")
print(vector_nparray.ndim)  # number of dimensions
## 1
print(vector_nparray.shape)  # length along each dimension
## (6,)
print(vector_nparray.dtype)  # element type; the recorded run shows int64
## int64

python-2d np.array

import numpy as np
# range() is lazy in Python 3, so it is materialised with list(); otherwise the
# printed nested list shows "range(1, 7)" instead of the second row's values.
matrix_rows = [[3, 4, 6, 7, 8, 30], list(range(1, 7))]
matrix_nparray = np.array(matrix_rows)
print("2d_matrix = " + str(matrix_rows))
## 2d_matrix = [[3, 4, 6, 7, 8, 30], [1, 2, 3, 4, 5, 6]]
print("2d_matrix_nparray = " + str(matrix_nparray))
## 2d_matrix_nparray = [[ 3  4  6  7  8 30]
##  [ 1  2  3  4  5  6]]
print("\n")
print(matrix_nparray.ndim)  # number of dimensions
## 2
print(matrix_nparray.shape)  # (rows, columns)
## (2, 6)
print(matrix_nparray.dtype)  # element type; the recorded run shows int64
## int64

2. (numpy) zeros、empty & arange

import numpy as np
# np.zeros takes a length or a shape tuple and fills the array with 0.0.
print("vector =", np.zeros(6))
## vector = [ 0.  0.  0.  0.  0.  0.]
print("----------")
## ----------
print("2d =", np.zeros((2, 6)))
## 2d = [[ 0.  0.  0.  0.  0.  0.]
##  [ 0.  0.  0.  0.  0.  0.]]
print("----------")
## ----------
print("3d =", np.zeros((2, 2, 2)))
## 3d = [[[ 0.  0.]
##   [ 0.  0.]]
## 
##  [[ 0.  0.]
##   [ 0.  0.]]]
print("----------")
## ----------
# np.empty allocates without initialising, so the values printed are arbitrary;
# the zeros recorded below are just what this particular run happened to show.
print("3empty =", np.empty((2, 2, 2)))
## 3empty = [[[ 0.  0.]
##   [ 0.  0.]]
## 
##  [[ 0.  0.]
##   [ 0.  0.]]]
print("----------")
## ----------
# np.arange(n) yields the integers 0 .. n-1.
print("sequence =", np.arange(10))
## sequence = [0 1 2 3 4 5 6 7 8 9]

3. (numpy) np.int64

python-np.otherway

import numpy as np
# A list of numeric strings becomes a unicode-string array (dtype <U2 in the recorded run).
str_vector = ["2", "3", "4", "5", "10", "20"]
str_vector_array = np.array(str_vector)
print(str_vector)
## ['2', '3', '4', '5', '10', '20']
print(str_vector_array)
## ['2' '3' '4' '5' '10' '20']
print(str_vector_array.dtype)
## <U2
print("---------")
## ---------
# astype returns a converted copy; the string array itself is left unchanged.
int_vector_array = str_vector_array.astype(np.int64)
print(int_vector_array)
## [ 2  3  4  5 10 20]
print(int_vector_array.dtype)
## int64

python-np.otherway

import numpy as np
# str() turns the whole list into ONE string, so np.array wraps that single
# piece of text rather than six numbers; the printed result keeps the commas.
str_vector = str([2, 3, 4, 5, 10, 20])
text_array = np.array(str_vector)
print(text_array)
## [2, 3, 4, 5, 10, 20]

4. (pandas) ndim、shape、dtype、ix、drop、set_index & sort_values

python-pandas

import pandas as pd
str_vector = ["2", "3", "4", "5", "10", "20"]
nb_vector = [20, 10, 5, 4, 3, 2]
varialbe_type = {
    "string": str_vector,
    "numeric": nb_vector,
}
# The column order is pinned explicitly so the frame matches the recorded output
# below regardless of how the installed pandas orders dict keys.
pd_variable_type = pd.DataFrame(varialbe_type, columns=["numeric", "string"])
print(pd_variable_type)
##    numeric string
## 0       20      2
## 1       10      3
## 2        5      4
## 3        4      5
## 4        3     10
## 5        2     20
print("------------------1")
## ------------------1
print(pd_variable_type.ndim)  # number of dimensions
## 2
print(pd_variable_type.shape)  # (rows, columns)
## (6, 2)
print(pd_variable_type.dtypes)  # one dtype per column
## numeric     int64
## string     object
## dtype: object
print("------------------2")
## ------------------2
# .ix was removed in pandas 1.0; .loc is the label-based replacement and gives
# the same result here because the row labels are the default 0..5.
print(pd_variable_type.loc[:, "numeric"])  # every row of one column
## 0    20
## 1    10
## 2     5
## 3     4
## 4     3
## 5     2
## Name: numeric, dtype: int64
print(pd_variable_type["numeric"])  # shorthand for the same column selection
## 0    20
## 1    10
## 2     5
## 3     4
## 4     3
## 5     2
## Name: numeric, dtype: int64
print("------------------3")
## ------------------3
print(pd_variable_type.loc[0])  # the row labelled 0
## numeric    20
## string      2
## Name: 0, dtype: object
print("------------------4")
## ------------------4
print(pd_variable_type.loc[2, "numeric"])  # a single cell: row 2, column "numeric"
## 5
print("------------------5")
## ------------------5
# drop() returns a new frame each time; pd_variable_type itself is never modified.
print(pd_variable_type.drop(1, axis=0))  # without the row labelled 1
##    numeric string
## 0       20      2
## 2        5      4
## 3        4      5
## 4        3     10
## 5        2     20
print("------------------6")
## ------------------6
print(pd_variable_type.drop(3, axis=0))  # without the row labelled 3
##    numeric string
## 0       20      2
## 1       10      3
## 2        5      4
## 4        3     10
## 5        2     20
print("------------------7")
## ------------------7
print(pd_variable_type.drop("numeric", axis=1))  # without the column "numeric"
##   string
## 0      2
## 1      3
## 2      4
## 3      5
## 4     10
## 5     20
print("------------------8")
## ------------------8
print(pd_variable_type[pd_variable_type["numeric"] > 5])  # boolean-mask row filter
##    numeric string
## 0       20      2
## 1       10      3
print("------------------9")
## ------------------9
# The new row labels are passed as a list nested inside the keys list (double [[ ]]).
print(pd_variable_type.set_index([[2, 4, 5, 3, 1, 0]]))
##    numeric string
## 2       20      2
## 4       10      3
## 5        5      4
## 3        4      5
## 1        3     10
## 0        2     20
print("------------------10")
## ------------------10
# The set_index result above was only printed, not assigned, so the frame still
# has its original 0..5 index and sort_index() shows the rows in the same order.
print(pd_variable_type.sort_index())
##    numeric string
## 0       20      2
## 1       10      3
## 2        5      4
## 3        4      5
## 4        3     10
## 5        2     20
print("------------------11")
## ------------------11
# The sorted frame is printed so the result is visible like every other step.
# The "string" column holds text, so the order is lexicographic ("10" < "2" < "20").
print(pd_variable_type.sort_values(by="string"))
##    numeric string
## 4        3     10
## 0       20      2
## 5        2     20
## 1       10      3
## 2        5      4
## 3        4      5

三、下載資料(python)

python-download data

import pandas as pd
# Each section below points `url` at a different download and loads it into its
# own DataFrame; head(1) prints just the first row as a quick check.

# Comma-separated file: read_csv's default separator applies.
url = "https://drive.google.com/uc?export=download&id=1B0LQLn9FJi7SIKvFiTBiS0AlU9bjtcqP"
iris_df_csv = pd.read_csv(url)
print(iris_df_csv.head(1))
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
## 0           5.1          3.5           1.4          0.2  setosa

# Tab-separated file.
url = "https://drive.google.com/uc?export=download&id=1M0sJXe4EvBtb7gifEVs7mRKScH1vJnE1"
iris_df_tsv = pd.read_csv(url, sep="\t")
print(iris_df_tsv.head(1))
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
## 0           5.1          3.5           1.4          0.2  setosa

# Text file whose fields are separated by colons.
url = "https://drive.google.com/uc?export=download&id=1l140Pp3DlKMIMYdD7u0I32I_fr1inXef"
iris_df_txt = pd.read_csv(url, sep=":")
print(iris_df_txt.head(1))
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
## 0           5.1          3.5           1.4          0.2  setosa

# Excel workbook.
url = "https://drive.google.com/uc?export=download&id=16NZEFn0kEiJdzLRxsXf9KBxkhvTFueix"
iris_df_excel = pd.read_excel(url)
print(iris_df_excel.head(1))
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
## 0           5.1          3.5           1.4          0.2  setosa

四、爬網頁(python)

1. using bs4 to get all with “lxml” doc

python-scraping html-1

import requests as rq
from bs4 import BeautifulSoup
url = "https://www.ptt.cc/bbs/NBA/index.html"
# A timeout keeps the script from waiting forever if the server never answers.
response = rq.get(url, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
response.raise_for_status()
html_doc = response.text
# Parse the downloaded markup with the lxml parser.
soup = BeautifulSoup(html_doc, "lxml")
print(soup.prettify())  # the whole parsed document, re-indented one tag per line
## <!DOCTYPE html>
## <html>
##  <head>
##   <meta charset="utf-8"/>
##   <meta content="width=device-width, initial-scale=1" name="viewport"/>
##   <title>
##    看板 NBA 文章列表 - 批踢踢實業坊
##   </title>
##   <link href="//images.ptt.cc/bbs/v2.25/bbs-common.css" rel="stylesheet" type="text/css"/>
##   <link href="//images.ptt.cc/bbs/v2.25/bbs-base.css" media="screen" rel="stylesheet" type="text/css"/>
##   <link href="//images.ptt.cc/bbs/v2.25/bbs-custom.css" rel="stylesheet" type="text/css"/>
##   <link href="//images.ptt.cc/bbs/v2.25/pushstream.css" media="screen" rel="stylesheet" type="text/css"/>
##   <link href="//images.ptt.cc/bbs/v2.25/bbs-print.css" media="print" rel="stylesheet" type="text/css"/>
##  </head>
##  <body>
##   <div id="topbar-container">
##    <div class="bbs-content" id="topbar">
##     <a href="/bbs/" id="logo">
##      批踢踢實業坊
##     </a>
##     <span>
##      ›
##     </span>
##     <a class="board" href="/bbs/NBA/index.html">
##      <span class="board-label">
##       看板
##      </span>
##      NBA
##     </a>
##     <a class="right small" href="/about.html">
##      關於我們
##     </a>
##     <a class="right small" href="/contact.html">
##      聯絡資訊
##     </a>
##    </div>
##   </div>
##   <div id="main-container">
##    <div id="action-bar-container">
##     <div class="action-bar">
##      <div class="btn-group btn-group-dir">
##       <a class="btn selected" href="/bbs/NBA/index.html">
##        看板
##       </a>
##       <a class="btn" href="/man/NBA/index.html">
##        精華區
##       </a>
##      </div>
##      <div class="btn-group btn-group-paging">
##       <a class="btn wide" href="/bbs/NBA/index1.html">
##        最舊
##       </a>
##       <a class="btn wide" href="/bbs/NBA/index5883.html">
##        ‹ 上頁
##       </a>
##       <a class="btn wide disabled">
##        下頁 ›
##       </a>
##       <a class="btn wide" href="/bbs/NBA/index.html">
##        最新
##       </a>
##      </div>
##     </div>
##    </div>
##    <div class="r-list-container action-bar-margin bbs-screen">
##     <div class="search-bar">
##      <form action="search" id="search-bar" type="get">
##       <input class="query" name="q" placeholder="搜尋文章⋯" type="text" value=""/>
##      </form>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        63
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525078865.A.13D.html">
##        [情報] 00-01賽季以來季後賽單賽季個人正負值
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        kakala99
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+00-01%E8%B3%BD%E5%AD%A3%E4%BB%A5%E4%BE%86%E5%AD%A3%E5%BE%8C%E8%B3%BD%E5%96%AE%E8%B3%BD%E5%AD%A3%E5%80%8B%E4%BA%BA%E6%AD%A3%E8%B2%A0%E5%80%BC">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Akakala99">
##           搜尋看板內 kakala99 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        69
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525080516.A.65D.html">
##        [新聞] 詹皇絕殺勝喬丹?皮朋:數據好看而已
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        kenny949
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+%E8%A9%B9%E7%9A%87%E7%B5%95%E6%AE%BA%E5%8B%9D%E5%96%AC%E4%B8%B9%EF%BC%9F%E7%9A%AE%E6%9C%8B%EF%BC%9A%E6%95%B8%E6%93%9A%E5%A5%BD%E7%9C%8B%E8%80%8C%E5%B7%B2">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Akenny949">
##           搜尋看板內 kenny949 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        77
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525081987.A.1E0.html">
##        [公告] 水桶      麻煩板友發文要滿150字
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        Vedan
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+%E6%B0%B4%E6%A1%B6++++++%E9%BA%BB%E7%85%A9%E6%9D%BF%E5%8F%8B%E7%99%BC%E6%96%87%E8%A6%81%E6%BB%BF150%E5%AD%97">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3AVedan">
##           搜尋看板內 Vedan 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##        M
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        82
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525083093.A.2BE.html">
##        [討論] Lue最後一次暫停在說啥
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        tom80727
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+Lue%E6%9C%80%E5%BE%8C%E4%B8%80%E6%AC%A1%E6%9A%AB%E5%81%9C%E5%9C%A8%E8%AA%AA%E5%95%A5">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Atom80727">
##           搜尋看板內 tom80727 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        63
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525083726.A.44C.html">
##        [新聞] 對手從慢郎中變急驚風 勇士諸將直喊累
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        Angel0724
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+%E5%B0%8D%E6%89%8B%E5%BE%9E%E6%85%A2%E9%83%8E%E4%B8%AD%E8%AE%8A%E6%80%A5%E9%A9%9A%E9%A2%A8+%E5%8B%87%E5%A3%AB%E8%AB%B8%E5%B0%87%E7%9B%B4%E5%96%8A%E7%B4%AF">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3AAngel0724">
##           搜尋看板內 Angel0724 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        56
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525087717.A.1EF.html">
##        [情報] Oakley:籃球運動歷史上沒有人訓練比LBJ更刻苦
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        bigDwinsch
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+Oakley%EF%BC%9A%E7%B1%83%E7%90%83%E9%81%8B%E5%8B%95%E6%AD%B7%E5%8F%B2%E4%B8%8A%E6%B2%92%E6%9C%89%E4%BA%BA%E8%A8%93%E7%B7%B4%E6%AF%94LBJ%E6%9B%B4%E5%88%BB%E8%8B%A6">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3AbigDwinsch">
##           搜尋看板內 bigDwinsch 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        80
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525088557.A.059.html">
##        [花邊] D'Antoni:我們有世界上最好的控衛中的2個
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        bigDwinsch
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E8%8A%B1%E9%82%8A%5D+D%27Antoni%EF%BC%9A%E6%88%91%E5%80%91%E6%9C%89%E4%B8%96%E7%95%8C%E4%B8%8A%E6%9C%80%E5%A5%BD%E7%9A%84%E6%8E%A7%E8%A1%9B%E4%B8%AD%E7%9A%842%E5%80%8B">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3AbigDwinsch">
##           搜尋看板內 bigDwinsch 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        54
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525090365.A.5A4.html">
##        Re: [BOX ] Pacers 101:105 Cavaliers
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        saitou68201
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5BBOX+%5D+Pacers+101%3A105+Cavaliers">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Asaitou68201">
##           搜尋看板內 saitou68201 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        98
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525091021.A.ECF.html">
##        [新聞] NBA》裴頓:馬刺該把里歐納德賣去籃網
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        lovea
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+NBA%E3%80%8B%E8%A3%B4%E9%A0%93%EF%BC%9A%E9%A6%AC%E5%88%BA%E8%A9%B2%E6%8A%8A%E9%87%8C%E6%AD%90%E7%B4%8D%E5%BE%B7%E8%B3%A3%E5%8E%BB%E7%B1%83%E7%B6%B2">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Alovea">
##           搜尋看板內 lovea 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f0">
##        X3
##       </span>
##      </div>
##      <div class="title">
##       (本文已被刪除) [craig819]
##      </div>
##      <div class="meta">
##       <div class="author">
##        -
##       </div>
##       <div class="article-menu">
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        19
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525095506.A.35D.html">
##        [情報] 明日裁判情報
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        aaagun
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+%E6%98%8E%E6%97%A5%E8%A3%81%E5%88%A4%E6%83%85%E5%A0%B1">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Aaaagun">
##           搜尋看板內 aaagun 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        54
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525097142.A.C92.html">
##        [新聞] 激勵隊友不輸詹皇 哈登送火箭全隊「蘋果8
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        AAApower
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+%E6%BF%80%E5%8B%B5%E9%9A%8A%E5%8F%8B%E4%B8%8D%E8%BC%B8%E8%A9%B9%E7%9A%87+%E5%93%88%E7%99%BB%E9%80%81%E7%81%AB%E7%AE%AD%E5%85%A8%E9%9A%8A%E3%80%8C%E8%98%8B%E6%9E%9C8">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3AAAApower">
##           搜尋看板內 AAApower 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        53
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525097579.A.587.html">
##        [新聞] 7場惡鬥平均41.4分鐘 詹姆斯坦言燃燒殆盡
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        ppibrother
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+7%E5%A0%B4%E6%83%A1%E9%AC%A5%E5%B9%B3%E5%9D%8741.4%E5%88%86%E9%90%98+%E8%A9%B9%E5%A7%86%E6%96%AF%E5%9D%A6%E8%A8%80%E7%87%83%E7%87%92%E6%AE%86%E7%9B%A1">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Appibrother">
##           搜尋看板內 ppibrother 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        15
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525098401.A.F0E.html">
##        [討論] 溜馬跟爵士是否很相像?
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        k7202001
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+%E6%BA%9C%E9%A6%AC%E8%B7%9F%E7%88%B5%E5%A3%AB%E6%98%AF%E5%90%A6%E5%BE%88%E7%9B%B8%E5%83%8F%3F">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Ak7202001">
##           搜尋看板內 k7202001 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        25
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525101032.A.957.html">
##        [討論] 幫這季的雷霆平反一下
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        hhll5566
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+%E5%B9%AB%E9%80%99%E5%AD%A3%E7%9A%84%E9%9B%B7%E9%9C%86%E5%B9%B3%E5%8F%8D%E4%B8%80%E4%B8%8B">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Ahhll5566">
##           搜尋看板內 hhll5566 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        16
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525101951.A.A4D.html">
##        [情報] Wesley Matthews 執行2018-19賽季球員選項
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        thnlkj0665
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+Wesley+Matthews+%E5%9F%B7%E8%A1%8C2018-19%E8%B3%BD%E5%AD%A3%E7%90%83%E5%93%A1%E9%81%B8%E9%A0%85">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Athnlkj0665">
##           搜尋看板內 thnlkj0665 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        17
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525102956.A.502.html">
##        Re: [討論] 幫這季的雷霆平反一下
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        ClownT
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+%E5%B9%AB%E9%80%99%E5%AD%A3%E7%9A%84%E9%9B%B7%E9%9C%86%E5%B9%B3%E5%8F%8D%E4%B8%80%E4%B8%8B">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3AClownT">
##           搜尋看板內 ClownT 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-list-sep">
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1504521964.A.8F9.html">
##        [公告] 板規v6.2
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        abc7360393
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+%E6%9D%BF%E8%A6%8Fv6.2">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Aabc7360393">
##           搜尋看板內 abc7360393 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        9/04
##       </div>
##       <div class="mark">
##        !
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        54
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1523270876.A.535.html">
##        [公告] 季後賽期間條款
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        namie810303
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+%E5%AD%A3%E5%BE%8C%E8%B3%BD%E6%9C%9F%E9%96%93%E6%A2%9D%E6%AC%BE">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Anamie810303">
##           搜尋看板內 namie810303 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/09
##       </div>
##       <div class="mark">
##        M
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f1">
##        爆
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1523545760.A.55B.html">
##        [情報] 2017-18 NBA Playoffs 圖表、賽程、轉播
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        matthew0123
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+2017-18+NBA+Playoffs+%E5%9C%96%E8%A1%A8%E3%80%81%E8%B3%BD%E7%A8%8B%E3%80%81%E8%BD%89%E6%92%AD">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Amatthew0123">
##           搜尋看板內 matthew0123 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/12
##       </div>
##       <div class="mark">
##       </div>
##      </div>
##     </div>
##     <div class="r-ent">
##      <div class="nrec">
##       <span class="hl f3">
##        63
##       </span>
##      </div>
##      <div class="title">
##       <a href="/bbs/NBA/M.1525054339.A.7C7.html">
##        [公告] NBA 板 開始舉辦樂透!
##       </a>
##      </div>
##      <div class="meta">
##       <div class="author">
##        ericf129
##       </div>
##       <div class="article-menu">
##        <div class="trigger">
##         ⋯
##        </div>
##        <div class="dropdown">
##         <div class="item">
##          <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+NBA+%E6%9D%BF+%E9%96%8B%E5%A7%8B%E8%88%89%E8%BE%A6%E6%A8%82%E9%80%8F%21">
##           搜尋同標題文章
##          </a>
##         </div>
##         <div class="item">
##          <a href="/bbs/NBA/search?q=author%3Aericf129">
##           搜尋看板內 ericf129 的文章
##          </a>
##         </div>
##        </div>
##       </div>
##       <div class="date">
##        4/30
##       </div>
##       <div class="mark">
##        M
##       </div>
##      </div>
##     </div>
##    </div>
##   </div>
##   <script>
##    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
##   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
##   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
##   })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
## 
##   ga('create', 'UA-32365737-1', {
##     cookieDomain: 'ptt.cc',
##     legacyCookieDomain: 'ptt.cc'
##   });
##   ga('send', 'pageview');
##   </script>
##   <script src="//ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js">
##   </script>
##   <script src="//images.ptt.cc/bbs/v2.25/bbs.js">
##   </script>
##  </body>
## </html>

2. a & title

python-scraping html-2

import requests as rq
from bs4 import BeautifulSoup
# Fetch the PTT NBA board index page and parse it with BeautifulSoup.
url = "https://www.ptt.cc/bbs/NBA/index.html"
# A timeout keeps the script from hanging forever on an unresponsive server.
response = rq.get(url, timeout=10)
# Stop on HTTP errors (4xx/5xx) instead of silently parsing an error page.
response.raise_for_status()
html_doc = response.text
soup = BeautifulSoup(html_doc, "lxml")
# The whole <title> tag.
print(soup.title)
## <title>看板 NBA 文章列表 - 批踢踢實業坊</title>
print("-----------------------1")
## -----------------------1
# The tag's name.
print(soup.title.name)
## title
print("-----------------------2")
## -----------------------2
# The text inside the tag.
print(soup.title.string)
## 看板 NBA 文章列表 - 批踢踢實業坊
print("-----------------------3")
## -----------------------3
# soup.a is the first <a> tag of the page (the same element that leads the
# find_all("a") list printed further down).
print(soup.a)
## <a href="/bbs/" id="logo">批踢踢實業坊</a>
print("-----------------------4")
## -----------------------4
print(soup.a.string)
## 批踢踢實業坊
# .contents is a list of the tag's children; .children yields the same nodes
# lazily, so it is wrapped in list() for printing.
print(soup.body.a.contents)
## ['批踢踢實業坊']
print(list(soup.body.a.children))
## ['批踢踢實業坊']
print("-----------------------5")
## -----------------------5
# Every <a> tag in the document, in document order.
print(soup.find_all("a"))
## [<a href="/bbs/" id="logo">批踢踢實業坊</a>, <a class="board" href="/bbs/NBA/index.html"><span class="board-label">看板 </span>NBA</a>, <a class="right small" href="/about.html">關於我們</a>, <a class="right small" href="/contact.html">聯絡資訊</a>, <a class="btn selected" href="/bbs/NBA/index.html">看板</a>, <a class="btn" href="/man/NBA/index.html">精華區</a>, <a class="btn wide" href="/bbs/NBA/index1.html">最舊</a>, <a class="btn wide" href="/bbs/NBA/index5883.html">‹ 上頁</a>, <a class="btn wide disabled">下頁 ›</a>, <a class="btn wide" href="/bbs/NBA/index.html">最新</a>, <a href="/bbs/NBA/M.1525078865.A.13D.html">[情報] 00-01賽季以來季後賽單賽季個人正負值</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+00-01%E8%B3%BD%E5%AD%A3%E4%BB%A5%E4%BE%86%E5%AD%A3%E5%BE%8C%E8%B3%BD%E5%96%AE%E8%B3%BD%E5%AD%A3%E5%80%8B%E4%BA%BA%E6%AD%A3%E8%B2%A0%E5%80%BC">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Akakala99">搜尋看板內 kakala99 的文章</a>, <a href="/bbs/NBA/M.1525080516.A.65D.html">[新聞] 詹皇絕殺勝喬丹?皮朋:數據好看而已</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+%E8%A9%B9%E7%9A%87%E7%B5%95%E6%AE%BA%E5%8B%9D%E5%96%AC%E4%B8%B9%EF%BC%9F%E7%9A%AE%E6%9C%8B%EF%BC%9A%E6%95%B8%E6%93%9A%E5%A5%BD%E7%9C%8B%E8%80%8C%E5%B7%B2">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Akenny949">搜尋看板內 kenny949 的文章</a>, <a href="/bbs/NBA/M.1525081987.A.1E0.html">[公告] 水桶      麻煩板友發文要滿150字</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+%E6%B0%B4%E6%A1%B6++++++%E9%BA%BB%E7%85%A9%E6%9D%BF%E5%8F%8B%E7%99%BC%E6%96%87%E8%A6%81%E6%BB%BF150%E5%AD%97">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3AVedan">搜尋看板內 Vedan 的文章</a>, <a href="/bbs/NBA/M.1525083093.A.2BE.html">[討論] Lue最後一次暫停在說啥</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+Lue%E6%9C%80%E5%BE%8C%E4%B8%80%E6%AC%A1%E6%9A%AB%E5%81%9C%E5%9C%A8%E8%AA%AA%E5%95%A5">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Atom80727">搜尋看板內 tom80727 的文章</a>, <a href="/bbs/NBA/M.1525083726.A.44C.html">[新聞] 對手從慢郎中變急驚風 勇士諸將直喊累</a>, <a 
href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+%E5%B0%8D%E6%89%8B%E5%BE%9E%E6%85%A2%E9%83%8E%E4%B8%AD%E8%AE%8A%E6%80%A5%E9%A9%9A%E9%A2%A8+%E5%8B%87%E5%A3%AB%E8%AB%B8%E5%B0%87%E7%9B%B4%E5%96%8A%E7%B4%AF">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3AAngel0724">搜尋看板內 Angel0724 的文章</a>, <a href="/bbs/NBA/M.1525087717.A.1EF.html">[情報] Oakley:籃球運動歷史上沒有人訓練比LBJ更刻苦</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+Oakley%EF%BC%9A%E7%B1%83%E7%90%83%E9%81%8B%E5%8B%95%E6%AD%B7%E5%8F%B2%E4%B8%8A%E6%B2%92%E6%9C%89%E4%BA%BA%E8%A8%93%E7%B7%B4%E6%AF%94LBJ%E6%9B%B4%E5%88%BB%E8%8B%A6">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3AbigDwinsch">搜尋看板內 bigDwinsch 的文章</a>, <a href="/bbs/NBA/M.1525088557.A.059.html">[花邊] D'Antoni:我們有世界上最好的控衛中的2個</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E8%8A%B1%E9%82%8A%5D+D%27Antoni%EF%BC%9A%E6%88%91%E5%80%91%E6%9C%89%E4%B8%96%E7%95%8C%E4%B8%8A%E6%9C%80%E5%A5%BD%E7%9A%84%E6%8E%A7%E8%A1%9B%E4%B8%AD%E7%9A%842%E5%80%8B">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3AbigDwinsch">搜尋看板內 bigDwinsch 的文章</a>, <a href="/bbs/NBA/M.1525090365.A.5A4.html">Re: [BOX ] Pacers 101:105 Cavaliers</a>, <a href="/bbs/NBA/search?q=thread%3A%5BBOX+%5D+Pacers+101%3A105+Cavaliers">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Asaitou68201">搜尋看板內 saitou68201 的文章</a>, <a href="/bbs/NBA/M.1525091021.A.ECF.html">[新聞] NBA》裴頓:馬刺該把里歐納德賣去籃網</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+NBA%E3%80%8B%E8%A3%B4%E9%A0%93%EF%BC%9A%E9%A6%AC%E5%88%BA%E8%A9%B2%E6%8A%8A%E9%87%8C%E6%AD%90%E7%B4%8D%E5%BE%B7%E8%B3%A3%E5%8E%BB%E7%B1%83%E7%B6%B2">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Alovea">搜尋看板內 lovea 的文章</a>, <a href="/bbs/NBA/M.1525095506.A.35D.html">[情報] 明日裁判情報</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+%E6%98%8E%E6%97%A5%E8%A3%81%E5%88%A4%E6%83%85%E5%A0%B1">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Aaaagun">搜尋看板內 aaagun 的文章</a>, <a href="/bbs/NBA/M.1525097142.A.C92.html">[新聞] 激勵隊友不輸詹皇 
哈登送火箭全隊「蘋果8</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+%E6%BF%80%E5%8B%B5%E9%9A%8A%E5%8F%8B%E4%B8%8D%E8%BC%B8%E8%A9%B9%E7%9A%87+%E5%93%88%E7%99%BB%E9%80%81%E7%81%AB%E7%AE%AD%E5%85%A8%E9%9A%8A%E3%80%8C%E8%98%8B%E6%9E%9C8">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3AAAApower">搜尋看板內 AAApower 的文章</a>, <a href="/bbs/NBA/M.1525097579.A.587.html">[新聞] 7場惡鬥平均41.4分鐘 詹姆斯坦言燃燒殆盡</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%96%B0%E8%81%9E%5D+7%E5%A0%B4%E6%83%A1%E9%AC%A5%E5%B9%B3%E5%9D%8741.4%E5%88%86%E9%90%98+%E8%A9%B9%E5%A7%86%E6%96%AF%E5%9D%A6%E8%A8%80%E7%87%83%E7%87%92%E6%AE%86%E7%9B%A1">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Appibrother">搜尋看板內 ppibrother 的文章</a>, <a href="/bbs/NBA/M.1525098401.A.F0E.html">[討論] 溜馬跟爵士是否很相像?</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+%E6%BA%9C%E9%A6%AC%E8%B7%9F%E7%88%B5%E5%A3%AB%E6%98%AF%E5%90%A6%E5%BE%88%E7%9B%B8%E5%83%8F%3F">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Ak7202001">搜尋看板內 k7202001 的文章</a>, <a href="/bbs/NBA/M.1525101032.A.957.html">[討論] 幫這季的雷霆平反一下</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+%E5%B9%AB%E9%80%99%E5%AD%A3%E7%9A%84%E9%9B%B7%E9%9C%86%E5%B9%B3%E5%8F%8D%E4%B8%80%E4%B8%8B">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Ahhll5566">搜尋看板內 hhll5566 的文章</a>, <a href="/bbs/NBA/M.1525101951.A.A4D.html">[情報] Wesley Matthews 執行2018-19賽季球員選項</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+Wesley+Matthews+%E5%9F%B7%E8%A1%8C2018-19%E8%B3%BD%E5%AD%A3%E7%90%83%E5%93%A1%E9%81%B8%E9%A0%85">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Athnlkj0665">搜尋看板內 thnlkj0665 的文章</a>, <a href="/bbs/NBA/M.1525102956.A.502.html">Re: [討論] 幫這季的雷霆平反一下</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E8%A8%8E%E8%AB%96%5D+%E5%B9%AB%E9%80%99%E5%AD%A3%E7%9A%84%E9%9B%B7%E9%9C%86%E5%B9%B3%E5%8F%8D%E4%B8%80%E4%B8%8B">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3AClownT">搜尋看板內 ClownT 的文章</a>, <a href="/bbs/NBA/M.1504521964.A.8F9.html">[公告] 板規v6.2</a>, <a 
href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+%E6%9D%BF%E8%A6%8Fv6.2">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Aabc7360393">搜尋看板內 abc7360393 的文章</a>, <a href="/bbs/NBA/M.1523270876.A.535.html">[公告] 季後賽期間條款</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+%E5%AD%A3%E5%BE%8C%E8%B3%BD%E6%9C%9F%E9%96%93%E6%A2%9D%E6%AC%BE">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Anamie810303">搜尋看板內 namie810303 的文章</a>, <a href="/bbs/NBA/M.1523545760.A.55B.html">[情報] 2017-18 NBA Playoffs 圖表、賽程、轉播</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E6%83%85%E5%A0%B1%5D+2017-18+NBA+Playoffs+%E5%9C%96%E8%A1%A8%E3%80%81%E8%B3%BD%E7%A8%8B%E3%80%81%E8%BD%89%E6%92%AD">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Amatthew0123">搜尋看板內 matthew0123 的文章</a>, <a href="/bbs/NBA/M.1525054339.A.7C7.html">[公告] NBA 板 開始舉辦樂透!</a>, <a href="/bbs/NBA/search?q=thread%3A%5B%E5%85%AC%E5%91%8A%5D+NBA+%E6%9D%BF+%E9%96%8B%E5%A7%8B%E8%88%89%E8%BE%A6%E6%A8%82%E9%80%8F%21">搜尋同標題文章</a>, <a href="/bbs/NBA/search?q=author%3Aericf129">搜尋看板內 ericf129 的文章</a>]
print("-----------------------6")
## -----------------------6
# The parsed document itself is a BeautifulSoup object.
print(type(soup))
## <class 'bs4.BeautifulSoup'>
print("-----------------------7")
## -----------------------7
# .parent moves one level up: the <title> tag's parent is the whole <head>.
print(soup.title.parent)
## <head>
## <meta charset="utf-8"/>
## <meta content="width=device-width, initial-scale=1" name="viewport"/>
## <title>看板 NBA 文章列表 - 批踢踢實業坊</title>
## <link href="//images.ptt.cc/bbs/v2.25/bbs-common.css" rel="stylesheet" type="text/css"/>
## <link href="//images.ptt.cc/bbs/v2.25/bbs-base.css" media="screen" rel="stylesheet" type="text/css"/>
## <link href="//images.ptt.cc/bbs/v2.25/bbs-custom.css" rel="stylesheet" type="text/css"/>
## <link href="//images.ptt.cc/bbs/v2.25/pushstream.css" media="screen" rel="stylesheet" type="text/css"/>
## <link href="//images.ptt.cc/bbs/v2.25/bbs-print.css" media="print" rel="stylesheet" type="text/css"/>
## </head>
print("-----------------------8")
## -----------------------8
# An individual element such as the first <a> is a Tag object.
print(type(soup.a))
## <class 'bs4.element.Tag'>

3. scraping table tennis posts from “PTT”

python-scraping html-3

import numpy as np
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup
def _child_string(parent, name, **attrs):
    """Return the ``.string`` of the first matching descendant, or NaN.

    Args:
        parent: A BeautifulSoup tag to search in, or None.
        name: Tag name passed to ``parent.find``.
        **attrs: Extra keyword filters passed to ``parent.find``
            (e.g. ``class_="author"``).

    Returns:
        The ``.string`` attribute of the first match, or ``np.nan`` when
        *parent* is None or nothing matches.
    """
    child = parent.find(name, **attrs) if parent is not None else None
    return np.nan if child is None else child.string


# Collect author, recommend count, title and date for every post on two
# index pages of the PTT table tennis board.
author_ids = []
recommends = []
post_titles = []
post_dates = []
for n in ["1498", "1499"]:
    url = "https://www.ptt.cc/bbs/tabletennis/index" + n + ".html"
    # A timeout keeps the script from hanging on an unresponsive server.
    response = rq.get(url, timeout=10)
    # Stop on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    # Each post lives in one <div class="r-ent">.
    posts = soup.find_all("div", class_="r-ent")
    for post in posts:
        # All four fields are read from the same post element so the lists
        # always stay the same length and row-aligned; a missing element
        # becomes NaN rather than being hidden behind a bare ``except``.
        author_ids.append(_child_string(post, "div", class_="author"))
        # The recommend count is the <span> inside this post's "nrec" div;
        # posts without recommendations have no <span>.
        recommends.append(
            _child_string(post.find("div", class_="nrec"), "span")
        )
        # Look for the link inside the title div only, so a link from the
        # post's article menu can never be mistaken for the title.
        post_titles.append(
            _child_string(post.find("div", class_="title"), "a")
        )
        post_dates.append(_child_string(post, "div", class_="date"))
ptt_table_tennis_dict = {
    "author": author_ids,
    "recommends": recommends,
    "title": post_titles,
    "date": post_dates,
}
ptt_table_tennis_df = pd.DataFrame(ptt_table_tennis_dict)
print(ptt_table_tennis_df)
##           author   date recommends                                     title
## 0      huang0406   1/25          3                                [心得] 團體世界盃
## 1         steepy   1/25          2                         Re: [買賣] 二手球拍 近全新
## 2           zkow   1/25          1                  [新聞] 中國乒協新規令眾名將傻眼 卻給張本智和
## 3           zkow   1/25        NaN                  [新聞] 國乒小將首輪游被停賽?詳解乒協選拔新規
## 4           zkow   1/25          3                 [新聞] 江宏傑簽進S.H.E公司!福原愛現身尾牙
## 5           zkow   1/25        NaN                        [新聞] 建德國小男桌團體金牌5連霸
## 6      a11261202   1/26         11                           [徵求] 中直 試打 重量輕佳
## 7      fortemp95   1/26          9                           [買賣] 全新 拍、皮、膠、油
## 8           zkow   1/26          4                      [新聞] 日乒小天后:這支日本隊史上最強
## 9           zkow   1/26        NaN                  [新聞] 歐冠波爾率隊晉級半決賽 奧恰洛夫水谷隼
## 10    thomas0312   1/26        NaN           Re: [比賽] 2018-01-27 台大兩人三分制積分賽 
## 11          zkow   1/26          2                      [情報] 鄭怡靜今晚將出戰歐冠八強團體賽
## 12      horseboa   1/26          8                         [新聞] 日本桌球,新生代快速成長
## 13      pushline   1/26        NaN                                  台北桌球入門教學
## 14    nittakutsp   1/27          5                 [專欄] 僅剩四席的亞運、世錦賽資格 誰能出線? 
## 15       ichieh1   1/27         11                  [Live] 107年中華桌球國手排名賽1/27
## 16     huang0406   1/27        NaN                               [有趣] 早田學鄭怡靜
## 17   edison55026   1/27        NaN                              [買賣] 紅黑碳王WRB
## 18          zkow   1/27        NaN                  [新聞] 劉國梁:乒乓已經融入血液 永遠是乒乓人
## 19   kappaisshit   1/27         15                         [心得] 一樣桌球拍和皮搭配的心得
## 20      x1245678   1/27        NaN                     [買賣]ludeack power+兩面皮
## 21          zkow   1/27          4                 [新聞] 張本智和:今後乒壇將是我的時代 打破中國
## 22       ichieh1   1/27        NaN                  [新聞] 桌球亞運選拔 16歲左手天才林昀儒出線
## 23       ichieh1   1/27          1                   [新聞] 打進女桌亞運代表隊 劉馨尹:一起為國
## 24       ichieh1   1/27          2                  [Live] 107年中華桌球國手排名賽1/28
## 25     wfjh31734   1/28          9                         Re: [心得] 國手排名賽觀後感
## 26   FJUPINGPONG   1/28         10                           [問題] 狂飆龍5反手膠皮選擇
## 27          zkow   1/28          2                         [新聞] 乒超-許昕贏馬龍難救上海
## 28        steepy   1/28        NaN                          [買賣] 二手球拍,近全新,降價
## 29     huang0406   1/28          1                         [影片] 臺灣調皮靜VS瑞典白毛姐
## 30   ilvbaseball   1/28          2                [新聞] 2018年世錦賽、亞運國手選拔賽結束 中華
## 31     wfjh31734   1/28         13                         Re: [心得] 國手排名賽觀後感
## 32  blocktheball   1/28          1  [買賣] (新增)Boll ZLF,KORBEL,黑檀5,Vega pro,藍碳
## 33        realvn   1/29         12                  [新聞] 桌球》亞運國手十人名單出爐 江宏傑確定
## 34    thomas0312   1/29          1                             [比賽] 週六台大積分賽 
## 35            Sn   1/29          7                         [問題] 有人報名桌球單項協會的嗎
## 36          zkow   1/29          1                  [新聞] 以某種形式參與東京奧運 福原愛疑似退役
## 37          zkow   1/29        NaN                  [新聞] 印度終極聯賽第二季賽期定檔 阿魯納埃克
## 38          zkow   1/29          2                  [新聞] 廢直通賽?破人才閒置之困?誰解乒協新政
## 39          zkow   1/30         34                   [新聞] 日本沒人能贏張本智和 這裡有七個答案

五、scikit-learn(python)

1. 玩具資料(Toy datasets)

from sklearn import datasets
import pandas as pd 
# Load the iris toy dataset shipped with scikit-learn.
iris = datasets.load_iris()
# The feature matrix is a plain NumPy array.
print(type(iris.data))
## <class 'numpy.ndarray'>
print("--------------")
## --------------
print(iris.feature_names)
## ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Plain column assignment adds the target as a "species" column; the `.ix`
# indexer was deprecated in pandas 0.20 and removed in pandas 1.0.
iris_df["species"] = iris.target
print(iris_df.head(2))
##    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
## 0                5.1               3.5                1.4               0.2   
## 1                4.9               3.0                1.4               0.2   
## 
##    species  
## 0        0  
## 1        0

2. 資料匯入跟維度轉換(os,np.transpose)

import os
import numpy as np
# Work from the directory that holds height.txt.
os.chdir('/Users/chuang/Desktop/R/python-r/')
# The context manager closes the file handle even if reading fails.
with open("height.txt") as text_file:
    # Keep only the fifth '#'-separated section of the file.
    lines = text_file.read().split('#')[4]
# Split the section into rows once rather than on every loop iteration.
rows = lines.split('\n')
# Rows 1-4 of the section hold comma-separated integers (row 0 is skipped).
data = [[int(field) for field in rows[n].split(',')] for n in range(1, 5)]
print(data)
## [[1, 182, 154, 181, 179, 157], [2, 184, 199, 173, 197, 191], [3, 196, 167, 158, 162, 187], [4, 174, 180, 181, 191, 194]]
# Transpose so that each original row becomes a column.
data = np.transpose(data)
print(data)
## [[  1   2   3   4]
##  [182 184 196 174]
##  [154 199 167 180]
##  [181 173 158 181]
##  [179 197 162 191]
##  [157 191 187 194]]

3. 線性迴歸預測linear_model

import numpy as np
from sklearn.linear_model import LinearRegression
# Paired observations: temperatures and the matching iced tea sales.
temperatures = np.array([29, 28, 34, 31, 25, 29, 32, 31, 24, 33, 25, 31, 26, 30])
iced_tea_sales = np.array([77, 62, 93, 84, 59, 64, 80, 75, 58, 91, 51, 73, 65, 84])
# Number of observations in each array, used to build the column vectors.
lenth = [temperatures.size, iced_tea_sales.size]
# Turn both 1-D arrays into (n, 1) column vectors before fitting.
x = temperatures.reshape(lenth[0], 1)
y = iced_tea_sales.reshape(lenth[1], 1)
lm = LinearRegression()
lm.fit(x, y)
# linear parameters: slope first, then intercept
print(lm.coef_)
## [[ 3.73788546]]
print(lm.intercept_)
## [-36.36123348]
# New temperatures to predict for, again as a column vector.
New = np.array([30, 35]).reshape(-1, 1)
# predict the New data
sales_prediction = lm.predict(New)
print(sales_prediction)
## [[ 75.7753304 ]
##  [ 94.46475771]]
import matplotlib.pyplot as plt
# Observations in black, fitted line in green, new predictions as red triangles.
plt.scatter(x, y, color='black')
plt.plot(x, lm.predict(x), color='green', linewidth=3)
plt.plot(New, sales_prediction, color='red', marker='^', markersize=10)
plt.xticks((25, 30, 35))
plt.yticks((40, 50, 60, 70, 80, 90, 100))
# plot
plt.show()

4. 複迴歸預測、羅吉斯迴歸

Python

import numpy as np
from sklearn.linear_model import LinearRegression
# Two predictors per row, one response value per row.
X = np.array([
    [1, 80],
    [2, 90],
    [3, 112],
    [5, 500],
    [3, 300],
    [3, 220],
    [7, 800],
    [9, 1000],
    [1, 30],
    [2, 100],
])
y = np.array([168, 252, 421, 538, 406, 380, 570, 680, 173, 220])
lm = LinearRegression()
lm.fit(X, y)
# coefficients
print(lm.coef_)
## [ 111.34944277   -0.37588593]
# intercept
print(lm.intercept_)
## 101.428337351
# new data
predicted = np.array([[10, 110]])
predicted_value = lm.predict(predicted)
# print predicted value
print(predicted_value)
## [ 1173.57531319]
# Performance on the training data: mean squared error, R^2 and adjusted R^2.
residuals = lm.predict(X) - y
mse = np.mean(residuals ** 2)
r_squared = lm.score(X, y)
n_samples, n_features = X.shape
adj_r_squared = 1 - (1 - r_squared) * ((n_samples - 1) / (n_samples - n_features - 1))
print(mse)
## 2233.34642843
print(r_squared)
## 0.921136558131
print(adj_r_squared)
## 0.898604146169

R

# Predictors (store, dis) and response (monthly_sales) for the bakery example.
store <- c(1, 2, 3, 5, 3, 3, 7, 9, 1, 2)
dis <- c(80, 90, 112, 500, 300, 220, 800, 1000, 30, 100)
monthly_sales <- c(168, 252, 421, 538, 406, 380, 570, 680, 173, 220)
bakery_df <- data.frame(store, dis, monthly_sales)

# Regress monthly_sales on all remaining columns (store and dis).
lm_fit <- lm(monthly_sales ~ ., data = bakery_df)

# coefficients (slopes only, intercept dropped)
lm_fit$coefficients[-1]
##       store         dis 
## 111.3494428  -0.3758859
# intercept
lm_fit$coefficients[1]
## (Intercept) 
##    101.4283
# new data
predicted_new <- data.frame(store = 10, dis = 110)


predicted_sales <- predict(lm_fit, newdata = predicted_new)

# predicted value
predicted_sales
##        1 
## 1173.575
# performance
# The MSE compares each observation with its own fitted value from the
# training data, not with the single new-data prediction `predicted_sales`.
mse <- mean((monthly_sales - fitted(lm_fit)) ^ 2)

# print
mse
## [1] 2233.346
summary(lm_fit)$r.squared
## [1] 0.9211366
summary(lm_fit)$adj.r.squared
## [1] 0.8986041