That completes the scraper. The complete program, for reference, is as follows:
# Target: https://www.google.com/search?q=
# Stock ID: 2603

# Import requests, the basic HTTP library for the crawler
import requests
# Import BeautifulSoup, the HTML parsing library
from bs4 import BeautifulSoup

# Append the stock ID to the target URL, e.g. Evergreen Marine: 2603
targetURL = 'https://www.google.com/search?q='
def get_stock_page(url, stock_id):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/66.0.3359.181 Safari/537.36'}
    resp = requests.get(url + stock_id, headers=headers)
    if resp.status_code != 200:
        print('Invalid url:', resp.url)
        return None
    else:
        return resp.text
def get_stock_info(webtxt):
    soup = BeautifulSoup(webtxt, 'html.parser')
    stock = dict()
    sections = soup.find_all('g-card-section')
    # The 4th g-card-section holds two tables (left and right) with the stock information
    for table in sections[3].find_all('table'):
        for tr in table.find_all('tr')[:3]:  # [:3] keeps only the first 3 rows
            key = tr.find_all('td')[0].text.lower().strip()
            # lower() converts to lowercase; strip() removes leading/trailing whitespace
            value = tr.find_all('td')[1].text.strip()
            stock[key] = value
    return stock
if __name__ == '__main__':
    page = get_stock_page(targetURL, '2603')
    if page:
        stock = get_stock_info(page)
        for k, v in stock.items():
            print(k, v)
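
One caveat: Google changes its search-result markup fairly often, so sections[3] may not exist and get_stock_info would then crash with an IndexError. Below is a minimal defensive sketch of the same parsing step; the name get_stock_info_safe is hypothetical and not part of the original code.

# A defensive variant of get_stock_info (a sketch; get_stock_info_safe is a
# hypothetical name, not from the original program). It returns an empty dict
# instead of raising IndexError when the expected markup is missing.
def get_stock_info_safe(webtxt):
    soup = BeautifulSoup(webtxt, 'html.parser')
    stock = dict()
    sections = soup.find_all('g-card-section')
    if len(sections) < 4:
        # Google changed the page layout, or the request was blocked
        print('Expected g-card-section blocks not found')
        return stock
    for table in sections[3].find_all('table'):
        for tr in table.find_all('tr')[:3]:
            tds = tr.find_all('td')
            if len(tds) >= 2:  # skip rows that do not form a key/value pair
                stock[tds[0].text.lower().strip()] = tds[1].text.strip()
    return stock

Swapping get_stock_info for get_stock_info_safe in the __main__ block leaves the rest of the program unchanged.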