認識網路爬蟲
-
利用 BeautifulSoup 套件爬取臺北流行音樂中心最新10則活動。
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape up to 10 event cards from the listing page below and save the
# title, image URL and date text of each one to tmc_events.csv.

# Target page.
url = "https://www.tmc.taipei/tw/blog/show"

# Send the GET request. The timeout keeps the script from hanging forever on
# an unresponsive server, and raise_for_status() stops on an HTTP error
# instead of silently parsing an error page into an empty CSV.
response = requests.get(url, timeout=10)
response.raise_for_status()
response.encoding = 'utf-8'

# Parse the HTML.
soup = BeautifulSoup(response.text, "html.parser")

# Locate the event card blocks and keep the first 10.
events = soup.select("div.c-card-clip")[:10]
print(events)  # raw dump of the matched tags, handy for checking the selector

results = []
for e in events:
    # Event title.
    title_tag = e.select_one("h3.c-card-clip__title")
    title = title_tag.get_text(strip=True) if title_tag is not None else "N/A"

    # Event image. The src is resolved against the page URL so that a
    # relative path still becomes a downloadable absolute URL; an absolute
    # src passes through urljoin unchanged.
    img_tag = e.select_one("img")
    src = img_tag.get("src") if img_tag is not None else None
    img = urljoin(url, src) if src else "N/A"

    # Event info (text of the date element).
    date_tag = e.select_one("span.date")
    date = date_tag.get_text(strip=True) if date_tag is not None else "N/A"

    results.append([title, img, date])

# Write the CSV file. "utf-8-sig" prepends a byte-order mark to the file.
with open("tmc_events.csv", "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.writer(f)
    writer.writerow(["活動名稱", "活動圖片", "活動資訊"])
    writer.writerows(results)

# Next step: present the scraped data as a slideshow.
# (Original note: 將爬取的資料使用投影片輪播的方式展示。)
import csv
import time
from io import BytesIO

import requests
from IPython.display import clear_output, display
from PIL import Image

# Read tmc_events.csv, download the image referenced in the second column of
# each row, and show the images one after another as a simple slideshow.

# Read the CSV (the second column holds the image URL). "utf-8-sig" strips a
# leading byte-order mark when the file has one and reads plain UTF-8 too.
with open("tmc_events.csv", newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  # skip the header row; the default avoids StopIteration on an empty file
    data = list(reader)

# Turn each URL into a PIL Image object.
image_list = []
for item in data:
    if len(item) < 2:
        # Blank or truncated row: there is no image column to read.
        continue
    url = item[1]  # second column is the image URL
    try:
        response = requests.get(url, timeout=10)
        # Fail here on an HTTP error rather than handing an error page to PIL.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        # Image.open() is lazy; load() forces decoding now so that a corrupt
        # file is reported below instead of breaking the display loop later.
        img.load()
    except Exception as e:
        # Deliberately broad: one bad URL or image is reported and skipped so
        # the remaining rows can still be shown.
        print(f"無法讀取 {url}: {e}")
    else:
        image_list.append(img)

# Rotate through the images in the notebook output.
for img in image_list:
    clear_output(wait=True)  # clear the previous image
    display(img)             # show the current image
    time.sleep(2)            # keep each image on screen for 2 seconds