# coding=utf-8
"""Compare HTML parsing with BeautifulSoup (bs4) and lxml.

Example: fetch the Baidu "more products" page and print every product
name using each of the two approaches.

Created on 2017-07-03
@version: python3.6
@author: liuyuqi
"""
import requests
from bs4 import BeautifulSoup
from lxml import etree

url = "https://www.baidu.com/more/"

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page.
res = requests.get(url, timeout=10)
res.raise_for_status()

# Decode the body as UTF-8 by overriding the encoding requests guessed
# from the headers; this also works when no encoding was detected
# (res.encoding is None), where encode(res.encoding) would raise.
res.encoding = 'utf-8'
html = res.text

# --- Parse with BeautifulSoup ---
soup = BeautifulSoup(html, 'lxml')
sections = soup.find_all('div', {'class': 'con'})
print(len(sections))
for section in sections:
    # Search inside the current section (not the whole document) so each
    # product link is printed once instead of repeating the same anchor.
    for link in section.find_all('a'):
        print(link.text)

# --- Parse with lxml ---
# Example paths of individual product links:
#   //*[@id="content"]/div[1]/div[2]/a
#   //*[@id="content"]/div[2]/div[2]/a
# Dropping the positional indexes matches the links of every section.
selector = etree.HTML(html)
titles = selector.xpath('//*[@id="content"]/div/div/a/text()')
for title in titles:
    print(title)