# coding=utf-8
'''
Parse a cached Lianjia second-hand housing listing page with BeautifulSoup.

The script reads HTML from ``resultFile.txt``, extracts the total page count
from the pagination block, then prints the second ``div.info`` element and
the room type found in it.

Created on 2017-07-01
@version: python3.6
@author: liuyuqi
'''
from bs4 import BeautifulSoup

url = 'http://sh.lianjia.com/ershoufang/pudongxinqu'

# The cached page was originally produced with the (now disabled) snippet:
# res=requests.get(url)
# res=res.text.encode(res.encoding).decode('utf-8')
# file = open("resultFile.txt",'w',encoding = 'utf-8')
# file.write(res)

# The context manager closes the file even if reading raises.
with open("resultFile.txt", 'r', encoding='UTF-8') as file:
    res = file.read()

# Parse the saved page source as HTML with bs4.
soup = BeautifulSoup(res, 'html.parser')

# Extract the total number of result pages from the pagination block.
page = soup.find_all('div', {'class': 'c-pagination'})
pages = [i.strip() for i in page[0].text.split('\n')]
# The position of the last page number depends on how many lines the
# pagination text splits into.
if len(pages) > 3:
    total_pages = int(pages[-3])
else:
    total_pages = int(pages[-2])
# print(total_pages)

find_all = soup.find_all(name='div', attrs={'class': 'info'})
# print(len(find_all))

# Inspect the second matching "info" element.
res2 = find_all[1]
title = res2.find('a')['title']
print(res2)

# Both fields below live in the element's "info-row" divs; query them once.
info_rows = res2.find_all('div', {'class': 'info-row'})
name = info_rows[1].find_all('span')[0].text  # residential community name of the listing
room_type = info_rows[0].find_all('span')[1].text  # room type (layout) of the listing
# size = res2.find_all('div',{'class':'info-row'})[0].find_all('span')[2].text[:-3]  # floor area of the listing
print(room_type)