|
@@ -40,12 +40,12 @@ class Sse(object):
|
|
|
|
|
|
def init_browser(self):
    ''' Start an Edge WebDriver session and store it on ``self.driver``.

    The browser is launched with the ``--headless`` argument, the
    ``detach`` experimental option set to ``True`` and the
    ``enable-logging`` switch excluded. The driver binary path comes from
    ``EdgeChromiumDriverManager().install()``.
    '''
    edge_options = webdriver.EdgeOptions()
    edge_options.add_argument('--headless')

    # Experimental options, applied in the same order as before.
    experimental_options = (
        ("detach", True),
        ('excludeSwitches', ['enable-logging']),
    )
    for option_name, option_value in experimental_options:
        edge_options.add_experimental_option(option_name, option_value)

    driver_path = EdgeChromiumDriverManager().install()
    self.driver = webdriver.Edge(
        service=webdriver.EdgeService(driver_path),
        options=edge_options,
    )
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
def crawl(self):
|
|
|
self.get_hangye_data()
|
|
|
self.get_diqu_data()
|
|
@@ -55,70 +55,82 @@ class Sse(object):
|
|
|
def get_hangye_data(self):
    ''' Collect the stocks of every industry into ``self.hangye_date``.

    Loads the industry index page, reads each industry's name, code and
    detail link, then visits every detail page and appends one
    ``[industry name, industry code, stock code, stock name]`` row per
    stock found there.

    Nothing is raised for crawl failures: any exception is printed as
    ``error:<exception>``. A failure on the index page ends the method;
    a failure on a detail page only abandons the rest of that industry.
    '''
    table_xpath = '/html/body/div[8]/div/div[2]/div/div[1]/div[1]/table'
    url_hangyes = f'{self._host}/assortment/stock/areatrade/trade/'
    try:
        print(f'driver url:{url_hangyes}')
        self.driver.get(url_hangyes)
        # The wait targets the <table> element rather than a text() node:
        # Selenium raised InvalidSelectorException ("It should be an
        # element") when the locator ended in .../a/text().
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.XPATH, table_xpath)))
        time.sleep(2)

        index_tree = etree.HTML(self.driver.page_source)
        industry_names = index_tree.xpath(table_xpath + '/tbody[2]/tr/td[1]/a/text()')
        industry_links = index_tree.xpath(table_xpath + '/tbody[2]/tr/td[1]/a/@href')
        industry_codes = index_tree.xpath(table_xpath + '/tbody[2]/tr/td[2]/text()')

        for i, link in enumerate(industry_links):
            print(f'driver get url:{self._host}{link}')
            try:
                self.driver.get(f'{self._host}{link}')

                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, table_xpath)))
                time.sleep(1)

                detail_tree = etree.HTML(self.driver.page_source)
                stock_codes = detail_tree.xpath(table_xpath + '/tbody/tr/td[1]/a/text()')
                stock_names = detail_tree.xpath(table_xpath + '/tbody/tr/td[2]/text()')
                for j, stock_code in enumerate(stock_codes):
                    self.hangye_date.append(
                        [industry_names[i], industry_codes[i], stock_code, stock_names[j]])
            except Exception as e:
                print(f'error:{e}')
    except Exception as e:
        print(f'error:{e}')
|
|
|
|
|
|
def get_diqu_data(self):
    ''' Collect the stocks listed under every region into ``self.diqu_date``.

    Loads the region index page, reads each region's name and detail link,
    then visits every detail page and appends one
    ``[region name, stock code, stock name]`` row per stock found there.

    Crawl failures are printed as ``error:<exception>`` instead of being
    raised: a failure while loading the index page ends the method early,
    a failure on a detail page only abandons the rest of that region.
    '''
    table_xpath = '/html/body/div[8]/div/div[2]/div/div[1]/div[1]/table'
    url_diqus = f'{self._host}/assortment/stock/areatrade/area/'
    print(f'driver url:{url_diqus}')
    try:
        self.driver.get(url_diqus)
        # Keep the wait inside the try: until() raises TimeoutException
        # when the table never shows up, and that used to escape this
        # method even though the driver.get() failure just above was
        # handled. The wait targets the <table> element rather than a
        # text() node because Selenium rejects locators that resolve to
        # text (InvalidSelectorException: "It should be an element").
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.XPATH, table_xpath)))
        time.sleep(1)
        html = self.driver.page_source
    except Exception as e:
        print(f'error:{e}')
        return

    soup = etree.HTML(html)
    diqu_names = soup.xpath(table_xpath + '/tbody/tr/td[1]/a/text()')
    diqu_names_url = soup.xpath(table_xpath + '/tbody/tr/td[1]/a/@href')

    for i, link in enumerate(diqu_names_url):
        try:
            print(f'driver get url:{self._host}{link}')
            self.driver.get(f'{self._host}{link}')

            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, table_xpath)))
            time.sleep(1)

            detail = etree.HTML(self.driver.page_source)
            codes = detail.xpath(table_xpath + '/tbody/tr/td[1]/a/text()')
            names = detail.xpath(table_xpath + '/tbody/tr/td[2]/text()')
            for j, code in enumerate(codes):
                self.diqu_date.append([diqu_names[i], code, names[j]])
        except Exception as e:
            print(f'error:{e}')
|
|
|
|
|
|
-
|
|
|
def save_to_csv(self):
    ''' Write the collected rows to ``sse_hangye.csv`` and ``sse_diqu.csv``.

    Both files are created (or overwritten) in the current working
    directory as UTF-8 CSV: a header row first, then the rows held in
    ``self.hangye_date`` and ``self.diqu_date`` respectively.
    '''
    # (file name, header row, attribute holding the data rows)
    targets = (
        ('sse_hangye.csv', ['行业名称','行业代码','股票代码','名称'], 'hangye_date'),
        ('sse_diqu.csv', ['地区名称','股票代码','名称'], 'diqu_date'),
    )
    for file_name, header, rows_attr in targets:
        with open(file_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            # Looked up only now, after the header is written, so the
            # attribute is read at the same point as before.
            writer.writerows(getattr(self, rows_attr))
|