Browse Source

feat(font): 实现字体下载器核心功能并添加依赖

添加requests、beautifulsoup4、lxml和tqdm依赖
实现字体天下和站长字体网站的爬取功能
支持按分类、商用免费和最新字体下载
添加文件下载、解压和进度显示功能
完善用户交互界面和错误处理
liuyuqi-cnb 4 days ago
parent
commit
3727411230

+ 26 - 0
crawl_font.egg-info/PKG-INFO

@@ -0,0 +1,26 @@
+Metadata-Version: 2.4
+Name: crawl-font
+Version: 0.1.0
+Summary: Font downloader for fonts.net.cn and other font websites
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+Requires-Dist: requests>=2.32.5
+Requires-Dist: beautifulsoup4>=4.12.0
+Requires-Dist: lxml>=5.0.0
+Requires-Dist: tqdm>=4.66.0
+
+# crawl_font
+
+A command-line tool for downloading fonts from Chinese font websites.
+
+## Develop
+
+```
+uv sync
+uv run main.py
+```
+
+## License
+
+Licensed under the [Apache 2.0](LICENSE) © [liuyuqi.gov@msn.cn](https://github.com/jianboy)
+

+ 1 - 0
crawl_font.egg-info/dependency_links.txt

@@ -0,0 +1 @@
+

+ 4 - 0
crawl_font.egg-info/requires.txt

@@ -0,0 +1,4 @@
+requests>=2.32.5
+beautifulsoup4>=4.12.0
+lxml>=5.0.0
+tqdm>=4.66.0

+ 537 - 20
crawl_font/font.py

@@ -4,38 +4,555 @@
 @Contact :   liuyuqi.gov@msn.cn
 @Time    :   2024/07/30 19:02:15
 @License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
-@Desc    :   
+@Desc    :   Font downloader for fonts.net.cn and chinaz.com
 '''
+import os
+import re
+import time
+import zipfile
 import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse
+from tqdm import tqdm
+from typing import List, Dict, Optional, Tuple
+
 
 class Font(object):
-    """docstring for Font"""
+    """Font downloader class"""
+    
     header = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
     }
-    def __init__(self):
+    
+    FONTSNET_BASE = 'https://www.fonts.net.cn'
+    CHINAZ_BASE = 'https://font.chinaz.com'
+    
+    def __init__(self, download_dir: str = './fonts'):
         self.sess = requests.Session()
         self.sess.headers.update(self.header)
+        self.download_dir = download_dir
+        self._ensure_download_dir()
         
+    def _ensure_download_dir(self):
+        if not os.path.exists(self.download_dir):
+            os.makedirs(self.download_dir)
+    
     def run(self):
-        pass
-
+        print("=" * 50)
+        print("字体下载工具")
+        print("=" * 50)
+        print("\n请选择要爬取的网站:")
+        print("1. 字体天下 (fonts.net.cn) - 商用免费字体")
+        print("2. 站长字体 (font.chinaz.com)")
+        print("3. 下载指定字体ID")
+        print("4. 退出")
+        
+        choice = input("\n请输入选项 (1-4): ").strip()
+        
+        if choice == '1':
+            self.crawl_fontsnet()
+        elif choice == '2':
+            self.crawl_chinaz()
+        elif choice == '3':
+            font_id = input("请输入字体ID (例如: 37476120124): ").strip()
+            if font_id:
+                self.download_font_by_id(font_id)
+        elif choice == '4':
+            print("退出程序")
+            return
+        else:
+            print("无效选项,退出程序")
+    
     def crawl_fontsnet(self):
-        # download 532 商用字体
-        self.sess.headers.update(
-            {
-                "Origin": "https://www.fonts.net.cn/"
-            }
-        )
+        print("\n" + "=" * 50)
+        print("开始爬取字体天下 (fonts.net.cn)")
+        print("=" * 50)
+        
+        print("\n请选择爬取方式:")
+        print("1. 按分类爬取 (中文字体/英文字体/图形字体)")
+        print("2. 爬取商用免费字体")
+        print("3. 爬取最新字体")
+        print("4. 返回主菜单")
+        
+        choice = input("\n请输入选项 (1-4): ").strip()
+        
+        if choice == '1':
+            self._crawl_by_category()
+        elif choice == '2':
+            self._crawl_free_commercial()
+        elif choice == '3':
+            self._crawl_latest()
+        elif choice == '4':
+            return
+        else:
+            print("无效选项")
+    
+    def _crawl_by_category(self):
+        print("\n分类选项:")
+        print("1. 中文字体")
+        print("2. 英文字体")
+        print("3. 图形字体")
+        print("4. 返回")
+        
+        choice = input("\n请选择分类 (1-4): ").strip()
+        
+        category_urls = {
+            '1': '/font-zh.html',
+            '2': '/font-en.html',
+            '3': '/font-other.html',
+        }
+        
+        if choice in category_urls:
+            url = self.FONTSNET_BASE + category_urls[choice]
+            fonts = self._get_fonts_from_list_page(url)
+            self._process_font_list(fonts)
+        elif choice == '4':
+            return
+        else:
+            print("无效选项")
+    
+    def _crawl_free_commercial(self):
+        print("\n爬取商用免费字体...")
+        url = self.FONTSNET_BASE + '/font-zh.html'
+        fonts = self._get_fonts_from_list_page(url, filter_free=True)
+        self._process_font_list(fonts)
+    
+    def _crawl_latest(self):
+        print("\n爬取最新字体...")
+        url = self.FONTSNET_BASE + '/font-zh.html'
+        fonts = self._get_fonts_from_list_page(url)
+        self._process_font_list(fonts[:20])
+    
+    def _get_fonts_from_list_page(self, url: str, filter_free: bool = False) -> List[Dict]:
+        fonts = []
+        try:
+            print(f"正在访问: {url}")
+            resp = self.sess.get(url, timeout=30)
+            resp.encoding = 'utf-8'
+            soup = BeautifulSoup(resp.text, 'lxml')
+            
+            font_links = soup.find_all('a', href=re.compile(r'/font-\d+\.html'))
+            
+            seen_ids = set()
+            for link in font_links:
+                href = link.get('href', '')
+                match = re.search(r'/font-(\d+)\.html', href)
+                if match:
+                    font_id = match.group(1)
+                    if font_id not in seen_ids:
+                        seen_ids.add(font_id)
+                        font_name = link.get_text(strip=True)
+                        if not font_name:
+                            font_name = f'font_{font_id}'
+                        
+                        fonts.append({
+                            'id': font_id,
+                            'name': font_name,
+                            'url': urljoin(self.FONTSNET_BASE, href)
+                        })
+            
+            print(f"找到 {len(fonts)} 个字体")
+            
+        except Exception as e:
+            print(f"获取字体列表失败: {e}")
+        
+        return fonts
+    
+    def _process_font_list(self, fonts: List[Dict]):
+        if not fonts:
+            print("没有找到可下载的字体")
+            return
+        
+        print(f"\n找到 {len(fonts)} 个字体:")
+        for i, font in enumerate(fonts[:10], 1):
+            print(f"  {i}. {font['name']} (ID: {font['id']})")
+        
+        if len(fonts) > 10:
+            print(f"  ... 还有 {len(fonts) - 10} 个字体")
+        
+        print("\n操作选项:")
+        print("1. 下载所有字体")
+        print("2. 下载指定范围 (例如: 1-5)")
+        print("3. 输入字体ID下载")
+        print("4. 返回")
+        
+        choice = input("\n请选择操作 (1-4): ").strip()
+        
+        if choice == '1':
+            for font in tqdm(fonts, desc="下载字体"):
+                self.download_font(font)
+        elif choice == '2':
+            range_str = input("请输入范围 (例如: 1-5): ").strip()
+            try:
+                start, end = map(int, range_str.split('-'))
+                for font in fonts[start-1:end]:
+                    self.download_font(font)
+            except Exception as e:
+                print(f"输入格式错误: {e}")
+        elif choice == '3':
+            font_id = input("请输入字体ID: ").strip()
+            if font_id:
+                self.download_font_by_id(font_id)
+        elif choice == '4':
+            return
+        else:
+            print("无效选项")
+    
+    def download_font_by_id(self, font_id: str):
+        font = {
+            'id': font_id,
+            'name': f'font_{font_id}',
+            'url': f'{self.FONTSNET_BASE}/font-{font_id}.html'
+        }
+        self.download_font(font)
+    
+    def download_font(self, font: Dict) -> bool:
+        print(f"\n正在处理字体: {font['name']} (ID: {font['id']})")
+        
+        detail_url = font.get('url', '')
+        if not detail_url:
+            detail_url = f'{self.FONTSNET_BASE}/font-{font["id"]}.html'
+        
+        try:
+            download_urls = self._parse_detail_page(detail_url)
+            
+            if not download_urls:
+                print(f"  未找到下载链接: {font['name']}")
+                return False
+            
+            success = False
+            for url_info in download_urls:
+                download_url = url_info.get('url', '')
+                download_type = url_info.get('type', 'unknown')
+                
+                print(f"  尝试下载 ({download_type}): {download_url[:50]}...")
+                
+                save_path = self._download_file(download_url, font['name'])
+                if save_path:
+                    print(f"  下载成功: {save_path}")
+                    success = True
+                    break
+            
+            return success
+            
+        except Exception as e:
+            print(f"  下载失败: {font['name']}, 错误: {e}")
+            return False
+    
+    def _parse_detail_page(self, url: str) -> List[Dict]:
+        download_urls = []
+        
+        try:
+            resp = self.sess.get(url, timeout=30)
+            resp.encoding = 'utf-8'
+            soup = BeautifulSoup(resp.text, 'lxml')
+            
+            download_links = soup.find_all('a', string=re.compile(r'下载|download|Download'))
+            
+            for link in download_links:
+                href = link.get('href', '')
+                if href and not href.startswith('#') and not href.startswith('javascript'):
+                    full_url = urljoin(self.FONTSNET_BASE, href)
+                    text = link.get_text(strip=True)
+                    
+                    if '免费' in text or 'free' in text.lower():
+                        download_type = 'free'
+                    elif '官网' in text or 'official' in text.lower():
+                        download_type = 'official'
+                    else:
+                        download_type = 'direct'
+                    
+                    download_urls.append({
+                        'url': full_url,
+                        'type': download_type,
+                        'text': text
+                    })
+            
+            all_links = soup.find_all('a', href=True)
+            for link in all_links:
+                href = link.get('href', '')
+                if re.search(r'\.(zip|rar|7z|ttf|otf|woff)', href, re.I):
+                    full_url = urljoin(self.FONTSNET_BASE, href)
+                    if not any(u['url'] == full_url for u in download_urls):
+                        download_urls.append({
+                            'url': full_url,
+                            'type': 'direct_file',
+                            'text': link.get_text(strip=True)
+                        })
+            
+            scripts = soup.find_all('script')
+            for script in scripts:
+                script_text = script.get_text() if script else ''
+                if script_text:
+                    url_patterns = [
+                        r'["\'](https?://[^"\']+\.(?:zip|rar|7z|ttf|otf|woff))["\']',
+                        r'["\'](/download/[^"\']+)["\']',
+                    ]
+                    for pattern in url_patterns:
+                        matches = re.findall(pattern, script_text)
+                        for match in matches:
+                            full_url = urljoin(self.FONTSNET_BASE, match)
+                            if not any(u['url'] == full_url for u in download_urls):
+                                download_urls.append({
+                                    'url': full_url,
+                                    'type': 'script_extracted',
+                                    'text': '从脚本提取'
+                                })
+            
+            print(f"  解析到 {len(download_urls)} 个下载链接")
+            
+        except Exception as e:
+            print(f"  解析详情页失败: {e}")
+        
+        return download_urls
+    
+    def _download_file(self, url: str, font_name: str) -> Optional[str]:
+        try:
+            headers = self.header.copy()
+            headers['Referer'] = self.FONTSNET_BASE
+            
+            resp = self.sess.get(url, headers=headers, stream=True, timeout=60, allow_redirects=True)
+            
+            if resp.status_code != 200:
+                print(f"    HTTP状态码: {resp.status_code}")
+                return None
+            
+            content_type = resp.headers.get('Content-Type', '')
+            content_disposition = resp.headers.get('Content-Disposition', '')
+            
+            filename = self._extract_filename(content_disposition, url, font_name)
+            
+            safe_filename = self._sanitize_filename(filename)
+            save_path = os.path.join(self.download_dir, safe_filename)
+            
+            total_size = int(resp.headers.get('Content-Length', 0))
+            
+            print(f"    保存到: {save_path}")
+            if total_size > 0:
+                print(f"    文件大小: {total_size / 1024:.1f} KB")
+            
+            with open(save_path, 'wb') as f:
+                if total_size > 0:
+                    with tqdm(total=total_size, unit='B', unit_scale=True, desc='    下载') as pbar:
+                        for chunk in resp.iter_content(chunk_size=8192):
+                            if chunk:
+                                f.write(chunk)
+                                pbar.update(len(chunk))
+                else:
+                    for chunk in resp.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+            
+            if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
+                file_size = os.path.getsize(save_path)
+                print(f"    下载完成,大小: {file_size / 1024:.1f} KB")
+                
+                if save_path.lower().endswith('.zip'):
+                    self._extract_zip(save_path)
+                
+                return save_path
+            else:
+                if os.path.exists(save_path):
+                    os.remove(save_path)
+                return None
+                
+        except requests.exceptions.Timeout:
+            print(f"    下载超时")
+            return None
+        except requests.exceptions.ConnectionError as e:
+            print(f"    连接错误: {e}")
+            return None
+        except Exception as e:
+            print(f"    下载错误: {e}")
+            return None
+    
+    def _extract_filename(self, content_disposition: str, url: str, default_name: str) -> str:
+        if content_disposition:
+            match = re.search(r'filename[^;=\n]*=((["\']).*?\2|[^;\n]*)', content_disposition)
+            if match:
+                filename = match.group(1).strip('"\'')
+                if filename:
+                    return filename
+        
+        parsed_url = urlparse(url)
+        path = parsed_url.path
+        if path and path != '/':
+            filename = os.path.basename(path)
+            if filename and '.' in filename:
+                return filename
+        
+        return f"{default_name}.zip"
+    
+    def _sanitize_filename(self, filename: str) -> str:
+        invalid_chars = '<>:"/\\|?*'
+        for char in invalid_chars:
+            filename = filename.replace(char, '_')
+        
+        if len(filename) > 200:
+            name, ext = os.path.splitext(filename)
+            filename = name[:190] + ext
+        
+        return filename
+    
+    def _extract_zip(self, zip_path: str):
+        try:
+            extract_dir = os.path.splitext(zip_path)[0]
+            if not os.path.exists(extract_dir):
+                os.makedirs(extract_dir)
+            
+            with zipfile.ZipFile(zip_path, 'r') as zf:
+                print(f"    解压到: {extract_dir}")
+                zf.extractall(extract_dir)
+                
+                font_files = []
+                for root, dirs, files in os.walk(extract_dir):
+                    for file in files:
+                        if file.lower().endswith(('.ttf', '.otf', '.woff', '.woff2')):
+                            font_files.append(os.path.join(root, file))
+                
+                if font_files:
+                    print(f"    找到 {len(font_files)} 个字体文件:")
+                    for ff in font_files[:5]:
+                        print(f"      - {os.path.basename(ff)}")
+                    if len(font_files) > 5:
+                        print(f"      ... 还有 {len(font_files) - 5} 个")
+            
+        except zipfile.BadZipFile:
+            print(f"    警告: 不是有效的 ZIP 文件")
+        except Exception as e:
+            print(f"    解压失败: {e}")
+    
+    def crawl_chinaz(self):
+        print("\n" + "=" * 50)
+        print("开始爬取站长字体 (font.chinaz.com)")
+        print("=" * 50)
+        
+        print("\n功能开发中...")
+        print("站长字体网站结构:")
+        print("  - 首页: https://font.chinaz.com/")
+        print("  - 分类页面: https://font.chinaz.com/zhongwenziti.html")
+        print("  - 详情页: https://font.chinaz.com/{font_id}.html")
+        
+        print("\n请输入要下载的字体详情页URL,或输入 'back' 返回主菜单:")
+        url = input("URL: ").strip()
+        
+        if url.lower() == 'back':
+            return
+        
+        if url.startswith('http'):
+            self._download_chinaz_font(url)
+        else:
+            print("无效的URL")
+    
+    def _download_chinaz_font(self, url: str) -> bool:
+        print(f"\n正在处理: {url}")
+        
+        try:
+            resp = self.sess.get(url, timeout=30)
+            resp.encoding = 'utf-8'
+            soup = BeautifulSoup(resp.text, 'lxml')
+            
+            font_name = 'unknown_chinaz_font'
+            title_tag = soup.find('title')
+            if title_tag:
+                title_text = title_tag.get_text()
+                match = re.search(r'([^|_]+)', title_text)
+                if match:
+                    font_name = match.group(1).strip()
+            
+            download_urls = []
+            
+            download_links = soup.find_all('a', href=True)
+            for link in download_links:
+                href = link.get('href', '')
+                text = link.get_text(strip=True)
+                
+                if re.search(r'下载|download|本地|高速', text, re.I):
+                    if href and not href.startswith('#') and not href.startswith('javascript'):
+                        full_url = urljoin(self.CHINAZ_BASE, href)
+                        download_urls.append({
+                            'url': full_url,
+                            'type': 'chinaz_download',
+                            'text': text
+                        })
+            
+            for link in download_links:
+                href = link.get('href', '')
+                if re.search(r'\.(zip|rar|7z|ttf|otf)', href, re.I):
+                    full_url = urljoin(self.CHINAZ_BASE, href)
+                    if not any(u['url'] == full_url for u in download_urls):
+                        download_urls.append({
+                            'url': full_url,
+                            'type': 'direct_file',
+                            'text': link.get_text(strip=True)
+                        })
+            
+            print(f"  解析到 {len(download_urls)} 个下载链接")
+            
+            if download_urls:
+                font = {
+                    'id': 'chinaz_' + str(int(time.time())),
+                    'name': font_name,
+                    'url': url
+                }
+                
+                for url_info in download_urls:
+                    print(f"  尝试下载: {url_info['url'][:60]}...")
+                    save_path = self._download_file(url_info['url'], font_name)
+                    if save_path:
+                        print(f"  下载成功: {save_path}")
+                        return True
+            
+            print("  未找到可下载的链接")
+            return False
+            
+        except Exception as e:
+            print(f"  处理失败: {e}")
+            return False
+    
+    def download(self, url: str, save_path: str = None) -> Optional[str]:
+        print(f"\n下载: {url}")
+        
+        try:
+            resp = self.sess.get(url, stream=True, timeout=60)
+            
+            if resp.status_code != 200:
+                print(f"  HTTP状态码: {resp.status_code}")
+                return None
+            
+            if not save_path:
+                content_disposition = resp.headers.get('Content-Disposition', '')
+                save_path = self._extract_filename(content_disposition, url, 'downloaded_font')
+                save_path = os.path.join(self.download_dir, self._sanitize_filename(save_path))
+            
+            total_size = int(resp.headers.get('Content-Length', 0))
+            
+            with open(save_path, 'wb') as f:
+                if total_size > 0:
+                    with tqdm(total=total_size, unit='B', unit_scale=True, desc='  下载') as pbar:
+                        for chunk in resp.iter_content(chunk_size=8192):
+                            if chunk:
+                                f.write(chunk)
+                                pbar.update(len(chunk))
+                else:
+                    for chunk in resp.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+            
+            print(f"  保存到: {save_path}")
+            return save_path
+            
+        except Exception as e:
+            print(f"  下载失败: {e}")
+            return None
 
 
-        pass
def main():
    """Entry point: build a Font downloader with defaults and run its menu."""
    Font().run()


if __name__ == '__main__':
    main()

+ 4 - 1
pyproject.toml

@@ -1,9 +1,12 @@
 [project]
 name = "crawl-font"
 version = "0.1.0"
-description = "Add your description here"
+description = "Font downloader for fonts.net.cn and other font websites"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     "requests>=2.32.5",
+    "beautifulsoup4>=4.12.0",
+    "lxml>=5.0.0",
+    "tqdm>=4.66.0",
 ]