4 days ago · 3727411230
--- a/crawl_font.egg-info/PKG-INFO
+++ b/crawl_font.egg-info/PKG-INFO
@@ -0,0 +1,26 @@
 
															+Metadata-Version: 2.4
														
 
															+Name: crawl-font
														
 
															+Version: 0.1.0
														
 
															+Summary: Font downloader for fonts.net.cn and other font websites
														
 
															+Requires-Python: >=3.12
														
 
															+Description-Content-Type: text/markdown
														
 
															+Requires-Dist: requests>=2.32.5
														
 
															+Requires-Dist: beautifulsoup4>=4.12.0
														
 
															+Requires-Dist: lxml>=5.0.0
														
 
															+Requires-Dist: tqdm>=4.66.0
														
 
															+
														
 
															+# crawl_font
														
 
															+
														
 
															+font download tool
														
 
															+
														
 
															+## Develop
														
 
															+
														
 
															+```
														
 
															+uv sync
														
 
															+uv run main.py
														
 
															+```
														
 
															+
														
 
															+## License
														
 
															+
														
 
															+Licensed under the [Apache 2.0](LICENSE) © [liuyuqi.gov@msn.cn](https://github.com/jianboy)
														
 
															+
														
--- a/crawl_font.egg-info/dependency_links.txt
+++ b/crawl_font.egg-info/dependency_links.txt
@@ -0,0 +1 @@
 
															+
														
--- a/crawl_font.egg-info/requires.txt
+++ b/crawl_font.egg-info/requires.txt
@@ -0,0 +1,4 @@
 
															+requests>=2.32.5
														
 
															+beautifulsoup4>=4.12.0
														
 
															+lxml>=5.0.0
														
 
															+tqdm>=4.66.0
														
--- a/crawl_font/font.py
+++ b/crawl_font/font.py
@@ -4,38 +4,555 @@
 
															 @Contact :   liuyuqi.gov@msn.cn
														
 
															 @Time    :   2024/07/30 19:02:15
														
 
															 @License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
														
 
															-@Desc    :   
														
 
															+@Desc    :   Font downloader for fonts.net.cn and chinaz.com
														
 
															 '''
														
 
															+import os
														
 
															+import re
														
 
															+import time
														
 
															+import zipfile
														
 
															 import requests
														
 
															+from bs4 import BeautifulSoup
														
 
															+from urllib.parse import urljoin, urlparse
														
 
															+from tqdm import tqdm
														
 
															+from typing import List, Dict, Optional, Tuple
														
 
															+
														
 
															 class Font(object):
														
 
															-    """docstring for Font"""
														
 
															+    """Font downloader class"""
														
 
															+    
														
 
															     header = {
														
 
															-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
														
 
															+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
														
 
															+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
														
 
															+        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
														
 
															     }
														
 
															-    def __init__(self):
														
 
															+    
														
 
															+    FONTSNET_BASE = 'https://www.fonts.net.cn'
														
 
															+    CHINAZ_BASE = 'https://font.chinaz.com'
														
 
															+    
														
 
															+    def __init__(self, download_dir: str = './fonts'):
														
 
															         self.sess = requests.Session()
														
 
															         self.sess.headers.update(self.header)
														
 
															+        self.download_dir = download_dir
														
 
															+        self._ensure_download_dir()
														
 
															+    def _ensure_download_dir(self):
														
 
															+        if not os.path.exists(self.download_dir):
														
 
															+            os.makedirs(self.download_dir)
														
 
															+    
														
 
															     def run(self):
														
 
															-        pass
														
 
															-
														
 
															+        print("=" * 50)
														
 
															+        print("字体下载工具")
														
 
															+        print("=" * 50)
														
 
															+        print("\n请选择要爬取的网站:")
														
 
															+        print("1. 字体天下 (fonts.net.cn) - 商用免费字体")
														
 
															+        print("2. 站长字体 (font.chinaz.com)")
														
 
															+        print("3. 下载指定字体ID")
														
 
															+        print("4. 退出")
														
 
															+        
														
 
															+        choice = input("\n请输入选项 (1-4): ").strip()
														
 
															+        
														
 
															+        if choice == '1':
														
 
															+            self.crawl_fontsnet()
														
 
															+        elif choice == '2':
														
 
															+            self.crawl_chinaz()
														
 
															+        elif choice == '3':
														
 
															+            font_id = input("请输入字体ID (例如: 37476120124): ").strip()
														
 
															+            if font_id:
														
 
															+                self.download_font_by_id(font_id)
														
 
															+        elif choice == '4':
														
 
															+            print("退出程序")
														
 
															+            return
														
 
															+        else:
														
 
															+            print("无效选项，退出程序")
														
 
															+    
														
 
															     def crawl_fontsnet(self):
														
 
															-        # download 532 商用字体
														
 
															-        self.sess.headers.update(
														
 
															-            {
														
 
															-                "Origin": "https://www.fonts.net.cn/"
														
 
															-            }
														
 
															-        )
														
 
															+        print("\n" + "=" * 50)
														
 
															+        print("开始爬取字体天下 (fonts.net.cn)")
														
 
															+        print("=" * 50)
														
 
															+        
														
 
															+        print("\n请选择爬取方式:")
														
 
															+        print("1. 按分类爬取 (中文字体/英文字体/图形字体)")
														
 
															+        print("2. 爬取商用免费字体")
														
 
															+        print("3. 爬取最新字体")
														
 
															+        print("4. 返回主菜单")
														
 
															+        
														
 
															+        choice = input("\n请输入选项 (1-4): ").strip()
														
 
															+        
														
 
															+        if choice == '1':
														
 
															+            self._crawl_by_category()
														
 
															+        elif choice == '2':
														
 
															+            self._crawl_free_commercial()
														
 
															+        elif choice == '3':
														
 
															+            self._crawl_latest()
														
 
															+        elif choice == '4':
														
 
															+            return
														
 
															+        else:
														
 
															+            print("无效选项")
														
 
															+    
														
 
															+    def _crawl_by_category(self):
														
 
															+        print("\n分类选项:")
														
 
															+        print("1. 中文字体")
														
 
															+        print("2. 英文字体")
														
 
															+        print("3. 图形字体")
														
 
															+        print("4. 返回")
														
 
															+        
														
 
															+        choice = input("\n请选择分类 (1-4): ").strip()
														
 
															+        
														
 
															+        category_urls = {
														
 
															+            '1': '/font-zh.html',
														
 
															+            '2': '/font-en.html',
														
 
															+            '3': '/font-other.html',
														
 
															+        }
														
 
															+        
														
 
															+        if choice in category_urls:
														
 
															+            url = self.FONTSNET_BASE + category_urls[choice]
														
 
															+            fonts = self._get_fonts_from_list_page(url)
														
 
															+            self._process_font_list(fonts)
														
 
															+        elif choice == '4':
														
 
															+            return
														
 
															+        else:
														
 
															+            print("无效选项")
														
 
															+    
														
 
															+    def _crawl_free_commercial(self):
														
 
															+        print("\n爬取商用免费字体...")
														
 
															+        url = self.FONTSNET_BASE + '/font-zh.html'
														
 
															+        fonts = self._get_fonts_from_list_page(url, filter_free=True)
														
 
															+        self._process_font_list(fonts)
														
 
															+    
														
 
															+    def _crawl_latest(self):
														
 
															+        print("\n爬取最新字体...")
														
 
															+        url = self.FONTSNET_BASE + '/font-zh.html'
														
 
															+        fonts = self._get_fonts_from_list_page(url)
														
 
															+        self._process_font_list(fonts[:20])
														
 
															+    
														
 
															+    def _get_fonts_from_list_page(self, url: str, filter_free: bool = False) -> List[Dict]:
														
 
															+        fonts = []
														
 
															+        try:
														
 
															+            print(f"正在访问: {url}")
														
 
															+            resp = self.sess.get(url, timeout=30)
														
 
															+            resp.encoding = 'utf-8'
														
 
															+            soup = BeautifulSoup(resp.text, 'lxml')
														
 
															+            
														
 
															+            font_links = soup.find_all('a', href=re.compile(r'/font-\d+\.html'))
														
 
															+            
														
 
															+            seen_ids = set()
														
 
															+            for link in font_links:
														
 
															+                href = link.get('href', '')
														
 
															+                match = re.search(r'/font-(\d+)\.html', href)
														
 
															+                if match:
														
 
															+                    font_id = match.group(1)
														
 
															+                    if font_id not in seen_ids:
														
 
															+                        seen_ids.add(font_id)
														
 
															+                        font_name = link.get_text(strip=True)
														
 
															+                        if not font_name:
														
 
															+                            font_name = f'font_{font_id}'
														
 
															+                        
														
 
															+                        fonts.append({
														
 
															+                            'id': font_id,
														
 
															+                            'name': font_name,
														
 
															+                            'url': urljoin(self.FONTSNET_BASE, href)
														
 
															+                        })
														
 
															+            
														
 
															+            print(f"找到 {len(fonts)} 个字体")
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            print(f"获取字体列表失败: {e}")
														
 
															+        
														
 
															+        return fonts
														
 
															+    
														
 
															+    def _process_font_list(self, fonts: List[Dict]):
														
 
															+        if not fonts:
														
 
															+            print("没有找到可下载的字体")
														
 
															+            return
														
 
															+        
														
 
															+        print(f"\n找到 {len(fonts)} 个字体:")
														
 
															+        for i, font in enumerate(fonts[:10], 1):
														
 
															+            print(f"  {i}. {font['name']} (ID: {font['id']})")
														
 
															+        
														
 
															+        if len(fonts) > 10:
														
 
															+            print(f"  ... 还有 {len(fonts) - 10} 个字体")
														
 
															+        
														
 
															+        print("\n操作选项:")
														
 
															+        print("1. 下载所有字体")
														
 
															+        print("2. 下载指定范围 (例如: 1-5)")
														
 
															+        print("3. 输入字体ID下载")
														
 
															+        print("4. 返回")
														
 
															+        
														
 
															+        choice = input("\n请选择操作 (1-4): ").strip()
														
 
															+        
														
 
															+        if choice == '1':
														
 
															+            for font in tqdm(fonts, desc="下载字体"):
														
 
															+                self.download_font(font)
														
 
															+        elif choice == '2':
														
 
															+            range_str = input("请输入范围 (例如: 1-5): ").strip()
														
 
															+            try:
														
 
															+                start, end = map(int, range_str.split('-'))
														
 
															+                for font in fonts[start-1:end]:
														
 
															+                    self.download_font(font)
														
 
															+            except Exception as e:
														
 
															+                print(f"输入格式错误: {e}")
														
 
															+        elif choice == '3':
														
 
															+            font_id = input("请输入字体ID: ").strip()
														
 
															+            if font_id:
														
 
															+                self.download_font_by_id(font_id)
														
 
															+        elif choice == '4':
														
 
															+            return
														
 
															+        else:
														
 
															+            print("无效选项")
														
 
															+    
														
 
															+    def download_font_by_id(self, font_id: str):
														
 
															+        font = {
														
 
															+            'id': font_id,
														
 
															+            'name': f'font_{font_id}',
														
 
															+            'url': f'{self.FONTSNET_BASE}/font-{font_id}.html'
														
 
															+        }
														
 
															+        self.download_font(font)
														
 
															+    
														
 
															+    def download_font(self, font: Dict) -> bool:
														
 
															+        print(f"\n正在处理字体: {font['name']} (ID: {font['id']})")
														
 
															+        
														
 
															+        detail_url = font.get('url', '')
														
 
															+        if not detail_url:
														
 
															+            detail_url = f'{self.FONTSNET_BASE}/font-{font["id"]}.html'
														
 
															+        
														
 
															+        try:
														
 
															+            download_urls = self._parse_detail_page(detail_url)
														
 
															+            
														
 
															+            if not download_urls:
														
 
															+                print(f"  未找到下载链接: {font['name']}")
														
 
															+                return False
														
 
															+            
														
 
															+            success = False
														
 
															+            for url_info in download_urls:
														
 
															+                download_url = url_info.get('url', '')
														
 
															+                download_type = url_info.get('type', 'unknown')
														
 
															+                
														
 
															+                print(f"  尝试下载 ({download_type}): {download_url[:50]}...")
														
 
															+                
														
 
															+                save_path = self._download_file(download_url, font['name'])
														
 
															+                if save_path:
														
 
															+                    print(f"  下载成功: {save_path}")
														
 
															+                    success = True
														
 
															+                    break
														
 
															+            
														
 
															+            return success
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            print(f"  下载失败: {font['name']}, 错误: {e}")
														
 
															+            return False
														
 
															+    
														
 
															+    def _parse_detail_page(self, url: str) -> List[Dict]:
														
 
															+        download_urls = []
														
 
															+        
														
 
															+        try:
														
 
															+            resp = self.sess.get(url, timeout=30)
														
 
															+            resp.encoding = 'utf-8'
														
 
															+            soup = BeautifulSoup(resp.text, 'lxml')
														
 
															+            
														
 
															+            download_links = soup.find_all('a', string=re.compile(r'下载|download|Download'))
														
 
															+            
														
 
															+            for link in download_links:
														
 
															+                href = link.get('href', '')
														
 
															+                if href and not href.startswith('#') and not href.startswith('javascript'):
														
 
															+                    full_url = urljoin(self.FONTSNET_BASE, href)
														
 
															+                    text = link.get_text(strip=True)
														
 
															+                    
														
 
															+                    if '免费' in text or 'free' in text.lower():
														
 
															+                        download_type = 'free'
														
 
															+                    elif '官网' in text or 'official' in text.lower():
														
 
															+                        download_type = 'official'
														
 
															+                    else:
														
 
															+                        download_type = 'direct'
														
 
															+                    
														
 
															+                    download_urls.append({
														
 
															+                        'url': full_url,
														
 
															+                        'type': download_type,
														
 
															+                        'text': text
														
 
															+                    })
														
 
															+            
														
 
															+            all_links = soup.find_all('a', href=True)
														
 
															+            for link in all_links:
														
 
															+                href = link.get('href', '')
														
 
															+                if re.search(r'\.(zip|rar|7z|ttf|otf|woff)', href, re.I):
														
 
															+                    full_url = urljoin(self.FONTSNET_BASE, href)
														
 
															+                    if not any(u['url'] == full_url for u in download_urls):
														
 
															+                        download_urls.append({
														
 
															+                            'url': full_url,
														
 
															+                            'type': 'direct_file',
														
 
															+                            'text': link.get_text(strip=True)
														
 
															+                        })
														
 
															+            
														
 
															+            scripts = soup.find_all('script')
														
 
															+            for script in scripts:
														
 
															+                script_text = script.get_text() if script else ''
														
 
															+                if script_text:
														
 
															+                    url_patterns = [
														
 
															+                        r'["\'](https?://[^"\']+\.(?:zip|rar|7z|ttf|otf|woff))["\']',
														
 
															+                        r'["\'](/download/[^"\']+)["\']',
														
 
															+                    ]
														
 
															+                    for pattern in url_patterns:
														
 
															+                        matches = re.findall(pattern, script_text)
														
 
															+                        for match in matches:
														
 
															+                            full_url = urljoin(self.FONTSNET_BASE, match)
														
 
															+                            if not any(u['url'] == full_url for u in download_urls):
														
 
															+                                download_urls.append({
														
 
															+                                    'url': full_url,
														
 
															+                                    'type': 'script_extracted',
														
 
															+                                    'text': '从脚本提取'
														
 
															+                                })
														
 
															+            
														
 
															+            print(f"  解析到 {len(download_urls)} 个下载链接")
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            print(f"  解析详情页失败: {e}")
														
 
															+        
														
 
															+        return download_urls
														
 
															+    
														
 
															+    def _download_file(self, url: str, font_name: str) -> Optional[str]:
														
 
															+        try:
														
 
															+            headers = self.header.copy()
														
 
															+            headers['Referer'] = self.FONTSNET_BASE
														
 
															+            
														
 
															+            resp = self.sess.get(url, headers=headers, stream=True, timeout=60, allow_redirects=True)
														
 
															+            
														
 
															+            if resp.status_code != 200:
														
 
															+                print(f"    HTTP状态码: {resp.status_code}")
														
 
															+                return None
														
 
															+            
														
 
															+            content_type = resp.headers.get('Content-Type', '')
														
 
															+            content_disposition = resp.headers.get('Content-Disposition', '')
														
 
															+            
														
 
															+            filename = self._extract_filename(content_disposition, url, font_name)
														
 
															+            
														
 
															+            safe_filename = self._sanitize_filename(filename)
														
 
															+            save_path = os.path.join(self.download_dir, safe_filename)
														
 
															+            
														
 
															+            total_size = int(resp.headers.get('Content-Length', 0))
														
 
															+            
														
 
															+            print(f"    保存到: {save_path}")
														
 
															+            if total_size > 0:
														
 
															+                print(f"    文件大小: {total_size / 1024:.1f} KB")
														
 
															+            
														
 
															+            with open(save_path, 'wb') as f:
														
 
															+                if total_size > 0:
														
 
															+                    with tqdm(total=total_size, unit='B', unit_scale=True, desc='    下载') as pbar:
														
 
															+                        for chunk in resp.iter_content(chunk_size=8192):
														
 
															+                            if chunk:
														
 
															+                                f.write(chunk)
														
 
															+                                pbar.update(len(chunk))
														
 
															+                else:
														
 
															+                    for chunk in resp.iter_content(chunk_size=8192):
														
 
															+                        if chunk:
														
 
															+                            f.write(chunk)
														
 
															+            
														
 
															+            if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
														
 
															+                file_size = os.path.getsize(save_path)
														
 
															+                print(f"    下载完成，大小: {file_size / 1024:.1f} KB")
														
 
															+                
														
 
															+                if save_path.lower().endswith('.zip'):
														
 
															+                    self._extract_zip(save_path)
														
 
															+                
														
 
															+                return save_path
														
 
															+            else:
														
 
															+                if os.path.exists(save_path):
														
 
															+                    os.remove(save_path)
														
 
															+                return None
														
 
															+                
														
 
															+        except requests.exceptions.Timeout:
														
 
															+            print(f"    下载超时")
														
 
															+            return None
														
 
															+        except requests.exceptions.ConnectionError as e:
														
 
															+            print(f"    连接错误: {e}")
														
 
															+            return None
														
 
															+        except Exception as e:
														
 
															+            print(f"    下载错误: {e}")
														
 
															+            return None
														
 
															+    
														
 
															+    def _extract_filename(self, content_disposition: str, url: str, default_name: str) -> str:
														
 
															+        if content_disposition:
														
 
															+            match = re.search(r'filename[^;=\n]*=((["\']).*?\2|[^;\n]*)', content_disposition)
														
 
															+            if match:
														
 
															+                filename = match.group(1).strip('"\'')
														
 
															+                if filename:
														
 
															+                    return filename
														
 
															+        
														
 
															+        parsed_url = urlparse(url)
														
 
															+        path = parsed_url.path
														
 
															+        if path and path != '/':
														
 
															+            filename = os.path.basename(path)
														
 
															+            if filename and '.' in filename:
														
 
															+                return filename
														
 
															+        
														
 
															+        return f"{default_name}.zip"
														
 
															+    
														
 
															+    def _sanitize_filename(self, filename: str) -> str:
														
 
															+        invalid_chars = '<>:"/\\|?*'
														
 
															+        for char in invalid_chars:
														
 
															+            filename = filename.replace(char, '_')
														
 
															+        
														
 
															+        if len(filename) > 200:
														
 
															+            name, ext = os.path.splitext(filename)
														
 
															+            filename = name[:190] + ext
														
 
															+        
														
 
															+        return filename
														
 
															+    
														
 
															+    def _extract_zip(self, zip_path: str):
														
 
															+        try:
														
 
															+            extract_dir = os.path.splitext(zip_path)[0]
														
 
															+            if not os.path.exists(extract_dir):
														
 
															+                os.makedirs(extract_dir)
														
 
															+            
														
 
															+            with zipfile.ZipFile(zip_path, 'r') as zf:
														
 
															+                print(f"    解压到: {extract_dir}")
														
 
															+                zf.extractall(extract_dir)
														
 
															+                
														
 
															+                font_files = []
														
 
															+                for root, dirs, files in os.walk(extract_dir):
														
 
															+                    for file in files:
														
 
															+                        if file.lower().endswith(('.ttf', '.otf', '.woff', '.woff2')):
														
 
															+                            font_files.append(os.path.join(root, file))
														
 
															+                
														
 
															+                if font_files:
														
 
															+                    print(f"    找到 {len(font_files)} 个字体文件:")
														
 
															+                    for ff in font_files[:5]:
														
 
															+                        print(f"      - {os.path.basename(ff)}")
														
 
															+                    if len(font_files) > 5:
														
 
															+                        print(f"      ... 还有 {len(font_files) - 5} 个")
														
 
															+            
														
 
															+        except zipfile.BadZipFile:
														
 
															+            print(f"    警告: 不是有效的 ZIP 文件")
														
 
															+        except Exception as e:
														
 
															+            print(f"    解压失败: {e}")
														
 
															+    
														
 
															+    def crawl_chinaz(self):
														
 
															+        print("\n" + "=" * 50)
														
 
															+        print("开始爬取站长字体 (font.chinaz.com)")
														
 
															+        print("=" * 50)
														
 
															+        
														
 
															+        print("\n功能开发中...")
														
 
															+        print("站长字体网站结构:")
														
 
															+        print("  - 首页: https://font.chinaz.com/")
														
 
															+        print("  - 分类页面: https://font.chinaz.com/zhongwenziti.html")
														
 
															+        print("  - 详情页: https://font.chinaz.com/{font_id}.html")
														
 
															+        
														
 
															+        print("\n请输入要下载的字体详情页URL，或输入 'back' 返回主菜单:")
														
 
															+        url = input("URL: ").strip()
														
 
															+        
														
 
															+        if url.lower() == 'back':
														
 
															+            return
														
 
															+        
														
 
															+        if url.startswith('http'):
														
 
															+            self._download_chinaz_font(url)
														
 
															+        else:
														
 
															+            print("无效的URL")
														
 
															+    
														
 
															+    def _download_chinaz_font(self, url: str) -> bool:
														
 
															+        print(f"\n正在处理: {url}")
														
 
															+        
														
 
															+        try:
														
 
															+            resp = self.sess.get(url, timeout=30)
														
 
															+            resp.encoding = 'utf-8'
														
 
															+            soup = BeautifulSoup(resp.text, 'lxml')
														
 
															+            
														
 
															+            font_name = 'unknown_chinaz_font'
														
 
															+            title_tag = soup.find('title')
														
 
															+            if title_tag:
														
 
															+                title_text = title_tag.get_text()
														
 
															+                match = re.search(r'([^|_]+)', title_text)
														
 
															+                if match:
														
 
															+                    font_name = match.group(1).strip()
														
 
															+            
														
 
															+            download_urls = []
														
 
															+            
														
 
															+            download_links = soup.find_all('a', href=True)
														
 
															+            for link in download_links:
														
 
															+                href = link.get('href', '')
														
 
															+                text = link.get_text(strip=True)
														
 
															+                
														
 
															+                if re.search(r'下载|download|本地|高速', text, re.I):
														
 
															+                    if href and not href.startswith('#') and not href.startswith('javascript'):
														
 
															+                        full_url = urljoin(self.CHINAZ_BASE, href)
														
 
															+                        download_urls.append({
														
 
															+                            'url': full_url,
														
 
															+                            'type': 'chinaz_download',
														
 
															+                            'text': text
														
 
															+                        })
														
 
															+            
														
 
															+            for link in download_links:
														
 
															+                href = link.get('href', '')
														
 
															+                if re.search(r'\.(zip|rar|7z|ttf|otf)', href, re.I):
														
 
															+                    full_url = urljoin(self.CHINAZ_BASE, href)
														
 
															+                    if not any(u['url'] == full_url for u in download_urls):
														
 
															+                        download_urls.append({
														
 
															+                            'url': full_url,
														
 
															+                            'type': 'direct_file',
														
 
															+                            'text': link.get_text(strip=True)
														
 
															+                        })
														
 
															+            
														
 
															+            print(f"  解析到 {len(download_urls)} 个下载链接")
														
 
															+            
														
 
															+            if download_urls:
														
 
															+                font = {
														
 
															+                    'id': 'chinaz_' + str(int(time.time())),
														
 
															+                    'name': font_name,
														
 
															+                    'url': url
														
 
															+                }
														
 
															+                
														
 
															+                for url_info in download_urls:
														
 
															+                    print(f"  尝试下载: {url_info['url'][:60]}...")
														
 
															+                    save_path = self._download_file(url_info['url'], font_name)
														
 
															+                    if save_path:
														
 
															+                        print(f"  下载成功: {save_path}")
														
 
															+                        return True
														
 
															+            
														
 
															+            print("  未找到可下载的链接")
														
 
															+            return False
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            print(f"  处理失败: {e}")
														
 
															+            return False
														
 
															+    
														
 
															+    def download(self, url: str, save_path: str = None) -> Optional[str]:
														
 
															+        print(f"\n下载: {url}")
														
 
															+        
														
 
															+        try:
														
 
															+            resp = self.sess.get(url, stream=True, timeout=60)
														
 
															+            
														
 
															+            if resp.status_code != 200:
														
 
															+                print(f"  HTTP状态码: {resp.status_code}")
														
 
															+                return None
														
 
															+            
														
 
															+            if not save_path:
														
 
															+                content_disposition = resp.headers.get('Content-Disposition', '')
														
 
															+                save_path = self._extract_filename(content_disposition, url, 'downloaded_font')
														
 
															+                save_path = os.path.join(self.download_dir, self._sanitize_filename(save_path))
														
 
															+            
														
 
															+            total_size = int(resp.headers.get('Content-Length', 0))
														
 
															+            
														
 
															+            with open(save_path, 'wb') as f:
														
 
															+                if total_size > 0:
														
 
															+                    with tqdm(total=total_size, unit='B', unit_scale=True, desc='  下载') as pbar:
														
 
															+                        for chunk in resp.iter_content(chunk_size=8192):
														
 
															+                            if chunk:
														
 
															+                                f.write(chunk)
														
 
															+                                pbar.update(len(chunk))
														
 
															+                else:
														
 
															+                    for chunk in resp.iter_content(chunk_size=8192):
														
 
															+                        if chunk:
														
 
															+                            f.write(chunk)
														
 
															+            
														
 
															+            print(f"  保存到: {save_path}")
														
 
															+            return save_path
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            print(f"  下载失败: {e}")
														
 
															+            return None
														
 
															-        pass
														
 
															+def main():
														
 
															+    font_downloader = Font()
														
 
															+    font_downloader.run()
														
 
															-    def crawl_chinaz(self):
														
 
															-        pass
														
 
															-    
														
 
															-    def download(self):
														
 
															-        pass
														
 
															-if __name__=='__main__':
														
 
															-    pass
														
 
# Start the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
														
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,12 @@
 
															 [project]
														
 
															 name = "crawl-font"
														
 
															 version = "0.1.0"
														
 
															-description = "Add your description here"
														
 
															+description = "Font downloader for fonts.net.cn and other font websites"
														
 
															 readme = "README.md"
														
 
															 requires-python = ">=3.12"
														
 
															 dependencies = [
														
 
															     "requests>=2.32.5",
														
 
															+    "beautifulsoup4>=4.12.0",
														
 
															+    "lxml>=5.0.0",
														
 
															+    "tqdm>=4.66.0",
														
 
															 ]