import re

from transformers import pipeline

# Pre-trained DistilBERT text classifier (SST-2 sentiment checkpoint).
classifier = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Token-level named-entity recogniser used to locate person names.
ner_model = pipeline("ner", model="dslim/bert-base-NER")

# Mainland-China ID numbers: 18 characters (17 digits + digit or X checksum)
# or the legacy 15-digit form.  The look-arounds stop the pattern from
# matching a slice of a longer digit run (e.g. a bank-card or order number).
_ID_NUMBER_RE = re.compile(r'(?<!\d)(?:\d{17}[\dxX]|\d{15})(?!\d)')

# Marker the tokenizer puts in front of a word piece that continues the
# previous token.  NOTE(review): assumes WordPiece-style output from the
# BERT tokenizer behind ``ner_model`` -- confirm if the model is swapped.
_SUBWORD_PREFIX = '##'


def classify_activity_type(description):
    """Classify an activity into a type based on its description text.

    Args:
        description: Free-text description of the activity.

    Returns:
        One of the activity-type strings "学术讲座", "文体比赛" or "其他",
        chosen from the label predicted by ``classifier``.
    """
    result = classifier(description)[0]
    label = result['label'].lower()

    # Simplified mapping from the classifier label to a concrete activity
    # type.
    # NOTE(review): the configured checkpoint is a sentiment model, so this
    # mapping is only a placeholder for a real activity-type classifier --
    # confirm the intended model.
    if 'positive' in label:
        return "学术讲座"
    if 'negative' in label:
        return "文体比赛"
    return "其他"


def _first_person_name(entities):
    """Join the tokens of the first person found in NER output into a name.

    Args:
        entities: Iterable of token dicts, each with an ``'entity'`` label
            and a ``'word'`` string, as produced by ``ner_model``.

    Returns:
        The first person's name with whole words separated by single
        spaces, or ``None`` when no person token is present.
    """
    words = []
    for entity in entities:
        label = entity['entity']
        if label not in ('B-PER', 'I-PER'):
            continue

        token = entity['word']
        is_subword = token.startswith(_SUBWORD_PREFIX)
        piece = token[len(_SUBWORD_PREFIX):] if is_subword else token

        if not words:
            # Also accept a leading I-PER: the model does not always emit a
            # B-PER first, and the name must not be built on top of None.
            words.append(piece)
        elif is_subword:
            # A continuation piece belongs to the current word whatever its
            # B-/I- tag is, so glue it on without a space.
            words[-1] += piece
        elif label == 'B-PER':
            # A new whole-word B-PER starts a second person; stop so their
            # tokens are not merged into the first name.
            break
        else:
            words.append(piece)

    return ' '.join(words) or None


def extract_information(text):
    """Extract a person name and an ID number from free text.

    Args:
        text: The text to analyse.

    Returns:
        A dict with keys ``'name'`` and ``'id_number'``.  ``'name'`` is the
        first person name reported by ``ner_model`` and ``'id_number'`` is
        the first standalone 18- or 15-character ID number in ``text``;
        either value is ``None`` when nothing was found.
    """
    name = _first_person_name(ner_model(text))

    # The ID number is matched with a regular expression rather than NER.
    match = _ID_NUMBER_RE.search(text)
    id_number = match.group(0) if match else None

    return {'name': name, 'id_number': id_number}