12345678910111213141516171819202122 |
- import pytesseract
- from PIL import Image
- import cv2
- import numpy as np
def extract_text_from_image(image_path, *, lang='chi_sim', threshold=150):
    """Extract text from an image file using Tesseract OCR.

    The image is loaded with OpenCV, converted to grayscale, binarized with
    an inverted fixed threshold, and then passed to pytesseract.

    Args:
        image_path: Path of the image file to read.
        lang: Tesseract language code passed to ``image_to_string``.
            Defaults to ``'chi_sim'`` (Simplified Chinese).
        threshold: Gray level used as the binarization cut-off. Pixels
            brighter than this become 0; all others become 255.

    Returns:
        The recognized text with leading and trailing whitespace removed.

    Raises:
        OSError: If OpenCV could not load the image (missing file,
            unreadable file, or unsupported format).
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly rather than letting cvtColor fail with an opaque error.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Binarize with OpenCV (inverted: bright pixels -> 0, dark pixels -> 255).
    # NOTE(review): Tesseract 4+ documentation recommends dark text on a light
    # background; confirm that the inversion is intended for the input images.
    _, thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    # Run OCR on the binarized image.
    text = pytesseract.image_to_string(thresh, lang=lang)
    return text.strip()
# Example usage: run OCR on a sample image and print what was recognized.
if __name__ == "__main__":
    extracted = extract_text_from_image('path_to_image.png')
    print("Extracted Text:", extracted)
|