# lyq / flask_image_matcher
# utils.py
import os
import cv2
import numpy as np
from PIL import Image
import openpyxl
from skimage.metrics import structural_similarity as ssim
from config import Config
from docx import Document

def allowed_file(filename):
    """Tell whether *filename* carries an extension listed in the config.

    :param filename: file name to inspect
    :return: True when the text after the last '.' (lower-cased) is a
        member of ``Config.ALLOWED_EXTENSIONS``; False when there is no
        '.' in the name or the extension is not listed
    """
    if '.' not in filename:
        return False

    # Only the part after the final dot counts as the extension.
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in Config.ALLOWED_EXTENSIONS

def calculate_similarity(imageA, imageB):
    """Return the structural-similarity (SSIM) score of two images.

    Both images are reduced to a single grayscale channel before being
    compared. If their sizes differ, ``imageB`` is resized to the size of
    ``imageA`` so that the comparison does not fail on a shape mismatch.

    :param imageA: reference image as a NumPy array, either BGR(A) colour
        (3-D) or already grayscale (2-D)
    :param imageB: candidate image, same accepted layouts as ``imageA``
    :return: the SSIM score reported by ``skimage.metrics.structural_similarity``
    """
    grays = []
    for image in (imageA, imageB):
        # A 2-D array is already single-channel; cvtColor(BGR2GRAY) would
        # reject it, so it is used as-is.
        if image.ndim == 2:
            grays.append(image)
        else:
            grays.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    grayA, grayB = grays

    if grayA.shape != grayB.shape:
        # cv2.resize takes the target size as (width, height), i.e. the
        # reverse of NumPy's (rows, cols) shape order.
        target_size = (grayA.shape[1], grayA.shape[0])
        grayB = cv2.resize(grayB, target_size, interpolation=cv2.INTER_AREA)

    # Only the scalar score is needed, so the full similarity map is not
    # requested.
    return ssim(grayA, grayB)

def extract_images_from_excel(file_path, time_label):
    """Collect the images embedded in every sheet of an Excel workbook.

    :param file_path: path of the workbook to open with openpyxl
    :param time_label: currently unused; kept so the signature matches
        ``extract_images_from_wps``
    :return: list of dicts, one per image, with the keys
        ``'image'`` (a PIL ``Image``), ``'position'`` (a 1-based
        ``(row, column)`` tuple taken from the anchor's top-left cell, or
        ``None`` when the anchor carries no cell reference) and
        ``'sheet_name'``
    """
    # Imported once per call instead of once per image.
    from io import BytesIO

    workbook = openpyxl.load_workbook(file_path)
    images_info = []

    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        # NOTE(review): `_images` and `_data()` are private openpyxl
        # members; confirm they still exist when upgrading openpyxl.
        for img in sheet._images:
            image = Image.open(BytesIO(img._data()))

            # Anchors without a `_from` marker (e.g. absolute anchors)
            # have no cell position; report None rather than raising.
            marker = getattr(img.anchor, '_from', None)
            if marker is None:
                position = None
            else:
                # openpyxl markers are 0-based; shift to 1-based indices.
                position = (marker.row + 1, marker.col + 1)

            images_info.append({
                'image': image,
                'position': position,
                'sheet_name': sheet_name
            })

    return images_info

def extract_images_from_wps(file_path, time_label):
    """Extract every embedded image from a .docx document.

    Each image is written to ``output_images/<time_label>_images`` as
    ``image_<n>.png`` and also returned in memory as a NumPy array.

    :param file_path: path of the document to open with python-docx
    :param time_label: label used to build the output folder name
    :return: list of dicts, one per image, with the keys ``'image'``
        (NumPy array), ``'filename'`` (base name of ``file_path``),
        ``'position'`` (``'image_<n>'``) and ``'similarity_score'``
        (always ``None`` here)
    """
    output_folder = os.path.join('output_images', f'{time_label}_images')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    document = Document(file_path)
    source_name = os.path.basename(file_path)
    image_counter = 0
    images_info = []

    for rel in document.part.rels.values():
        # External relationships (e.g. hyperlinks whose URL contains
        # "image") have no target part, so they are skipped.
        if rel.is_external or "image" not in rel.target_ref:
            continue

        image_counter += 1
        image_part = rel.target_part
        # NOTE(review): the file is always named *.png even though the
        # blob is written unchanged and may be in another format.
        image_filename = f"image_{image_counter}.png"
        image_filepath = os.path.join(output_folder, image_filename)

        with open(image_filepath, 'wb') as image_file:
            image_file.write(image_part.blob)

        # Load the pixels into a NumPy array and release the file handle.
        with Image.open(image_filepath) as pil_image:
            img_np = np.array(pil_image)

        images_info.append({
            'image': img_np,
            'filename': source_name,
            'position': f'image_{image_counter}',  # documents have no sheet_name concept
            'similarity_score': None  # filled in later, if at all
        })

    print(f"Finished extracting images from {file_path}. Total images extracted: {image_counter}")
    return images_info

def process_files(upload_folder, ref_image_path, similarity_threshold, time_label):
    """Find images in uploaded documents that resemble a reference image.

    Every ``.xlsx``, ``.docx`` and ``.wps`` file in ``upload_folder`` is
    scanned (extension match is case-insensitive); other files are
    ignored. Each extracted image is scored against the reference image
    with ``calculate_similarity``.

    :param upload_folder: directory whose files are scanned
    :param ref_image_path: path of the reference image, read with OpenCV
    :param similarity_threshold: minimum score for an image to be reported
    :param time_label: passed through to the extraction helpers
    :return: list of dicts with the keys ``'filename'``, ``'sheet_name'``
        (``'N/A'`` when the source has none), ``'position'``,
        ``'similarity_score'`` and ``'image'`` (NumPy array)
    :raises ValueError: if the reference image cannot be read
    """
    ref_img = cv2.imread(ref_image_path)
    if ref_img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(
            f"Reference image is missing or not a readable image: {ref_image_path}"
        )

    results = []

    for filename in os.listdir(upload_folder):
        file_path = os.path.join(upload_folder, filename)
        lowered = filename.lower()

        if lowered.endswith(".xlsx"):
            images_info = extract_images_from_excel(file_path, time_label)
        elif lowered.endswith((".docx", ".wps")):
            # NOTE(review): ".wps" files are handed to the docx reader;
            # confirm such files really are docx packages.
            images_info = extract_images_from_wps(file_path, time_label)
        else:
            continue

        for info in images_info:
            # Works for both PIL images (Excel) and NumPy arrays (docx).
            img_np = np.array(info['image'])
            if img_np.ndim == 3:
                img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
            else:
                # Promote single-channel images to BGR so the similarity
                # helper always receives a colour image.
                img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR)
            similarity_score = calculate_similarity(ref_img, img_cv2)

            if similarity_score >= similarity_threshold:
                results.append({
                    'filename': filename,
                    'sheet_name': info.get('sheet_name', 'N/A'),  # only Excel sources carry a sheet name
                    'position': info['position'],
                    'similarity_score': similarity_score,
                    'image': img_np  # kept as a NumPy array
                })

    return results