#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Contact :   liuyuqi.gov@msn.cn
@Time    :   2023/12/09 14:57:36
@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
@Desc    :   entry point

Recursively walk a directory, find all *.pdf files, and convert each x.pdf to x.html.
'''

import os,sys,re,shutil

def convert():
    '''Convert every *.pdf under the current working directory to HTML.

    Walks the current working directory recursively and runs ``pdf2htmlEX``
    on each ``*.pdf`` file, executing the tool from inside the directory
    that holds the file so PDFs in subdirectories are found. Afterwards,
    every ``*.html`` file found under the tree (outside the top-level
    ``htmls`` directory) is moved into ``./htmls``, which is created if it
    does not exist.

    Conversion and move failures are printed to stdout and do not abort
    the run.

    Raises:
        FileExistsError: if ``./htmls`` exists but is not a directory.
    '''
    # Imported locally so the block is self-contained; the argument-list form
    # of subprocess.run avoids the shell, so file names containing spaces or
    # shell metacharacters are passed through verbatim.
    import subprocess

    current_dir = os.getcwd()
    html_dir = os.path.join(current_dir, 'htmls')
    os.makedirs(html_dir, exist_ok=True)

    for root, dirs, files in os.walk(current_dir):
        for file in files:
            if not file.endswith('.pdf'):
                continue
            command = [
                'pdf2htmlEX',
                '--zoom', '1.3',
                '--process-outline', '0',
                '--page-filename', '%s.html' % file,
                file,
            ]
            try:
                # cwd=root: the bare file name resolves next to the PDF, and
                # the generated HTML lands there for the move pass below.
                subprocess.run(command, cwd=root, check=True)
            except (OSError, subprocess.CalledProcessError) as e:
                # OSError: pdf2htmlEX missing or not executable.
                # CalledProcessError: the tool exited with a non-zero status.
                print(f'convert failed: {e}')
            else:
                # NOTE(review): the message assumes the output is named
                # "<file>.html" -- confirm against pdf2htmlEX's actual naming.
                print('convert %s to %s.html'%(file,file))

    # move all .html to htmls directory
    for root, dirs, files in os.walk(current_dir):
        # Prune the destination directory itself: files already collected
        # there would otherwise be "moved" onto themselves and fail.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != html_dir]
        for file in files:
            if not file.endswith('.html'):
                continue
            try:
                shutil.move(os.path.join(root, file), html_dir)
            except (shutil.Error, OSError) as e:
                print(f'move failed: {e}')

# Script entry point: run the conversion only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    convert()