מתמחים טופ
Information request | Which Python libraries do I need for this?

Mutual Help - Computers and Technology | 3 posts, 2 posters, 35 views, 2 followers
  • יאיר דניאלי
    #1

    I have a strange request...
    A while ago I wrote a small PDF tool in Python. I'm now on a different computer and don't have access to AI.
    Could someone please go over the code and tell me which Python libraries I need to install in order to run the program and compile it again?

    import os
    import sys
    import time
    import threading
    import json
    import sqlite3
    import re
    import io
    import shutil
    import subprocess
    import socket
    import webbrowser
    from flask import Flask, render_template, jsonify, send_from_directory, request, send_file
    import fitz  # PyMuPDF
    
    # --- CONFIGURATION & PATHS ---
    if getattr(sys, 'frozen', False):
        BASE_DIR = sys._MEIPASS
    else:
        BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    
    STATIC_FOLDER = os.path.join(BASE_DIR, 'static')
    TEMPLATE_FOLDER = os.path.join(BASE_DIR, 'templates')
    DATA_DIR = os.path.join(os.getenv('APPDATA', os.getcwd()), "AdvancedPDF_Data")
    
    # Ensure directories exist
    for folder in [STATIC_FOLDER, TEMPLATE_FOLDER, DATA_DIR]:
        if not os.path.exists(folder):
            try: os.makedirs(folder)
            except: pass
    
    DB_PATH = os.path.join(DATA_DIR, "library.db")
    CONFIG_FILE = os.path.join(DATA_DIR, "config.json")
    
    # --- EMBEDDED HTML TEMPLATE (Fallback if file missing) ---
    HTML_CONTENT = r"""
    <!DOCTYPE html>
    <html><body><h1>Error: Please ensure index.html is in templates folder</h1></body></html>
    """
    
    def init_resources():
        # Only creating favicon if needed, index.html is expected in templates folder now
        pass
    
    app = Flask(__name__, template_folder=TEMPLATE_FOLDER, static_folder=STATIC_FOLDER)
    
    current_config = {'root_paths': [], 'theme_mode': 0}
    scan_status = {'is_scanning': False, 'total_files': 0, 'done': False}
    indexer_status = {'is_indexing': False, 'total_files': 0, 'processed': 0, 'current': '', 'done': False, 'stop_flag': False}
    server_status = {'last_heartbeat': time.time(), 'start_time': time.time()}
    
    def get_free_port():
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.close()
            return port
        except: return 5000
    
    def init_db():
        conn = sqlite3.connect(DB_PATH, timeout=60)
        conn.execute("PRAGMA journal_mode=WAL") 
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS files (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        path TEXT UNIQUE, filename TEXT, mod_time REAL, scanned INTEGER DEFAULT 0)''')
        try:
            c.execute("CREATE VIRTUAL TABLE IF NOT EXISTS pages USING fts5(file_id UNINDEXED, page_num UNINDEXED, content)")
        except: pass
        c.execute('''CREATE TABLE IF NOT EXISTS favorites (path TEXT PRIMARY KEY)''')
        c.execute('''CREATE TABLE IF NOT EXISTS history (path TEXT PRIMARY KEY, last_access REAL)''')
        conn.commit()
        conn.close()
    
    def load_config():
        if os.path.exists(CONFIG_FILE):
            try:
                with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
                    current_config.update(json.load(f))
            except: pass
    
    def save_config():
        with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
            json.dump(current_config, f)
    
    def file_scan_worker():
        global scan_status
        scan_status.update({'is_scanning': True, 'done': False})
        
        conn = sqlite3.connect(DB_PATH, timeout=60)
        c = conn.cursor()
        
        c.execute("SELECT path, mod_time FROM files")
        db_files = {row[0]: row[1] for row in c.fetchall()}
    
        flat_list = []; found_paths = set(); to_insert = []
        tree = {'name': 'ספרים', 'type': 'folder', 'children': []}
    
        for root_p in current_config['root_paths']:
            if not os.path.exists(root_p): continue
            root_p = os.path.normpath(root_p).replace('\\', '/')
    
            for root, _, filenames in os.walk(root_p):
                for f in filenames:
                    if f.lower().endswith('.pdf'):
                        full_p = os.path.join(root, f).replace('\\', '/')
                        found_paths.add(full_p)
                        
                        parent_dir = os.path.dirname(full_p)
                        folder_name = os.path.basename(parent_dir)
                        if not folder_name: folder_name = parent_dir
                        
                        flat_list.append({'n': f, 'p': full_p, 'd': folder_name})
                        mtime = os.path.getmtime(full_p)
                        
                        if full_p not in db_files:
                            to_insert.append((full_p, f, mtime))
                        elif db_files[full_p] != mtime:
                            # Reset scanned flag if file modified
                            c.execute("UPDATE files SET mod_time=?, scanned=0 WHERE path=?", (mtime, full_p))
                
                scan_status['total_files'] = len(found_paths)
                time.sleep(0.0001)
    
            path_items = [item for item in flat_list if item['p'].startswith(root_p)]
            for item in path_items:
                rel_path = os.path.relpath(item['p'], root_p).replace('\\', '/')
                parts = rel_path.split('/')
                curr = tree 
                for part in parts[:-1]:
                    found = next((c for c in curr['children'] if c.get('name') == part and c['type'] == 'folder'), None)
                    if not found:
                        found = {'name': part, 'type': 'folder', 'children': []}
                        curr['children'].append(found)
                    curr = found
                curr['children'].append({'name': item['n'], 'type': 'file', 'path': item['p']})
    
        if to_insert:
            c.executemany("INSERT INTO files (path, filename, mod_time) VALUES (?,?,?)", to_insert)
        
        for p in db_files:
            if p not in found_paths:
                c.execute("DELETE FROM files WHERE path=?", (p,))
                
        conn.commit(); conn.close()
    
        with open(os.path.join(DATA_DIR, "tree.json"), 'w', encoding='utf-8') as f:
            json.dump({'tree': tree, 'flat': flat_list}, f)
            
        scan_status.update({'done': True, 'is_scanning': False})
        
        # Auto-start indexing NOT triggered here to allow user control, 
        # but scan resets 'scanned' flags so indexer will pick them up when requested.
    
    def content_indexer_worker():
        global indexer_status
        if indexer_status['is_indexing']: return
    
        indexer_status.update({'is_indexing': True, 'processed': 0, 'done': False, 'stop_flag': False})
        
        try:
            # 1. Fetch tasks - Only those not scanned yet
            conn = sqlite3.connect(DB_PATH, timeout=60)
            c = conn.cursor()
            c.execute("SELECT id, path, filename FROM files WHERE scanned = 0")
            tasks = c.fetchall()
            conn.close()
    
            indexer_status['total_files'] = len(tasks)
            if not tasks:
                indexer_status.update({'is_indexing': False, 'done': True})
                return
    
            # 2. Process tasks - BATCH WRITING FOR SPEED (50 at a time)
            batch_size = 50
            pending_pages = []
            pending_ids = []
    
            conn = sqlite3.connect(DB_PATH, timeout=60)
            c = conn.cursor()
            
            for i, (fid, path, fname) in enumerate(tasks):
                if indexer_status['stop_flag']: break
                indexer_status['current'] = fname
                
                try:
                    fitz.TOOLS.mupdf_display_errors(False)
                    doc = fitz.open(path)
                    for page_num, page in enumerate(doc, start=1):
                        if indexer_status['stop_flag']: break
                        text = page.get_text()
                        if text and len(text.strip()) > 3:
                            # FIX: Using join(split()) handles newlines/tabs better for phrase search
                            clean_text = " ".join(text.split())
                            pending_pages.append((fid, page_num, clean_text))
                    doc.close()
                    pending_ids.append(fid)
                except Exception as e:
                    print(f"Skipped reading {fname}: {e}")
                    # Mark as scanned even if failed to avoid infinite loop
                    pending_ids.append(fid)
    
                # Commit batch
                if len(pending_ids) >= batch_size or i == len(tasks) - 1:
                    try:
                        c.execute("BEGIN IMMEDIATE")
                        for pfid in pending_ids:
                            c.execute("DELETE FROM pages WHERE file_id=?", (pfid,))
                        if pending_pages:
                            c.executemany("INSERT INTO pages VALUES (?, ?, ?)", pending_pages)
                        for pfid in pending_ids:
                            c.execute("UPDATE files SET scanned=1 WHERE id=?", (pfid,))
                        conn.commit()
                    except Exception as e:
                        print(f"DB Batch Error: {e}")
                        conn.rollback()
                    
                    # Reset batch
                    pending_pages = []
                    pending_ids = []
    
                indexer_status['processed'] = i + 1
    
            conn.close()
    
        except Exception as e: print(f"Index Worker Error: {e}")
    
        indexer_status.update({'done': True, 'is_indexing': False, 'current': ''})
    
    def start_indexer_thread():
        if not indexer_status['is_indexing']:
            threading.Thread(target=content_indexer_worker, daemon=True).start()
    
    def monitor_shutdown():
        while True:
            time.sleep(2)
            if time.time() - server_status['start_time'] < 10: continue 
            if time.time() - server_status['last_heartbeat'] > 5:
                os._exit(0)
    
    def open_browser_app(port):
        # FIX: Reduced delay for faster startup
        time.sleep(0.5)
        url = f'http://127.0.0.1:{port}'
        browsers = ['msedge', 'chrome', 'google-chrome']
        browser_path = None
        for b in browsers:
            if shutil.which(b):
                browser_path = shutil.which(b)
                break  
        if not browser_path and os.name == 'nt':
            paths = [
                r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
                r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
                r"C:\Program Files\Google\Chrome\Application\chrome.exe",
                r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
            ]
            for p in paths:
                if os.path.exists(p):
                    browser_path = p
                    break
        if browser_path:
            cmd = [browser_path, f'--app={url}', '--start-maximized', '--new-window', '--disable-infobars']
            subprocess.Popen(cmd)
        else:
            webbrowser.open(url)
    
    @app.route('/')
    def index(): return render_template('index.html')
    
    @app.route('/heartbeat')
    def heartbeat():
        server_status['last_heartbeat'] = time.time()
        return jsonify({'status': 'ok'})
    
    @app.route('/scan_start')
    def scan_start():
        if not scan_status['is_scanning']:
            threading.Thread(target=file_scan_worker, daemon=True).start()
        return jsonify({'status': 'started'})
    
    @app.route('/scan_status')
    def get_scan_status(): return jsonify(scan_status)
    
    @app.route('/index_start')
    def index_start():
        # Only start if needed (indexer worker checks for unscanned files)
        indexer_status['stop_flag'] = False
        start_indexer_thread()
        return jsonify({'status': 'started'})
    
    @app.route('/index_stop')
    def index_stop():
        indexer_status['stop_flag'] = True
        return jsonify({'status': 'stopping'})
    
    @app.route('/index_status')
    def get_index_status(): return jsonify(indexer_status)
    
    @app.route('/clear_index')
    def clear_index():
        try:
            indexer_status['stop_flag'] = True
            time.sleep(0.5)
            conn = sqlite3.connect(DB_PATH, timeout=60)
            c = conn.cursor()
            c.execute("DROP TABLE IF EXISTS pages")
            c.execute("CREATE VIRTUAL TABLE pages USING fts5(file_id UNINDEXED, page_num UNINDEXED, content)")
            c.execute("UPDATE files SET scanned=0")
            conn.commit()
            conn.close()
            indexer_status['is_indexing'] = False
            indexer_status['stop_flag'] = False
            start_indexer_thread()
            return jsonify({'status': 'ok'})
        except Exception as e: return jsonify({'status': 'error', 'message': str(e)})
    
    @app.route('/search')
    def search():
        q = request.args.get('q', '').strip()
        # Normalize spaces for better sentence matching
        q = re.sub(r'\s+', ' ', q).strip()
        
        conn = sqlite3.connect(DB_PATH, timeout=60); conn.row_factory = sqlite3.Row; c = conn.cursor()
        
        # Use strict phrase match (quoted) for FTS5
        query = """SELECT f.path, f.filename, p.page_num, snippet(pages, 2, '', '', '...', 40) as snip
                   FROM pages p JOIN files f ON p.file_id = f.id WHERE p.content MATCH ? LIMIT 300"""
        try:
            # Wrapping in quotes forces FTS to search as a phrase
            q_safe = q.replace('"', '""')
            c.execute(query, (f'"{q_safe}"',))
            res = []
            for r in c.fetchall():
                parent_dir = os.path.dirname(r['path'])
                folder_name = os.path.basename(parent_dir)
                if not folder_name: folder_name = parent_dir
                res.append({'path': r['path'], 'name': r['filename'], 'folder': folder_name, 'page': r['page_num'], 'snippet': r['snip']})
        except: res = []
        conn.close(); return jsonify(res)
    
    @app.route('/fav_toggle', methods=['POST'])
    def fav_toggle():
        path = request.json.get('path')
        conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
        added = False
        try:
            c.execute("INSERT INTO favorites (path) VALUES (?)", (path,))
            added = True
        except sqlite3.IntegrityError:
            c.execute("DELETE FROM favorites WHERE path=?", (path,))
            added = False
        conn.commit(); conn.close()
        return jsonify({'status': 'ok', 'added': added})
    
    @app.route('/fav_list')
    def fav_list():
        conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
        c.execute("SELECT path FROM favorites")
        res = [{'path': r[0], 'name': os.path.basename(r[0])} for r in c.fetchall()]
        conn.close()
        return jsonify(res)
    
    @app.route('/history_log', methods=['POST'])
    def history_log():
        path = request.json.get('path')
        if path:
            try:
                conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
                c.execute("INSERT OR REPLACE INTO history (path, last_access) VALUES (?, ?)", (path, time.time()))
                conn.commit(); conn.close()
            except: pass
        return jsonify({'status': 'ok'})
    
    @app.route('/get_history')
    def get_history():
        conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
        c.execute("SELECT path FROM history ORDER BY last_access DESC LIMIT 50")
        res = [{'path': r[0], 'name': os.path.basename(r[0])} for r in c.fetchall()]
        conn.close()
        return jsonify(res)
    
    @app.route('/pdf')
    def get_pdf():
        path = request.args.get('path')
        q = request.args.get('q', '')
        try:
            # FIX: If no query, serve file directly (FAST LOAD)
            if not q:
                 return send_file(path, mimetype='application/pdf')
                 
            # Only process PyMuPDF highlighting if there IS a query
            doc = fitz.open(path)
            for page in doc:
                for inst in page.search_for(q):
                    page.add_highlight_annot(inst)
            pdf_bytes = doc.write()
            doc.close()
            return send_file(io.BytesIO(pdf_bytes), mimetype='application/pdf')
        except: return "Error opening PDF", 404
    
    @app.route('/preview_page')
    def preview_page():
        path = request.args.get('path')
        page_num = int(request.args.get('page', 1))
        q = request.args.get('q', '')
        try:
            doc = fitz.open(path)
            new_doc = fitz.open()
            new_doc.insert_pdf(doc, from_page=page_num-1, to_page=page_num-1)
            if q:
                for inst in new_doc[0].search_for(q):
                    new_doc[0].add_highlight_annot(inst)
            pdf_bytes = new_doc.write()
            new_doc.close(); doc.close()
            return send_file(io.BytesIO(pdf_bytes), mimetype='application/pdf')
        except: return "Error previewing", 404
    
    @app.route('/browse')
    def browse():
        try:
            cmd = [sys.executable, '-c', "import tkinter as tk, sys; from tkinter import filedialog; root=tk.Tk(); root.withdraw(); root.attributes('-topmost', True); path=filedialog.askdirectory(); root.destroy(); sys.stdout.buffer.write(path.encode('utf-8'))"]
            kwargs = {}
            if os.name == 'nt': kwargs['creationflags'] = 0x08000000 
            path = subprocess.check_output(cmd, **kwargs).decode('utf-8').strip()
            if path: return jsonify({'status': 'ok', 'path': path.replace('\\', '/')})
        except: pass
        return jsonify({'status': 'cancel'})
    
    @app.route('/update_paths', methods=['POST'])
    def update_paths():
        p = request.json.get('paths', [])
        current_config['root_paths'] = p
        save_config(); return jsonify({'status': 'ok'})
    
    @app.route('/save_prefs', methods=['POST'])
    def save_prefs():
        data = request.json
        current_config['theme_mode'] = data.get('theme_mode', 0)
        save_config(); return jsonify({'status': 'ok'})
    
    @app.route('/get_tree')
    def get_tree():
        p = os.path.join(DATA_DIR, "tree.json")
        if os.path.exists(p):
            with open(p, 'r', encoding='utf-8') as f: return jsonify(json.load(f))
        return jsonify({'tree': {'children': []}, 'flat': []})
    
    @app.route('/get_init')
    def get_init(): return jsonify({'paths': current_config['root_paths'], 'theme_mode': current_config['theme_mode']})
    
    if __name__ == '__main__':
        try:
            init_resources()
            init_db()
            load_config()
            port = get_free_port()
            if not os.environ.get("WERKZEUG_RUN_MAIN"):
                threading.Thread(target=monitor_shutdown, daemon=True).start()
                threading.Thread(target=lambda: open_browser_app(port), daemon=True).start()
            app.run(port=port, debug=False, use_reloader=False)
        except Exception as e: print(f"CRITICAL ERROR: {e}")
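An aside on the `/search` route above: doubling any embedded quotes and then wrapping the whole query in quotes is what tells FTS5 to match a literal phrase rather than independent terms. A minimal sketch of just that quoting step (assumes the SQLite bundled with your Python was built with FTS5, which is true of standard CPython builds):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE VIRTUAL TABLE pages USING fts5(content)")
conn.execute("INSERT INTO pages VALUES ('hello brave new world')")

q = 'brave new'
q_safe = q.replace('"', '""')  # escape embedded quotes, as /search does
rows = conn.execute("SELECT content FROM pages WHERE content MATCH ?",
                    (f'"{q_safe}"',)).fetchall()
print(rows)  # [('hello brave new world',)]

# Reversed word order is NOT matched, because the quoted query is a phrase:
no_hit = conn.execute("SELECT content FROM pages WHERE content MATCH ?",
                      ('"new brave"',)).fetchall()
print(no_hit)  # []
```

Without the surrounding quotes, `brave new` would be parsed as two separate FTS5 terms (an implicit AND) and would also match pages where the words appear far apart.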
    

    I'd appreciate it if that same person, or anyone else, could sort this problem out for me...

    cfopuserC | 1 reply | last reply
              c.execute("UPDATE files SET scanned=0")
              conn.commit()
              conn.close()
              indexer_status['is_indexing'] = False
              indexer_status['stop_flag'] = False
              start_indexer_thread()
              return jsonify({'status': 'ok'})
          except Exception as e: return jsonify({'status': 'error', 'message': str(e)})
      
      @app.route('/search')
      def search():
          q = request.args.get('q', '').strip()
          # Normalize spaces for better sentence matching
          q = re.sub(r'\s+', ' ', q).strip()
          
          conn = sqlite3.connect(DB_PATH, timeout=60); conn.row_factory = sqlite3.Row; c = conn.cursor()
          
          # Use strict phrase match (quoted) for FTS5
          query = """SELECT f.path, f.filename, p.page_num, snippet(pages, 2, '', '', '...', 40) as snip
                     FROM pages p JOIN files f ON p.file_id = f.id WHERE p.content MATCH ? LIMIT 300"""
          try:
              # Wrapping in quotes forces FTS to search as a phrase
              q_safe = q.replace('"', '""')
              c.execute(query, (f'"{q_safe}"',))
              res = []
              for r in c.fetchall():
                  parent_dir = os.path.dirname(r['path'])
                  folder_name = os.path.basename(parent_dir)
                  if not folder_name: folder_name = parent_dir
                  res.append({'path': r['path'], 'name': r['filename'], 'folder': folder_name, 'page': r['page_num'], 'snippet': r['snip']})
           except Exception: res = []
          conn.close(); return jsonify(res)
      
      @app.route('/fav_toggle', methods=['POST'])
      def fav_toggle():
          path = request.json.get('path')
          conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
          added = False
          try:
              c.execute("INSERT INTO favorites (path) VALUES (?)", (path,))
              added = True
          except sqlite3.IntegrityError:
              c.execute("DELETE FROM favorites WHERE path=?", (path,))
              added = False
          conn.commit(); conn.close()
          return jsonify({'status': 'ok', 'added': added})
      
      @app.route('/fav_list')
      def fav_list():
          conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
          c.execute("SELECT path FROM favorites")
          res = [{'path': r[0], 'name': os.path.basename(r[0])} for r in c.fetchall()]
          conn.close()
          return jsonify(res)
      
      @app.route('/history_log', methods=['POST'])
      def history_log():
          path = request.json.get('path')
          if path:
              try:
                  conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
                  c.execute("INSERT OR REPLACE INTO history (path, last_access) VALUES (?, ?)", (path, time.time()))
                  conn.commit(); conn.close()
               except Exception: pass
          return jsonify({'status': 'ok'})
      
      @app.route('/get_history')
      def get_history():
          conn = sqlite3.connect(DB_PATH, timeout=60); c = conn.cursor()
          c.execute("SELECT path FROM history ORDER BY last_access DESC LIMIT 50")
          res = [{'path': r[0], 'name': os.path.basename(r[0])} for r in c.fetchall()]
          conn.close()
          return jsonify(res)
      
      @app.route('/pdf')
      def get_pdf():
          path = request.args.get('path')
          q = request.args.get('q', '')
          try:
              # FIX: If no query, serve file directly (FAST LOAD)
              if not q:
                   return send_file(path, mimetype='application/pdf')
                   
              # Only process PyMuPDF highlighting if there IS a query
              doc = fitz.open(path)
              for page in doc:
                  for inst in page.search_for(q):
                      page.add_highlight_annot(inst)
               pdf_bytes = doc.tobytes()  # tobytes() supersedes the deprecated Document.write()
              doc.close()
              return send_file(io.BytesIO(pdf_bytes), mimetype='application/pdf')
           except Exception: return "Error opening PDF", 404
      
      @app.route('/preview_page')
      def preview_page():
          path = request.args.get('path')
          page_num = int(request.args.get('page', 1))
          q = request.args.get('q', '')
          try:
              doc = fitz.open(path)
              new_doc = fitz.open()
              new_doc.insert_pdf(doc, from_page=page_num-1, to_page=page_num-1)
              if q:
                  for inst in new_doc[0].search_for(q):
                      new_doc[0].add_highlight_annot(inst)
               pdf_bytes = new_doc.tobytes()  # tobytes() supersedes the deprecated Document.write()
              new_doc.close(); doc.close()
              return send_file(io.BytesIO(pdf_bytes), mimetype='application/pdf')
           except Exception: return "Error previewing", 404
      
      @app.route('/browse')
      def browse():
          try:
              cmd = [sys.executable, '-c', "import tkinter as tk, sys; from tkinter import filedialog; root=tk.Tk(); root.withdraw(); root.attributes('-topmost', True); path=filedialog.askdirectory(); root.destroy(); sys.stdout.buffer.write(path.encode('utf-8'))"]
              kwargs = {}
              if os.name == 'nt': kwargs['creationflags'] = 0x08000000 
              path = subprocess.check_output(cmd, **kwargs).decode('utf-8').strip()
              if path: return jsonify({'status': 'ok', 'path': path.replace('\\', '/')})
           except Exception: pass
          return jsonify({'status': 'cancel'})
      
      @app.route('/update_paths', methods=['POST'])
      def update_paths():
          p = request.json.get('paths', [])
          current_config['root_paths'] = p
          save_config(); return jsonify({'status': 'ok'})
      
      @app.route('/save_prefs', methods=['POST'])
      def save_prefs():
          data = request.json
          current_config['theme_mode'] = data.get('theme_mode', 0)
          save_config(); return jsonify({'status': 'ok'})
      
      @app.route('/get_tree')
      def get_tree():
          p = os.path.join(DATA_DIR, "tree.json")
          if os.path.exists(p):
              with open(p, 'r', encoding='utf-8') as f: return jsonify(json.load(f))
          return jsonify({'tree': {'children': []}, 'flat': []})
      
      @app.route('/get_init')
      def get_init(): return jsonify({'paths': current_config['root_paths'], 'theme_mode': current_config['theme_mode']})
      
      if __name__ == '__main__':
          try:
              init_resources()
              init_db()
              load_config()
              port = get_free_port()
              if not os.environ.get("WERKZEUG_RUN_MAIN"):
                  threading.Thread(target=monitor_shutdown, daemon=True).start()
                  threading.Thread(target=lambda: open_browser_app(port), daemon=True).start()
              app.run(port=port, debug=False, use_reloader=False)
          except Exception as e: print(f"CRITICAL ERROR: {e}")
      

       I'd appreciate it if that same person, or anyone else, could sort this issue out for me...

       cfopuser
       #2

      @יאיר-דניאל

      pip install Flask PyMuPDF
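       Everything else the script imports (os, sys, time, threading, json, sqlite3, re, io, shutil, subprocess, socket, webbrowser, plus the tkinter it launches via subprocess for the folder dialog) ships with the Python standard library, so those two packages should be the only pip installs. A quick sanity check you can run yourself, on Python 3.10+ where `sys.stdlib_module_names` exists:

       ```python
       # List the script's top-level imports and filter out everything that is
       # part of the standard library; what remains must be installed via pip.
       import sys

       imports = ["os", "sys", "time", "threading", "json", "sqlite3", "re", "io",
                  "shutil", "subprocess", "socket", "webbrowser", "flask", "fitz"]

       third_party = [m for m in imports if m not in sys.stdlib_module_names]
       print(third_party)  # ['flask', 'fitz']  ->  pip install Flask PyMuPDF
       ```

       Note that `fitz` is the import name of the PyMuPDF package, which is why the pip name differs from the import name.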
         יאיר דניאל
         #3

         @cfopuser Just those? That seems odd to me, I remember it being at least three libraries...
         Thanks either way!

         What about the thing in the spoiler above? I'd appreciate it if you could take a look when you have a few minutes... Thanks!
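         About the third library you remember: it is probably not an import at all. The script checks `getattr(sys, 'frozen', False)` and reads `sys._MEIPASS`, which are set by PyInstaller, so the third package was most likely pyinstaller, needed to compile the exe, not to run the script. A hypothetical Windows build sketch, assuming the script is saved as app.py with the static and templates folders next to it (adjust the names to your project; on Linux the separator inside --add-data is ":" instead of ";"):

         ```shell
         # PyInstaller is a build-time tool, installed separately from the runtime deps
         pip install pyinstaller
         # Bundle the script plus its static/ and templates/ folders into one exe
         pyinstaller --onefile --noconsole --add-data "static;static" --add-data "templates;templates" app.py
         ```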
