Buffteks-Website/webpages/bufftools_pages/letter_generator.py

import os
import tempfile
import streamlit as st
from docx import Document
from io import BytesIO
import base64
import re
from zipfile import ZipFile
import subprocess
import shutil

def sanitize_filename(name):
    """Turn an arbitrary value into a string usable as a filename stem.

    The value is stringified, every character that is not a word character,
    whitespace or a hyphen is dropped, surrounding whitespace is trimmed, and
    each remaining run of hyphens/whitespace becomes a single underscore.
    """
    text = str(name)
    # Strip punctuation and other symbols first so they cannot leave gaps.
    cleaned = re.sub(r'[^\w\s-]', '', text)
    trimmed = cleaned.strip()
    # Splitting on separator runs and re-joining collapses each run to "_".
    pieces = re.split(r'[-\s]+', trimmed)
    return '_'.join(pieces)

def check_pandoc_installed():
    """Return True when ``pandoc --version`` can be launched and exits with 0."""
    probe = ["pandoc", "--version"]
    try:
        # Output is captured only to keep it off the console; it is not read.
        completed = subprocess.run(
            probe,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError:
        # No "pandoc" executable could be found to launch.
        return False
    return completed.returncode == 0

def check_libreoffice_installed():
    """Return True when ``libreoffice --version`` can be launched and exits with 0."""
    probe = ["libreoffice", "--version"]
    try:
        # Output is captured only to keep it off the console; it is not read.
        completed = subprocess.run(
            probe,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError:
        # No "libreoffice" executable could be found to launch.
        return False
    return completed.returncode == 0

def convert_with_libreoffice(docx_path, pdf_path):
    """Convert a .docx file to PDF with headless LibreOffice.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Path where the resulting PDF should end up.

    Returns:
        True when a PDF was produced (and moved to ``pdf_path`` if needed),
        False when LibreOffice could not be run, exited with an error, or
        did not produce the expected file. Failures to run are reported
        through ``st.error``.
    """
    # A bare filename has an empty dirname; fall back to the current directory
    # so "--outdir" never receives an empty argument.
    out_dir = os.path.dirname(pdf_path) or os.curdir

    # LibreOffice names its output after the *input* file and writes it into
    # --outdir, so that is where the result has to be looked up (not next to
    # the input document).
    input_stem = os.path.splitext(os.path.basename(docx_path))[0]
    produced_path = os.path.join(out_dir, input_stem + ".pdf")

    cmd = [
        "libreoffice",
        "--headless",
        "--convert-to", "pdf",
        "--outdir", out_dir,
        docx_path,
    ]
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable binary; returning False
        # lets the caller fall back to another converter instead of crashing.
        st.error(f"LibreOffice conversion failed: {str(e)}")
        return False

    # The exit status alone is not trusted: only report success when the
    # output file is really there.
    if not os.path.exists(produced_path):
        return False

    if os.path.abspath(produced_path) != os.path.abspath(pdf_path):
        shutil.move(produced_path, pdf_path)
    return True

def convert_to_pdf(docx_path, pdf_path):
    """Convert a .docx to PDF, preferring LibreOffice and falling back to Pandoc.

    Returns True as soon as one converter succeeds, otherwise False. A Pandoc
    failure is reported through ``st.error``.
    """
    # Preferred route: LibreOffice, when it is installed and the conversion works.
    if check_libreoffice_installed() and convert_with_libreoffice(docx_path, pdf_path):
        return True

    # Fallback route: Pandoc; without it there is nothing left to try.
    if not check_pandoc_installed():
        return False

    # The document's own directory serves both as resource path and as the
    # place where Pandoc extracts embedded media.
    source_dir = os.path.dirname(docx_path)
    pandoc_cmd = [
        "pandoc",
        docx_path,
        "-o", pdf_path,
        "--pdf-engine=xelatex",
        "--resource-path", source_dir,
        "--extract-media", source_dir,
    ]
    try:
        subprocess.run(pandoc_cmd, check=True)
    except subprocess.CalledProcessError as err:
        st.error(f"Pandoc conversion failed: {str(err)}")
        return False
    return True

def letter_generator():
    """Render the letter-generator page and start generation on button click.

    The page collects a .docx template, the placeholder text, a list of values
    (one per line) and the output format, then hands them to
    ``generate_documents`` once the inputs pass validation.
    """
    st.title("📄 Generate Letters for Recipient")
    st.markdown("""
    Generate documents based on recipient names in Word/PDF files.

    This is useful for creating personalized letters or certificates based on given templates.
    """)

    # The page is unusable without at least one converter, so stop rendering here.
    converter_available = check_pandoc_installed() or check_libreoffice_installed()
    if not converter_available:
        st.error("""
        **Required tools missing**:
        - Install [LibreOffice](https://www.libreoffice.org/) for best PDF conversion
        - Or install [Pandoc](https://pandoc.org/installing.html) as fallback
        """)
        st.stop()

    st.image(
        "./images/ButtTools-SampleTemplatePlaceHolder.jpg",
        width=400,
        caption="Sample Template with Placeholder",
    )

    # Step 1: the Word template
    st.subheader("1. Upload Template")
    template_file = st.file_uploader("Word template (.docx)", type=["docx"])

    # Step 2: the text that gets swapped out
    st.subheader("2. Configure Placeholders")
    placeholder = st.text_input("Placeholder to replace (e.g., [NAME])", "[NAME]")

    # Step 3: one replacement value per line
    st.subheader("3. Enter Values (Names)")
    raw_lines = st.text_area("List values (one per line)", height=150).split('\n')

    # Step 4: target format (PDF is preselected)
    st.subheader("4. Output Format")
    output_format = st.radio("", ["Word", "PDF"], index=1)

    generate_clicked = st.button("✨ Generate Documents")
    if not generate_clicked:
        return

    if not template_file:
        st.error("Please upload a template file")
        return

    # Trim every line and drop the ones that end up empty.
    names = [entry for entry in map(str.strip, raw_lines) if entry]
    if not names:
        st.error("Please enter at least one value")
        return

    generate_documents(template_file, names, placeholder, output_format)

def _replace_in_paragraph(paragraph, placeholder, value):
    """Replace every occurrence of ``placeholder`` in one paragraph's runs.

    Word often splits text such as ``[NAME]`` across several runs, so the
    placeholder is searched for in the concatenated run text instead of run by
    run. The replacement is written into the run where a match starts (keeping
    that run's formatting) and the rest of the match is cut out of the
    following runs. Runs that do not overlap a match are left untouched.
    """
    if not placeholder:
        return
    runs = paragraph.runs
    search_from = 0
    while True:
        texts = [run.text for run in runs]
        start = "".join(texts).find(placeholder, search_from)
        if start < 0:
            return
        end = start + len(placeholder)

        offset = 0
        replaced = False
        for run, text in zip(runs, texts):
            run_start, run_end = offset, offset + len(text)
            offset = run_end
            # Skip empty runs and runs entirely outside the match.
            if not text or run_end <= start or run_start >= end:
                continue
            head = text[:max(start - run_start, 0)]
            tail = text[min(end, run_end) - run_start:]
            if replaced:
                run.text = head + tail
            else:
                run.text = head + value + tail
                replaced = True

        # Continue after the inserted value so a value that itself contains
        # the placeholder cannot cause an endless loop.
        search_from = start + len(value)


def _replace_in_container(container, placeholder, value):
    """Replace the placeholder in a document or table cell, including nested tables."""
    for paragraph in container.paragraphs:
        _replace_in_paragraph(paragraph, placeholder, value)
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                _replace_in_container(cell, placeholder, value)


def _unique_stem(stem, used):
    """Return a filename stem not yet in ``used`` (case-insensitive) and record it."""
    base = stem or "unnamed"
    candidate = base
    counter = 2
    while candidate.casefold() in used:
        candidate = f"{base}_{counter}"
        counter += 1
    used.add(candidate.casefold())
    return candidate


def generate_documents(template_file, names, placeholder, output_format):
    """Create one personalised document per name and offer them as a zip download.

    Args:
        template_file: Uploaded .docx template; its ``getbuffer()`` content is
            written to a temporary file and reopened for every name.
        names: Values that replace ``placeholder``, one document each.
        placeholder: Text to replace in paragraphs and tables of the template.
        output_format: ``"Word"`` keeps the generated .docx files; any other
            value converts them to PDF. A preview of the first document is
            shown only for ``"PDF"``.

    Errors for individual names are reported through Streamlit and do not stop
    the remaining names from being processed.
    """
    if not placeholder:
        # Replacing an empty string would insert the name between every character.
        st.error("Please enter a placeholder to replace")
        return

    keep_word = output_format == "Word"
    extension = ".docx" if keep_word else ".pdf"

    with st.spinner(f"Generating {len(names)} {output_format} files..."):
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save template
            template_path = os.path.join(temp_dir, "template.docx")
            with open(template_path, "wb") as f:
                f.write(template_file.getbuffer())

            results = []  # (filename stem, path on disk) per generated document
            used_stems = set()
            progress_bar = st.progress(0)

            for i, name in enumerate(names):
                try:
                    # Reload the template each time; the replacement mutates it.
                    doc = Document(template_path)
                    _replace_in_container(doc, placeholder, name)

                    # Distinct stems keep duplicate or symbol-only names from
                    # overwriting each other's files and zip entries.
                    stem = _unique_stem(sanitize_filename(name), used_stems)
                    if keep_word:
                        output_path = os.path.join(temp_dir, f"output_{stem}.docx")
                        doc.save(output_path)
                        results.append((stem, output_path))
                    else:
                        word_path = os.path.join(temp_dir, f"temp_{stem}.docx")
                        pdf_path = os.path.join(temp_dir, f"output_{stem}.pdf")
                        try:
                            doc.save(word_path)
                            if convert_to_pdf(word_path, pdf_path):
                                results.append((stem, pdf_path))
                            else:
                                st.warning(f"Could not convert the document for {name} to PDF")
                        finally:
                            # The intermediate .docx is never part of the output.
                            if os.path.exists(word_path):
                                os.unlink(word_path)

                except Exception as e:
                    # Best effort: report the failure and continue with the next name.
                    st.error(f"Error processing {name}: {str(e)}")

                progress_bar.progress((i + 1) / len(names))

            progress_bar.empty()

            if results:
                # Create download package
                zip_buffer = BytesIO()
                with ZipFile(zip_buffer, "w") as zipf:
                    for stem, file_path in results:
                        zipf.write(file_path, arcname=f"document_{stem}{extension}")

                # Show results
                st.success(f"Generated {len(results)} documents")
                st.download_button(
                    "📥 Download All",
                    data=zip_buffer.getvalue(),
                    file_name=f"documents_{output_format.lower()}.zip",
                    mime="application/zip"
                )

                # Preview first PDF (must happen while the temp directory still exists)
                if output_format == "PDF":
                    first_pdf_path = results[0][1]
                    with open(first_pdf_path, "rb") as f:
                        st.subheader("First Document Preview")
                        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
                        st.markdown(
                            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
                            'width="100%" height="600px" style="border:1px solid #eee;"></iframe>',
                            unsafe_allow_html=True
                        )