Files
Buffteks-Website/webpages/bufftools_pages/letter_generator.py
2025-03-31 19:53:36 -05:00

208 lines
8.1 KiB
Python

import os
import tempfile
import streamlit as st
from docx import Document
from io import BytesIO
import base64
import re
from zipfile import ZipFile
import subprocess
import shutil
def sanitize_filename(name, default="unnamed"):
    """Convert an arbitrary value into a string that is safe to use in a filename.

    Args:
        name: Value to sanitize; it is converted with ``str()`` first.
        default: Returned when nothing usable is left after sanitizing
            (for example when ``name`` is empty or only punctuation), so
            callers never end up with an empty filename component.

    Returns:
        The sanitized name, or ``default`` if the sanitized name is empty.
    """
    # Keep only word characters, whitespace and hyphens; this also removes
    # path separators and dots, so the result cannot escape a directory.
    cleaned = re.sub(r'[^\w\s-]', '', str(name)).strip()
    # Collapse each run of hyphens/whitespace into a single underscore.
    cleaned = re.sub(r'[-\s]+', '_', cleaned)
    return cleaned or default
def check_pandoc_installed():
    """Report whether the ``pandoc`` executable can be run.

    Returns:
        True if ``pandoc --version`` runs and exits with status 0,
        False if it exits non-zero or cannot be started at all.
    """
    try:
        subprocess.run(
            ["pandoc", "--version"],
            check=True,
            # The version banner is never read, so discard it instead of
            # buffering it in pipes.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    # OSError covers FileNotFoundError as well as PermissionError (binary is
    # present but not executable), which previously escaped as a crash.
    except (subprocess.CalledProcessError, OSError):
        return False
    return True
def check_libreoffice_installed():
    """Report whether the ``libreoffice`` executable can be run.

    Returns:
        True if ``libreoffice --version`` runs and exits with status 0,
        False if it exits non-zero or cannot be started at all.
    """
    try:
        subprocess.run(
            ["libreoffice", "--version"],
            check=True,
            # The version banner is never read, so discard it instead of
            # buffering it in pipes.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    # OSError covers FileNotFoundError as well as PermissionError (binary is
    # present but not executable), which previously escaped as a crash.
    except (subprocess.CalledProcessError, OSError):
        return False
    return True
def convert_with_libreoffice(docx_path, pdf_path):
    """Convert a DOCX file to PDF with headless LibreOffice.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Path the resulting PDF should end up at.

    Returns:
        True if the PDF exists at ``pdf_path`` afterwards, False if
        LibreOffice failed, could not be started, or produced no output.
        Failures to run LibreOffice are also reported through ``st.error``.
    """
    # dirname() is "" for a bare filename; fall back to the current directory
    # so LibreOffice is never handed an empty --outdir.
    out_dir = os.path.dirname(pdf_path) or "."
    cmd = [
        "libreoffice",
        "--headless",
        "--convert-to", "pdf",
        "--outdir", out_dir,
        docx_path
    ]
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        st.error(f"LibreOffice conversion failed: {str(e)}")
        return False

    # LibreOffice writes "<input basename>.pdf" into --outdir, which is the
    # directory of pdf_path and not necessarily the directory of docx_path.
    stem = os.path.splitext(os.path.basename(docx_path))[0]
    produced_path = os.path.join(out_dir, stem + ".pdf")
    if not os.path.exists(produced_path):
        return False
    if os.path.abspath(produced_path) != os.path.abspath(pdf_path):
        shutil.move(produced_path, pdf_path)
    return True
def convert_to_pdf(docx_path, pdf_path):
    """Convert a DOCX file to PDF, preferring LibreOffice over Pandoc.

    LibreOffice is tried first when it is available. If it is missing or
    its conversion does not succeed, Pandoc (with the xelatex engine) is
    used when available.

    Returns:
        True if one of the converters succeeded, otherwise False. A failing
        Pandoc run is additionally reported through ``st.error``.
    """
    # Preferred route: LibreOffice.
    libreoffice_succeeded = (
        check_libreoffice_installed()
        and convert_with_libreoffice(docx_path, pdf_path)
    )
    if libreoffice_succeeded:
        return True

    # Fallback route: Pandoc, if it exists on this machine.
    if not check_pandoc_installed():
        return False

    pandoc_cmd = [
        "pandoc",
        docx_path,
        "-o", pdf_path,
        "--pdf-engine=xelatex",
        "--resource-path", os.path.dirname(docx_path),
        "--extract-media", os.path.dirname(docx_path)
    ]
    try:
        subprocess.run(pandoc_cmd, check=True)
    except subprocess.CalledProcessError as err:
        st.error(f"Pandoc conversion failed: {str(err)}")
        return False
    return True
def letter_generator():
    """Render the Streamlit page that generates one document per recipient.

    The page collects a ``.docx`` template, the placeholder text to replace,
    a list of values (one per line) and the output format. When the
    generate button is pressed and the inputs are valid, it hands them to
    ``generate_documents``.
    """
    st.title("📄 Generate Letters for Recipient")
    st.markdown("""
    Generate documents based on recipient names in Word/PDF files.
    This is useful for creating personalized letters or certificates based on given templates.
    """)
    # Check requirements
    # NOTE(review): this stops the page even when the user only wants Word
    # output, which does not go through a converter - confirm that is intended.
    if not (check_pandoc_installed() or check_libreoffice_installed()):
        st.error("""
        **Required tools missing**:
        - Install [LibreOffice](https://www.libreoffice.org/) for best PDF conversion
        - Or install [Pandoc](https://pandoc.org/installing.html) as fallback
        """)
        st.stop()
    st.image("./images/ButtTools-SampleTemplatePlaceHolder.jpg", width=400, caption="Sample Template with Placeholder")
    # File upload
    st.subheader("1. Upload Template")
    template_file = st.file_uploader("Word template (.docx)", type=["docx"])
    # Placeholder config
    st.subheader("2. Configure Placeholders")
    placeholder = st.text_input("Placeholder to replace (e.g., [NAME])", "[NAME]")
    # Data input
    st.subheader("3. Enter Values (Names)")
    names = st.text_area("List values (one per line)", height=150).split('\n')
    # Output options
    st.subheader("4. Output Format")
    output_format = st.radio("", ["Word", "PDF"], index=1)
    if st.button("✨ Generate Documents"):
        if not template_file:
            st.error("Please upload a template file")
            return
        # An empty placeholder would match everywhere: str.replace("", name)
        # inserts the name between every character of the template text.
        if not placeholder:
            st.error("Please enter the placeholder text to replace")
            return
        # Drop blank lines and surrounding whitespace (including "\r").
        names = [n.strip() for n in names if n.strip()]
        if not names:
            st.error("Please enter at least one value")
            return
        generate_documents(template_file, names, placeholder, output_format)
def _replace_in_paragraph(paragraph, placeholder, value):
    """Replace every occurrence of ``placeholder`` in ``paragraph`` with ``value``.

    Word often splits visually contiguous text such as ``[NAME]`` across
    several runs, so a per-run search can miss it. The search is therefore
    done on the concatenated run texts and each match is mapped back onto
    the runs it spans: the first spanned run receives ``value``, and the
    placeholder fragments are cut out of the following ones. Runs that are
    not part of a match are left untouched.
    """
    if not placeholder:
        return
    runs = paragraph.runs
    search_from = 0
    while True:
        texts = [run.text for run in runs]
        start = "".join(texts).find(placeholder, search_from)
        if start < 0:
            return
        end = start + len(placeholder)
        position = 0
        replaced = False
        for run, text in zip(runs, texts):
            run_start = position
            position += len(text)
            # Skip empty runs and runs entirely outside the match.
            if not text or position <= start or run_start >= end:
                continue
            before = text[:max(start - run_start, 0)]
            after = text[end - run_start:]
            if replaced:
                run.text = before + after
            else:
                run.text = before + value + after
                replaced = True
        # Resume after the inserted value so a value that itself contains
        # the placeholder is not replaced again (same as str.replace).
        search_from = start + len(value)


def generate_documents(template_file, names, placeholder, output_format):
    """Create one document per name from the template and offer them as a zip.

    Args:
        template_file: Uploaded ``.docx`` template; its ``getbuffer()`` content
            is written to a temporary file.
        names: Values that replace ``placeholder``, one document per value.
        placeholder: Text in the template to be replaced.
        output_format: ``"Word"`` to keep ``.docx`` output; any other value
            converts each document to PDF via ``convert_to_pdf``.

    Progress, per-document errors, the download button and (for PDF) a
    preview of the first document are rendered through Streamlit.
    """
    if not placeholder:
        st.error("Placeholder must not be empty")
        return

    as_word = output_format == "Word"
    extension = ".docx" if as_word else ".pdf"
    results = []  # (name inside the zip, path on disk)
    zip_bytes = None
    preview_bytes = None

    with st.spinner(f"Generating {len(names)} {output_format} files..."):
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save template
            template_path = os.path.join(temp_dir, "template.docx")
            with open(template_path, "wb") as f:
                f.write(template_file.getbuffer())

            used_names = set()
            progress_bar = st.progress(0)
            for i, name in enumerate(names):
                progress_bar.progress((i + 1) / len(names))
                try:
                    # Customize document
                    doc = Document(template_path)
                    for paragraph in doc.paragraphs:
                        _replace_in_paragraph(paragraph, placeholder, name)
                    for table in doc.tables:
                        for row in table.rows:
                            for cell in row.cells:
                                for paragraph in cell.paragraphs:
                                    _replace_in_paragraph(paragraph, placeholder, name)

                    # Give every recipient a distinct, non-empty file name so
                    # duplicates or punctuation-only names do not overwrite
                    # each other. casefold() guards against collisions on
                    # case-insensitive filesystems.
                    base_name = sanitize_filename(name) or f"recipient_{i + 1}"
                    safe_name = base_name
                    counter = 2
                    while safe_name.casefold() in used_names:
                        safe_name = f"{base_name}_{counter}"
                        counter += 1
                    used_names.add(safe_name.casefold())
                    archive_name = f"document_{safe_name}{extension}"

                    # Save output
                    if as_word:
                        output_path = os.path.join(temp_dir, f"output_{safe_name}.docx")
                        doc.save(output_path)
                        results.append((archive_name, output_path))
                    else:
                        word_path = os.path.join(temp_dir, f"temp_{safe_name}.docx")
                        pdf_path = os.path.join(temp_dir, f"output_{safe_name}.pdf")
                        doc.save(word_path)
                        converted = convert_to_pdf(word_path, pdf_path)
                        os.unlink(word_path)
                        if converted:
                            results.append((archive_name, pdf_path))
                        else:
                            st.error(f"Could not convert the document for {name} to PDF")
                # Deliberately broad: one bad recipient must not abort the batch.
                except Exception as e:
                    st.error(f"Error processing {name}: {str(e)}")
            progress_bar.empty()

            if results:
                # Create download package while the temporary files still exist
                zip_buffer = BytesIO()
                with ZipFile(zip_buffer, "w") as zipf:
                    for archive_name, file_path in results:
                        with open(file_path, "rb") as f:
                            zipf.writestr(archive_name, f.read())
                zip_bytes = zip_buffer.getvalue()
                if not as_word:
                    with open(results[0][1], "rb") as f:
                        preview_bytes = f.read()

    if not results:
        st.error("No documents could be generated")
        return

    # Show results
    st.success(f"Generated {len(results)} documents")
    st.download_button(
        "📥 Download All",
        data=zip_bytes,
        file_name=f"documents_{output_format.lower()}.zip",
        mime="application/zip"
    )
    # Preview first PDF
    if preview_bytes is not None:
        st.subheader("First Document Preview")
        base64_pdf = base64.b64encode(preview_bytes).decode('utf-8')
        st.markdown(
            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
            'width="100%" height="600px" style="border:1px solid #eee;"></iframe>',
            unsafe_allow_html=True
        )