208 lines
8.1 KiB
Python
208 lines
8.1 KiB
Python
import os
|
|
import tempfile
|
|
import streamlit as st
|
|
from docx import Document
|
|
from io import BytesIO
|
|
import base64
|
|
import re
|
|
from zipfile import ZipFile
|
|
import subprocess
|
|
import shutil
|
|
|
|
def sanitize_filename(name):
    """Turn an arbitrary value into a string that is safe to use in a filename.

    The value is converted with ``str()``. Every character that is not a
    word character, whitespace or a hyphen is removed, surrounding
    whitespace is trimmed, and each remaining run of hyphens and/or
    whitespace becomes a single underscore.

    Args:
        name: Value to sanitise; anything accepted by ``str()``.

    Returns:
        The sanitised string. It is empty when nothing survives the filter.
    """
    text = str(name)
    # Step 1: keep only word characters, whitespace and hyphens.
    kept = re.sub(r'[^\w\s-]', '', text)
    trimmed = kept.strip()
    # Step 2: collapse separator runs into one underscore each.
    return re.sub(r'[-\s]+', '_', trimmed)
|
|
|
|
def check_pandoc_installed():
    """Report whether the ``pandoc`` executable can be run.

    Runs ``pandoc --version`` with stdout and stderr captured (and then
    discarded).

    Returns:
        True when the command exits with status 0; False when it exits with
        a non-zero status or the executable cannot be found.
    """
    probe = ["pandoc", "--version"]
    try:
        subprocess.run(
            probe,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        available = False
    else:
        available = True
    return available
|
|
|
|
def check_libreoffice_installed():
    """Report whether the ``libreoffice`` executable can be run.

    Runs ``libreoffice --version`` with stdout and stderr captured (and then
    discarded).

    Returns:
        True when the command exits with status 0; False when it exits with
        a non-zero status or the executable cannot be found.
    """
    probe = ["libreoffice", "--version"]
    try:
        subprocess.run(
            probe,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        available = False
    else:
        available = True
    return available
|
|
|
|
def convert_with_libreoffice(docx_path, pdf_path):
    """Convert a .docx file to PDF using headless LibreOffice.

    LibreOffice only accepts an output *directory* (``--outdir``); the file
    it writes there is named after the input file with a ``.pdf`` extension.
    The conversion therefore targets the directory of *pdf_path*, and the
    produced file is renamed to *pdf_path* afterwards when the names differ.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Path where the resulting PDF should end up.

    Returns:
        True when a PDF exists at *pdf_path* after the call. False when
        LibreOffice exits with a non-zero status (reported through
        ``st.error``) or when it produced no output file.
    """
    # A bare filename has an empty dirname; "" is not a usable --outdir,
    # so fall back to the current directory.
    out_dir = os.path.dirname(pdf_path) or "."
    cmd = [
        "libreoffice",
        "--headless",
        "--convert-to", "pdf",
        "--outdir", out_dir,
        docx_path,
    ]
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        st.error(f"LibreOffice conversion failed: {str(e)}")
        return False

    # The output lands in out_dir (not next to the input), named after the
    # input's basename.
    stem = os.path.splitext(os.path.basename(docx_path))[0]
    produced_path = os.path.join(out_dir, stem + ".pdf")
    if not os.path.exists(produced_path):
        return False

    # Compare absolute paths so "./x.pdf" and "x.pdf" count as the same file.
    if os.path.abspath(produced_path) != os.path.abspath(pdf_path):
        shutil.move(produced_path, pdf_path)
    return True
|
|
|
|
def convert_to_pdf(docx_path, pdf_path):
    """Convert a .docx file to PDF, trying LibreOffice first and then Pandoc.

    LibreOffice is attempted only when its availability check passes. If it
    is unavailable or its conversion reports failure, Pandoc is attempted
    (again only when its availability check passes) with the ``xelatex``
    PDF engine, using the directory of *docx_path* both as the resource path
    and as the media extraction directory.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Path the PDF should be written to.

    Returns:
        True when one of the converters reports success, otherwise False.
        A failing Pandoc run is reported through ``st.error``.
    """
    # Preferred route: LibreOffice.
    if check_libreoffice_installed() and convert_with_libreoffice(docx_path, pdf_path):
        return True

    # Fallback route: Pandoc, if present.
    if not check_pandoc_installed():
        return False

    source_dir = os.path.dirname(docx_path)
    pandoc_cmd = [
        "pandoc",
        docx_path,
        "-o", pdf_path,
        "--pdf-engine=xelatex",
        "--resource-path", source_dir,
        "--extract-media", source_dir,
    ]
    try:
        subprocess.run(pandoc_cmd, check=True)
    except subprocess.CalledProcessError as e:
        st.error(f"Pandoc conversion failed: {str(e)}")
        return False
    return True
|
|
|
|
def letter_generator():
    """Render the Streamlit page that collects inputs for letter generation.

    The page shows a title and description, then halts with an error if
    neither the Pandoc check nor the LibreOffice check passes (this happens
    before an output format is chosen). Otherwise it shows a sample image
    and collects a .docx template, the placeholder text, a newline-separated
    list of values and the output format. When the generate button is
    pressed, the inputs are validated and passed to ``generate_documents``.
    """
    st.title("📄 Generate Letters for Recipient")
    st.markdown("""
    Generate documents based on recipient names in Word/PDF files.

    This is useful for creating personalized letters or certificates based on given templates.
    """)

    # At least one converter must be available to continue.
    if not check_pandoc_installed() and not check_libreoffice_installed():
        st.error("""
        **Required tools missing**:
        - Install [LibreOffice](https://www.libreoffice.org/) for best PDF conversion
        - Or install [Pandoc](https://pandoc.org/installing.html) as fallback
        """)
        st.stop()

    st.image("./images/ButtTools-SampleTemplatePlaceHolder.jpg", width=400, caption="Sample Template with Placeholder")

    # Step 1: template upload.
    st.subheader("1. Upload Template")
    uploaded_template = st.file_uploader("Word template (.docx)", type=["docx"])

    # Step 2: placeholder text to look for in the template.
    st.subheader("2. Configure Placeholders")
    token = st.text_input("Placeholder to replace (e.g., [NAME])", "[NAME]")

    # Step 3: one replacement value per line.
    st.subheader("3. Enter Values (Names)")
    raw_text = st.text_area("List values (one per line)", height=150)
    raw_lines = raw_text.split('\n')

    # Step 4: output format, PDF preselected.
    st.subheader("4. Output Format")
    chosen_format = st.radio("", ["Word", "PDF"], index=1)

    if not st.button("✨ Generate Documents"):
        return

    if not uploaded_template:
        st.error("Please upload a template file")
        return

    # Trim each line and drop the ones that end up empty.
    recipients = [entry for entry in map(str.strip, raw_lines) if entry]
    if not recipients:
        st.error("Please enter at least one value")
        return

    generate_documents(uploaded_template, recipients, token, chosen_format)
|
|
|
|
def _replace_in_runs(runs, placeholder, value):
    """Replace each occurrence of *placeholder* in the text spanned by *runs*.

    Word frequently splits a visible string such as ``[NAME]`` over several
    runs, so the search is done on the concatenated run text. Only runs that
    overlap a match are rewritten: the first one receives *value*, the
    others just lose the part of the placeholder they held. Runs that do not
    overlap a match keep their content and formatting.
    """
    if not placeholder:
        # An empty needle matches at every offset and would never advance.
        return
    search_from = 0
    while True:
        texts = [run.text for run in runs]
        start = "".join(texts).find(placeholder, search_from)
        if start < 0:
            return
        end = start + len(placeholder)

        offset = 0
        inserted = False
        for run, text in zip(runs, texts):
            run_start = offset
            offset += len(text)
            # Skip empty runs too: assigning run.text replaces the run's
            # whole content, which must not happen to text-less runs.
            if not text or offset <= start or run_start >= end:
                continue
            lo = max(start, run_start) - run_start
            hi = min(end, offset) - run_start
            run.text = text[:lo] + ("" if inserted else value) + text[hi:]
            inserted = True

        # Continue after the inserted value so it is never re-scanned.
        search_from = start + len(value)


def _replace_in_table(table, placeholder, value):
    """Apply the placeholder replacement to every cell of *table*, recursively."""
    for row in table.rows:
        for cell in row.cells:
            for paragraph in cell.paragraphs:
                _replace_in_runs(paragraph.runs, placeholder, value)
            for nested in cell.tables:
                _replace_in_table(nested, placeholder, value)


def _replace_placeholder(doc, placeholder, value):
    """Apply the placeholder replacement to body paragraphs and tables of *doc*."""
    for paragraph in doc.paragraphs:
        _replace_in_runs(paragraph.runs, placeholder, value)
    for table in doc.tables:
        _replace_in_table(table, placeholder, value)


def _unique_stem(stem, taken):
    """Return a filename stem not yet in *taken* and record it there.

    An empty stem becomes ``unnamed``; clashes get ``_2``, ``_3``, ... added.
    Comparison is case-insensitive so the result is also unique on
    case-insensitive filesystems.
    """
    base = stem or "unnamed"
    candidate = base
    suffix = 2
    while candidate.lower() in taken:
        candidate = f"{base}_{suffix}"
        suffix += 1
    taken.add(candidate.lower())
    return candidate


def generate_documents(template_file, names, placeholder, output_format):
    """Create one document per name from the template and offer them as a zip.

    For every entry in *names* the template is loaded, *placeholder* is
    replaced by the name in body paragraphs and tables, and the result is
    saved as .docx ("Word") or converted to PDF (any other format value).
    A failure for one name is reported with ``st.error`` and does not stop
    the remaining names. If at least one document was produced, a zip
    download button is shown, plus an inline preview of the first PDF when
    the format is "PDF".

    Args:
        template_file: Uploaded template; must provide ``getbuffer()``.
        names: Replacement values, one document each.
        placeholder: Text in the template to replace.
        output_format: "Word" or "PDF".
    """
    with st.spinner(f"Generating {len(names)} {output_format} files..."):
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save template
            template_path = os.path.join(temp_dir, "template.docx")
            with open(template_path, "wb") as f:
                f.write(template_file.getbuffer())

            # Each entry: (path on disk, entry name inside the zip).
            results = []
            used_stems = set()
            progress_bar = st.progress(0)

            for i, name in enumerate(names):
                progress_bar.progress((i + 1) / len(names))

                try:
                    # Customize document
                    doc = Document(template_path)
                    _replace_placeholder(doc, placeholder, name)

                    # Distinct names may sanitise to the same (or an empty)
                    # string; keep every output file and zip entry unique.
                    stem = _unique_stem(sanitize_filename(name), used_stems)

                    # Save output
                    if output_format == "Word":
                        output_path = os.path.join(temp_dir, f"output_{stem}.docx")
                        doc.save(output_path)
                        results.append((output_path, f"document_{stem}.docx"))
                    else:
                        word_path = os.path.join(temp_dir, f"temp_{stem}.docx")
                        pdf_path = os.path.join(temp_dir, f"output_{stem}.pdf")
                        doc.save(word_path)

                        if convert_to_pdf(word_path, pdf_path):
                            results.append((pdf_path, f"document_{stem}.pdf"))
                        os.unlink(word_path)

                except Exception as e:
                    # Best effort: report this name and carry on with the rest.
                    st.error(f"Error processing {name}: {str(e)}")

            progress_bar.empty()

            if results:
                # Create download package while the temp files still exist.
                zip_buffer = BytesIO()
                with ZipFile(zip_buffer, "w") as zipf:
                    for file_path, entry_name in results:
                        zipf.write(file_path, arcname=entry_name)

                # Show results
                st.success(f"Generated {len(results)} documents")
                st.download_button(
                    "📥 Download All",
                    data=zip_buffer.getvalue(),
                    file_name=f"documents_{output_format.lower()}.zip",
                    mime="application/zip"
                )

                # Preview first PDF
                if output_format == "PDF":
                    first_pdf_path = results[0][0]
                    with open(first_pdf_path, "rb") as f:
                        st.subheader("First Document Preview")
                        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
                        st.markdown(
                            f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
                            'width="100%" height="600px" style="border:1px solid #eee;"></iframe>',
                            unsafe_allow_html=True
                        )
|