import base64
import os
import re
import shutil
import subprocess
import tempfile
from io import BytesIO
from zipfile import ZipFile

import streamlit as st
from docx import Document


def sanitize_filename(name):
    """Convert name to a safe filename.

    Every character that is not a word character, whitespace or a hyphen is
    dropped, the result is stripped, and each run of hyphens/whitespace is
    collapsed into a single underscore. The result may be an empty string
    when ``name`` contains no usable characters.
    """
    name = re.sub(r'[^\w\s-]', '', str(name)).strip()
    return re.sub(r'[-\s]+', '_', name)


def _tool_available(executable):
    """Return True when ``<executable> --version`` runs and exits with 0."""
    try:
        subprocess.run(
            [executable, "--version"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    return True


def check_pandoc_installed():
    """Return True when the ``pandoc`` executable can be run."""
    return _tool_available("pandoc")


def check_libreoffice_installed():
    """Return True when the ``libreoffice`` executable can be run."""
    return _tool_available("libreoffice")


def convert_with_libreoffice(docx_path, pdf_path):
    """Convert using LibreOffice (better for complex docs with images).

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Path where the resulting PDF should end up.

    Returns:
        True when the PDF exists at ``pdf_path`` afterwards, False otherwise.
        Failures to run LibreOffice are reported through ``st.error``.
    """
    # abspath guarantees a non-empty directory even for a bare file name.
    target_path = os.path.abspath(pdf_path)
    out_dir = os.path.dirname(target_path)
    cmd = [
        "libreoffice",
        "--headless",
        "--convert-to", "pdf",
        "--outdir", out_dir,
        docx_path,
    ]
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        st.error(f"LibreOffice conversion failed: {str(e)}")
        return False

    # LibreOffice writes "<input stem>.pdf" into --outdir, which is not
    # necessarily the directory of the input file.
    stem = os.path.splitext(os.path.basename(docx_path))[0]
    expected_path = os.path.join(out_dir, stem + ".pdf")
    if not os.path.exists(expected_path):
        return False
    if expected_path != target_path:
        shutil.move(expected_path, target_path)
    return True


def convert_to_pdf(docx_path, pdf_path):
    """Try multiple conversion methods to preserve images.

    LibreOffice is tried first; Pandoc (with the xelatex PDF engine) is the
    fallback when LibreOffice is unavailable or did not produce a PDF.

    Returns:
        True when one of the converters succeeded, False otherwise.
    """
    # First try LibreOffice if available
    if check_libreoffice_installed() and convert_with_libreoffice(docx_path, pdf_path):
        return True

    # Fallback to Pandoc if LibreOffice fails or isn't available
    if check_pandoc_installed():
        media_dir = os.path.dirname(docx_path)
        cmd = [
            "pandoc",
            docx_path,
            "-o", pdf_path,
            "--pdf-engine=xelatex",
            "--resource-path", media_dir,
            "--extract-media", media_dir,
        ]
        try:
            subprocess.run(cmd, check=True)
            return True
        except (subprocess.CalledProcessError, OSError) as e:
            st.error(f"Pandoc conversion failed: {str(e)}")
    return False


def letter_generator():
    """Render the Streamlit page that collects inputs and starts generation.

    The page checks that a PDF converter is available, then asks for a .docx
    template, a placeholder string, a list of values (one per line) and the
    output format. Pressing the button validates the inputs and calls
    ``generate_documents``.
    """
    st.title("📄 Generate Letters for Recipient")
    st.markdown(
        "Generate documents based on recipient names in Word/PDF files.\n"
        "This is useful for creating personalized letters or certificates "
        "based on given templates."
    )

    # Check requirements
    if not (check_pandoc_installed() or check_libreoffice_installed()):
        st.error(
            "**Required tools missing**:\n"
            "- Install [LibreOffice](https://www.libreoffice.org/) for best PDF conversion\n"
            "- Or install [Pandoc](https://pandoc.org/installing.html) as fallback"
        )
        st.stop()

    st.image(
        "./images/ButtTools-SampleTemplatePlaceHolder.jpg",
        width=400,
        caption="Sample Template with Placeholder",
    )

    # File upload
    st.subheader("1. Upload Template")
    template_file = st.file_uploader("Word template (.docx)", type=["docx"])

    # Placeholder config
    st.subheader("2. Configure Placeholders")
    placeholder = st.text_input("Placeholder to replace (e.g., [NAME])", "[NAME]")

    # Data input
    st.subheader("3. Enter Values (Names)")
    names = st.text_area("List values (one per line)", height=150).splitlines()

    # Output options
    st.subheader("4. Output Format")
    output_format = st.radio("", ["Word", "PDF"], index=1)

    if not st.button("✨ Generate Documents"):
        return

    if not template_file:
        st.error("Please upload a template file")
        return

    # An empty placeholder would match between every pair of characters.
    if not placeholder:
        st.error("Please enter a placeholder to replace")
        return

    names = [n.strip() for n in names if n.strip()]
    if not names:
        st.error("Please enter at least one value")
        return

    generate_documents(template_file, names, placeholder, output_format)


def _replace_in_paragraph(paragraph, placeholder, value):
    """Replace ``placeholder`` with ``value`` in one python-docx paragraph."""
    if not placeholder:
        return
    runs = paragraph.runs
    joined = "".join(run.text for run in runs)
    if placeholder not in joined:
        return

    contained = sum(run.text.count(placeholder) for run in runs)
    if contained == joined.count(placeholder):
        # Every occurrence sits inside a single run: replace per run so each
        # run keeps its own formatting.
        for run in runs:
            if placeholder in run.text:
                run.text = run.text.replace(placeholder, value)
        return

    # Word split the placeholder across several runs. Put the merged,
    # replaced text into the first run (keeping that run's formatting) and
    # blank the others so the text is not duplicated.
    runs[0].text = joined.replace(placeholder, value)
    for run in runs[1:]:
        run.text = ""


def _fill_placeholders(doc, placeholder, value):
    """Replace ``placeholder`` in the body paragraphs and table cells of ``doc``."""
    for paragraph in doc.paragraphs:
        _replace_in_paragraph(paragraph, placeholder, value)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    _replace_in_paragraph(paragraph, placeholder, value)


def _unique_stem(stem, used):
    """Return a file stem not yet in ``used`` (case-insensitive) and record it."""
    base = stem or "unnamed"
    candidate = base
    suffix = 2
    # Compare lower-cased so "John" and "john" do not collide on
    # case-insensitive file systems.
    while candidate.lower() in used:
        candidate = f"{base}_{suffix}"
        suffix += 1
    used.add(candidate.lower())
    return candidate


def generate_documents(template_file, names, placeholder, output_format):
    """Create one document per name and offer them as a ZIP download.

    Args:
        template_file: Uploaded .docx template; its ``getbuffer()`` content
            is written to a temporary file.
        names: Values that replace the placeholder, one document each.
        placeholder: Text in the template to replace.
        output_format: ``"Word"`` keeps .docx output; any other value
            (the UI offers ``"PDF"``) converts each document to PDF.

    Errors for a single name are shown with ``st.error`` and do not stop the
    remaining names from being processed. For PDF output, the first
    generated file is additionally embedded as a preview.
    """
    as_word = output_format == "Word"
    extension = ".docx" if as_word else ".pdf"

    with st.spinner(f"Generating {len(names)} {output_format} files..."):
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save template
            template_path = os.path.join(temp_dir, "template.docx")
            with open(template_path, "wb") as template_out:
                template_out.write(template_file.getbuffer())

            results = []  # (path on disk, name inside the ZIP archive)
            used_stems = set()
            total = len(names)
            progress_bar = st.progress(0)

            for index, name in enumerate(names, start=1):
                progress_bar.progress(index / total)
                # Deliberately broad: one bad recipient must not abort the
                # whole batch; the error is surfaced to the user instead.
                try:
                    # Customize document
                    doc = Document(template_path)
                    _fill_placeholders(doc, placeholder, name)

                    # Save output; stems are made unique so that names which
                    # sanitize to the same text do not overwrite each other.
                    stem = _unique_stem(sanitize_filename(name), used_stems)
                    output_path = os.path.join(temp_dir, f"output_{stem}{extension}")
                    if as_word:
                        doc.save(output_path)
                    else:
                        word_path = os.path.join(temp_dir, f"temp_{stem}.docx")
                        doc.save(word_path)
                        try:
                            converted = convert_to_pdf(word_path, output_path)
                        finally:
                            os.unlink(word_path)
                        if not converted:
                            st.error(f"Could not convert the document for {name} to PDF")
                            continue
                    results.append((output_path, f"document_{stem}{extension}"))
                except Exception as e:
                    st.error(f"Error processing {name}: {str(e)}")

            progress_bar.empty()

            if not results:
                return

            # Create download package
            zip_buffer = BytesIO()
            with ZipFile(zip_buffer, "w") as zipf:
                for file_path, archive_name in results:
                    zipf.write(file_path, arcname=archive_name)

            # Show results
            st.success(f"Generated {len(results)} documents")
            st.download_button(
                "📥 Download All",
                data=zip_buffer.getvalue(),
                file_name=f"documents_{output_format.lower()}.zip",
                mime="application/zip",
            )

            # Preview first PDF (must happen while the temp dir still exists)
            if not as_word:
                st.subheader("First Document Preview")
                with open(results[0][0], "rb") as pdf_file:
                    base64_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')
                # NOTE(review): the embed markup was empty in the source,
                # so the preview rendered nothing; this iframe is a
                # reconstruction - confirm size/attributes.
                st.markdown(
                    f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
                    f'width="100%" height="600" type="application/pdf"></iframe>',
                    unsafe_allow_html=True,
                )