import docx
import json
import os

def read_docx(file_path):
    """Extract paragraph text and table contents from a .docx document.

    Args:
        file_path: Location of the document; passed unchanged to
            ``docx.Document``.

    Returns:
        A dict with two keys:
        ``"text"``: the text of every entry in the document's
        ``paragraphs``, joined with newline characters.
        ``"tables"``: one list per table, each holding one list per row,
        each row holding the whitespace-stripped text of its cells.
    """
    document = docx.Document(file_path)

    body_text = "\n".join(paragraph.text for paragraph in document.paragraphs)

    # Tables are gathered separately from the paragraph text, preserving
    # the table -> row -> cell nesting.
    tables = [
        [[cell.text.strip() for cell in row.cells] for row in table.rows]
        for table in document.tables
    ]

    return {"text": body_text, "tables": tables}

if __name__ == "__main__":
    # Script entry point: dump the content of the .docx file to a JSON file.
    docx_path = r"c:\xampp\htdocs\New_Stram_Site_web\contenu.docx"
    output_path = r"c:\xampp\htdocs\New_Stram_Site_web\scratch\contenu.json"
    content = read_docx(docx_path)

    # open(..., "w") does not create missing parent directories, so make
    # sure the output folder exists before writing. The guard skips the
    # call when dirname() is empty, which os.makedirs would reject.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # ensure_ascii=False writes non-ASCII characters as-is (the file is
    # opened as UTF-8) instead of as \uXXXX escapes.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=2, ensure_ascii=False)
    print(f"Content saved to {output_path}")
