# -*- coding: utf-8 -*-

"""
This program receives an xml file name as its first input parameter.
The xml is expected to be a b2b (usually passive) document.
It is parsed to extract the significant data needed to process the contained passive document from a Cobol application.
In more detail, it generates:
 - a smaller, general xml document, which is an index referencing the passive document found in the input file;
 - an xml file reporting the document content cleaned of the attachments' binary data;
 - a pdf file obtained by applying the AssoSoftware style sheet to the document;
 - a binary file for each attachment extracted from the document content.
The name of the first file (the index) is received as the second parameter.
The files for content, pdf and attachments are generated by the program as temporary files.
The names of the generated temporary files are reported in the index file.
"""

import sys
import os
import xml.etree.ElementTree as ElemTree
import xml.dom.minidom as md
import base64
import tempfile
import lxml.etree as let
import pdfkit

# Command-line arguments: the b2b xml document to process and the name of the
# index file to generate. Both are mandatory, so fail with a readable message
# instead of an IndexError traceback when one is missing.
if len(sys.argv) < 3:
    sys.exit("usage: " + os.path.basename(sys.argv[0]) + " <input xml file> <output index file>")
inputfile = sys.argv[1]
outputfile = sys.argv[2]

# defines the transformer according to the AssoSoftware style sheet, which is
# loaded from the directory that contains this script
script_dir = os.path.dirname(os.path.realpath(__file__))
xslt = let.parse(os.path.join(script_dir, "FoglioStileAssoSoftware.xsl"))
transform = let.XSLT(xslt)

# initializes the result xml file: data / documenti / documento
result = ElemTree.Element('data')
resultItems = ElemTree.SubElement(result, 'documenti')
resultItem = ElemTree.SubElement(resultItems, 'documento')

# the lighter document xml obtained is also saved to a temporary file;
# it is only created here (delete=False keeps it on disk after closing) and
# it is filled later, once the attachments have been stripped
tfxml = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
# its name is reported into the result file
resultContent = ElemTree.SubElement(resultItem, "content")
resultContent.text = tfxml.name

# generates the pdf by applying the AssoSoftware style sheet
dom = let.parse(inputfile)
newdom = transform(dom)
# the temporary file is created only to reserve a name for the pdf
tfpdf = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
# the handle must really be closed (the call parentheses were missing) before
# pdfkit writes to the same path, otherwise it stays open for the whole run
tfpdf.close()
# lxml serializes to ASCII bytes by default (non-ASCII characters become
# character references), so decoding as ASCII is lossless and hands
# pdfkit.from_string the text string it is documented to receive
html = let.tostring(newdom).decode('ascii')
pdfkit.from_string(html, tfpdf.name)
# the pdf temporary filename is reported into the result file
resultHTML = ElemTree.SubElement(resultItem, "pdf")
resultHTML.text = tfpdf.name

# loads the original document xml file to manipulate
ixml = ElemTree.parse(inputfile)
# removes the attachments from the document, saving each one to a temporary file
resultAllegati = ElemTree.SubElement(resultItem, "allegati")
# the matching elements are collected up front because the tree is modified
# (children removed) inside the loop
for attach in list(ixml.iter('Allegati')):
    allegato = attach.find('Attachment')
    # elements without an Attachment child, or with an empty one, are left untouched
    if allegato is not None and allegato.text is not None:
        # the base64 payload is decoded and saved to a temporary file; the
        # with-block guarantees the handle is flushed and closed (the original
        # `tfall.close` lacked the call parentheses), while delete=False
        # keeps the file on disk for the consumer of the index
        with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as tfall:
            tfall.write(base64.b64decode(allegato.text.encode('ascii')))
        # the attachment temporary filename is reported into the result file ...
        resultAllegato = ElemTree.SubElement(resultAllegati, "allegato")
        resultFileName = ElemTree.SubElement(resultAllegato, "filename")
        resultFileName.text = tfall.name
        # ... with its file type, stripped of any dot (e.g. ".pdf" -> "pdf")
        formato = attach.find('FormatoAttachment')
        if formato is not None and formato.text is not None:
            resultFormato = ElemTree.SubElement(resultAllegato, "formato")
            resultFormato.text = formato.text.replace(".", "")
        # the attachment is removed from the original document to keep it small and readable by cobol
        attach.remove(allegato)
# the stripped document is serialized into the content temporary file, using
# the public getroot() accessor instead of the private _root attribute
mydata = ElemTree.tostring(ixml.getroot())
try:
    tfxml.write(mydata)
finally:
    # really close the handle (the call parentheses were missing) so the
    # content is flushed to disk even if the write fails midway
    tfxml.close()

# the result xml file is saved: the index tree is serialized, re-parsed with
# minidom only to obtain an indented (pretty printed) representation
mydata = ElemTree.tostring(result)
dom = md.parseString(mydata)
pretty_xml = dom.toprettyxml()
# the with-block guarantees the index is flushed and closed; the original
# `myfile.close` lacked the call parentheses and never closed the file
with open(outputfile, "w") as myfile:
    myfile.write(pretty_xml)