MakeAttachments
===============

This blueprint looks for items that are linked from just one place and that
have no outgoing links of their own. These 'deadends' are then moved 'into'
the linking item.

If the fields option is set to a list of tuples, these indicate the changes
to make to the item in order to merge in the subitem. The name from the head
of the list will be used as the filename that any html links are relinked to.

If no fields are set then a folder will be created, the item set as its default
view and any subitems moved into that folder.


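The fields expression below is guarded by i>=0, where i is the zero-based
index of the subitem. This holds for every subitem, so a move is produced even
when there is only one subitem.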

>>> from collective.transmogrifier.tests import registerConfig
>>> from collective.transmogrifier.transmogrifier import Transmogrifier
>>> transmogrifier = Transmogrifier(plone)
>>> config = """
... [transmogrifier]
... pipeline =
...     source
...     makeattachments 
...     treeserializer
...     printer
...     
... [source]
... blueprint = transmogrify.htmltesting.htmlbacklinksource
... level3/index=<a href="../level2/index">Level 2</a>
... level2/index=<a href="../level3/index">Level 3</a><img src="+&image%20blah">
... level2/+&image%20blah=<h1>content</h1>
...
... [makeattachments]
... blueprint = transmogrify.webcrawler.makeattachments
... fields = python:i>=0 and (('attachment'+str(i+1)+'Image', subitem['text']),('attachment'+str(i+1)+'Title', 'blah'), )
...
... [treeserializer]
... blueprint = transmogrify.webcrawler.treeserializer
...
... [printer]
... blueprint = collective.transmogrifier.sections.tests.pprinter
... """ 

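Running the pipeline merges the subitem into level2/index as an attachment.
After that we add two more subitems (pdf and pdf2), and pdf2 in turn becomes
an attachment of pdf.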

>>> registerConfig(u'test', config)
>>> transmogrifier(u'test')
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level2'}
{'_backlinks': [('http://test.com/level3/index', 'Level 2')],
 '_mimetype': 'text/html',
 '_path': 'level2/index',
 '_site_url': 'http://test.com/',
 'attachment1Image': '<h1>content</h1>',
 'attachment1Title': 'blah',
 'text': '<a href="../level3/index">Level 3</a><img src="+&image%20blah">'}
{'_origin': 'level2/+&image%20blah',
 '_path': 'level2/index/attachment1Image',
 '_site_url': 'http://test.com/'}
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level3'}
{'_backlinks': [('http://test.com/level2/index', 'Level 3')],
 '_mimetype': 'text/html',
 '_path': 'level3/index',
 '_site_url': 'http://test.com/',
 'text': '<a href="../level2/index">Level 2</a>'}

>>> config = """
... [transmogrifier]
... include = test
...     
... [source]
... level3/index=<a href="../level2/index">Level 2</a>
... level2/index=<a href="../level3/index">Level 3</a><img src="+&image%20blah"><img src="pdf">
... level2/+&image%20blah=<h1>content</h1>
... level2/pdf=<img src="pdf2">
... level2/pdf2=pdf2
...
... """ 
>>> registerConfig(u'test2', config)
>>> transmogrifier(u'test2')
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level2'}
{'_backlinks': [('http://test.com/level3/index', 'Level 2')],
 '_mimetype': 'text/html',
 '_path': 'level2/index',
 '_site_url': 'http://test.com/',
 'attachment1Image': '<h1>content</h1>',
 'attachment1Title': 'blah',
 'text': '<a href="../level3/index">Level 3</a><img src="+&image%20blah"><img src="pdf">'}
{'_origin': 'level2/+&image%20blah',
 '_path': 'level2/index/attachment1Image',
 '_site_url': 'http://test.com/'}
{'_backlinks': [('http://test.com/level2/index', '')],
 '_mimetype': 'text/html',
 '_path': 'level2/pdf',
 '_site_url': 'http://test.com/',
 'attachment1Image': 'pdf2',
 'attachment1Title': 'blah',
 'text': '<img src="pdf2">'}
{'_origin': 'level2/pdf2',
 '_path': 'level2/pdf/attachment1Image',
 '_site_url': 'http://test.com/'}
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level3'}
{'_backlinks': [('http://test.com/level2/index', 'Level 3')],
 '_mimetype': 'text/html',
 '_path': 'level3/index',
 '_site_url': 'http://test.com/',
 'text': '<a href="../level2/index">Level 2</a>'}

>>> config = """
... [transmogrifier]
... include = test2
...     
... [makeattachments]
... blueprint = transmogrify.webcrawler.makeattachments
... condition = python:subitem['_path'].count('pdf') and i>=0
...
... """ 
>>> registerConfig(u'test3', config)
>>> transmogrifier(u'test3')
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level2'}
{'_backlinks': [('http://test.com/level2/index', '')],
 '_mimetype': 'text/html',
 '_path': 'level2/+&image%20blah',
 '_site_url': 'http://test.com/',
 'text': '<h1>content</h1>'}
{'_backlinks': [('http://test.com/level3/index', 'Level 2')],
 '_mimetype': 'text/html',
 '_path': 'level2/index',
 '_site_url': 'http://test.com/',
 'text': '<a href="../level3/index">Level 3</a><img src="+&image%20blah"><img src="pdf">'}
{'_backlinks': [('http://test.com/level2/index', '')],
 '_mimetype': 'text/html',
 '_path': 'level2/pdf',
 '_site_url': 'http://test.com/',
 'attachment1Image': 'pdf2',
 'attachment1Title': 'blah',
 'text': '<img src="pdf2">'}
{'_origin': 'level2/pdf2',
 '_path': 'level2/pdf/attachment1Image',
 '_site_url': 'http://test.com/'}
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level3'}
{'_backlinks': [('http://test.com/level2/index', 'Level 3')],
 '_mimetype': 'text/html',
 '_path': 'level3/index',
 '_site_url': 'http://test.com/',
 'text': '<a href="../level2/index">Level 2</a>'}

It is also possible not to use fields for attachments, and to use a folder with
a default view instead. Just set fields to False (the default). The first
example below still uses fields; the second one sets fields to False.

>>> config = """
... [transmogrifier]
... include = test
...     
... [source]
... blueprint = transmogrify.webcrawler.test.htmlbacklinksource
... level3/index=<a href="level3"
... level2/index=<a href="../level3/index">Level 3</a><img src="+&image%20blah">
... level2/+&image%20blah=<h1>content</h1>
...
... """

>>> registerConfig(u'test4', config)
>>> transmogrifier(u'test4')
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level2'}
{'_mimetype': 'text/html',
 '_path': 'level2/index',
 '_site_url': 'http://test.com/',
 'attachment1Image': '<a href="level3"',
 'attachment1Title': 'blah',
 'attachment2Image': '<h1>content</h1>',
 'attachment2Title': 'blah',
 'text': '<a href="../level3/index">Level 3</a><img src="+&image%20blah">'}
{'_origin': 'level3/index',
 '_path': 'level2/index/attachment1Image',
 '_site_url': 'http://test.com/'}
{'_origin': 'level2/+&image%20blah',
 '_path': 'level2/index/attachment2Image',
 '_site_url': 'http://test.com/'}
     
>>> config = """
... [transmogrifier]
... include = test
...     
... [source]
... blueprint = transmogrify.webcrawler.test.htmlbacklinksource
... level3/index=<a href="level3"
... level2/index=<a href="../level3/index">Level 3</a><img src="+&image%20blah">
... level2/+&image%20blah=<h1>content</h1>
...
... [makeattachments]
... fields = python:False
...
... """ 
>>> registerConfig(u'test5', config)
>>> transmogrifier(u'test5')
{'_type': 'Folder', '_site_url': 'http://test.com/', '_path': 'level2'}
{'_defaultpage': 'index-html',
 '_path': 'level2/index',
 '_site_url': 'http://test.com/',
 '_type': 'Folder'}
{'_backlinks': [('http://test.com/level2/index', '')],
 '_mimetype': 'text/html',
 '_origin': 'level2/+&image%20blah',
 '_path': 'level2/index/+&image%20blah',
 '_site_url': 'http://test.com/',
 'text': '<h1>content</h1>'}
{'_backlinks': [('http://test.com/level2/index', 'Level 3')],
 '_mimetype': 'text/html',
 '_origin': 'level3/index',
 '_path': 'level2/index/index',
 '_site_url': 'http://test.com/',
 'text': '<a href="level3"'}
{'_mimetype': 'text/html',
 '_origin': 'level2/index',
 '_path': 'level2/index/index-html',
 '_site_url': 'http://test.com/',
 'text': '<a href="../level3/index">Level 3</a><img src="+&image%20blah">'}

Test that content which isn't linked to from anywhere is still passed through.

>>> config = """
... [transmogrifier]
... pipeline =
...     source
...     makeattachments
...     treeserializer 
...     printer
...     
... [source]
... blueprint = transmogrify.webcrawler.test.htmlbacklinksource
... blah1=blah1
... blah2=blah2
...
... [makeattachments]
... blueprint = transmogrify.webcrawler.makeattachments
...
... [treeserializer]
... blueprint = transmogrify.webcrawler.treeserializer
...
... [printer]
... blueprint = collective.transmogrifier.sections.tests.pprinter
... """ 
>>> registerConfig(u'test5.5', config)
>>> transmogrifier(u'test5.5')
{'_mimetype': 'text/html',
 '_path': 'blah1',
 '_site_url': 'http://test.com/',
 'text': 'blah1'}
{'_mimetype': 'text/html',
 '_path': 'blah2',
 '_site_url': 'http://test.com/',
 'text': 'blah2'}

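You can use a combination of folder and field attachments. Here the fields
expression only applies to the first subitem (i<1), so blah1 is merged in as a
field while blah2 is moved into the folder.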

>>> config = """
... [transmogrifier]
... pipeline =
...     source
...     makeattachments 
...     treeserializer
...     printer
...     
... [source]
... blueprint = transmogrify.webcrawler.test.htmlbacklinksource
... content=<img src="blah1"><img src="blah2">
... blah1=blah1
... blah2=blah2
...
... [makeattachments]
... blueprint = transmogrify.webcrawler.makeattachments
... fields = python:i<1 and [('attach%i'%i,subitem['text'])]
...
... [treeserializer]
... blueprint = transmogrify.webcrawler.treeserializer
...
... [printer]
... blueprint = collective.transmogrifier.sections.tests.pprinter
... """ 
>>> registerConfig(u'test6', config)
>>> transmogrifier(u'test6')
{'_defaultpage': 'index-html',
 '_path': 'content',
 '_site_url': 'http://test.com/',
 '_type': 'Folder'}
{'_backlinks': [('http://test.com/content', '')],
 '_mimetype': 'text/html',
 '_origin': 'blah2',
 '_path': 'content/blah2',
 '_site_url': 'http://test.com/',
 'text': 'blah2'}
{'_mimetype': 'text/html',
 '_origin': 'content',
 '_path': 'content/index-html',
 '_site_url': 'http://test.com/',
 'attach0': 'blah1',
 'text': '<img src="blah1"><img src="blah2">'}
{'_origin': 'blah1',
 '_path': 'content/index-html/attach0',
 '_site_url': 'http://test.com/'}

