#!/usr/bin/python
# encoding=UTF-8
# Copyright © 2008 Jakub Wilk <ubanus@users.sf.net>
#
# This package is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This package is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

# Usage message; main() prints it to standard error when the script is
# invoked with any command-line arguments.
__doc__ = (
    'Usage:\n'
    '\n'
    '  hocr2djvused < ocr_file.html | djvused -s ocr_file.djvu\n'
)

import sys

from ocrodjvu import hocr

def main():
    '''Read hOCR from standard input and write a djvused script to standard output.

    The script accepts no command-line arguments: if any are given, the
    module usage message is written to standard error and the process
    exits with status 1.

    For every text object yielded by hocr.extract_text(), counted from 1,
    a "select N", "remove-txt", "set-txt" command group is written,
    followed by the text itself and a line containing a single dot.
    '''
    if sys.argv[1:]:
        # Any argument at all is treated as a usage error.
        sys.stderr.write(__doc__ + '\n')
        sys.exit(1)
    for index, page_text in enumerate(hocr.extract_text(sys.stdin)):
        page_number = index + 1  # the emitted "select" numbers start at 1
        header = 'select %d\nremove-txt\nset-txt\n' % page_number
        sys.stdout.write(header)
        # NOTE(review): 80 is presumably the output line width -- confirm
        # against the print_into() implementation in ocrodjvu.hocr.
        page_text.print_into(sys.stdout, 80)
        sys.stdout.write('\n.\n\n')

# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()

# vim:ts=4 sw=4 et
