#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  from_camel.py
#  from-camel
# 
#  Created by Lars Yencken on 06-10-2008.
#  Copyright 2008 Lars Yencken. All rights reserved.
#

"""
"""

import os, sys
import optparse
import codecs
import re

def from_camel(filename):
    """
    Converts camel-case names in the given file to underscore notation.

    The file is read as UTF-8 and every line is passed through
    _line_from_camel(), skipping the names that _detect_imports() finds in
    the file's import statements. The converted text replaces the original
    file; the original contents are kept as filename + '.bak'.

    filename -- path of the file to convert.

    Errors raised while opening, decoding, writing or renaming the files
    are not caught here.
    """
    bak_file = filename + '.bak'
    tmp_file = filename + '.tmp'

    # Read the whole file first: the imported names must be known before
    # the first line is converted. The with-block closes the stream even if
    # decoding fails part-way through.
    with codecs.open(filename, 'r', 'utf8') as i_stream:
        lines = i_stream.readlines()

    ignore_set = _detect_imports(lines)
    converted = [_line_from_camel(line, ignore_set) for line in lines]

    # Write to the temporary name rather than to the .bak name, so that an
    # existing backup is not truncated before the conversion has succeeded.
    with codecs.open(tmp_file, 'w', 'utf8') as o_stream:
        o_stream.writelines(converted)

    # Swap files so that the modified one now has the original filename
    # and the original now has the .bak suffix.
    os.rename(filename, bak_file)
    os.rename(tmp_file, filename)

_camel_pattern = re.compile(r'[a-z]+[A-Z][a-zA-Z]+', re.UNICODE)

def _line_from_camel(line, ignore_set=None):
    """
    Rewrites the camel-case names found in a line using underscores.

    A matched name is copied through unchanged when it is contained in
    ignore_set, or when the character just before the match is an ASCII
    capital letter (the match is then the tail of a capitalised name, such
    as a class name).

    >>> _line_from_camel('dogEatDog')
    'dog_eat_dog'

    >>> _line_from_camel("def eat_dog(a, b): return franken_muffin")
    'def eat_dog(a, b): return franken_muffin'

    >>> _line_from_camel('dogEatDog = dogEatMud', set(['dogEatDog']))
    'dogEatDog = dog_eat_mud'
    """
    protected = set() if ignore_set is None else ignore_set

    def _replacement(found):
        # Decide what to emit for a single match of the camel pattern.
        name = found.group(0)
        begin = found.start()
        follows_capital = begin > 0 and 'A' <= found.string[begin - 1] <= 'Z'
        if follows_capital or name in protected:
            # Class name or ignored name
            return name
        return _word_from_camel(name)

    return _camel_pattern.sub(_replacement, line)

def _word_from_camel(word):
    """
    Replaces each uppercase character of a word with an underscore followed
    by its lowercase form; every other character is kept as it is.

    >>> _word_from_camel('dogEatDog')
    'dog_eat_dog'
    >>> _word_from_camel('eatAPI')
    'eat_a_p_i'
    """
    return ''.join(
            '_' + letter.lower() if letter.isupper() else letter
            for letter in word
        )

# "import a.b, c as d": captures everything after the keyword, up to a
# comment or a statement separator.
_simple_import_pattern = re.compile(
        r'^\s*import\s+(?P<module_path>[^#;]+)', re.UNICODE
    )
# "from a.b import c, d as e": captures the source module and, as above, the
# whole list of imported names.
_from_import_pattern = re.compile(
        r'^\s*from\s+(?P<from_module>[\w.]+)\s+import\s+(?P<imported_module>[^#;]+)',
        re.UNICODE
    )
# The leading dotted name of one comma-separated item. An opening parenthesis
# in front of it is skipped, and whatever follows the name (such as
# " as alias") is simply not matched.
_imported_name_pattern = re.compile(
        r'[\s(]*(?P<name>[^\W\d][\w.]*)', re.UNICODE
    )

def _path_parts(dotted_path):
    """Returns the non-empty components of a dotted module path."""
    return [part for part in dotted_path.split('.') if part]

def _names_in_clause(clause):
    """Returns the name components listed in a comma-separated clause."""
    names = set()
    for item in clause.split(','):
        match = _imported_name_pattern.match(item)
        if match:
            names.update(_path_parts(match.group('name')))
    return names

def _detect_imports(lines):
    """
    Collects the names that the given lines bring in via import statements.

    Both "import a.b, c" and "from a.b import c, d" are recognised, with or
    without "as" aliases, trailing comments, a trailing newline or
    parentheses on the same line. Dotted paths are split into their
    components. Aliases themselves are not collected, and the continuation
    lines of a multi-line import are not recognised.

    The matching is a line-based heuristic and deliberately lenient: a name
    that is protected by mistake is merely left unconverted everywhere,
    whereas renaming a name that really is imported would break the file.

    lines -- an iterable of source lines.

    Returns a set of strings.

    >>> sorted(_detect_imports(['import os.path', 'from a import bC, dE as f']))
    ['a', 'bC', 'dE', 'os', 'path']
    """
    imports = set()
    for line in lines:
        # Cheap substring test so the regexes only run on candidate lines.
        if 'import' not in line:
            continue

        match = _simple_import_pattern.match(line)
        if match:
            imports.update(_names_in_clause(match.group('module_path')))
            continue

        match = _from_import_pattern.match(line)
        if match:
            # The source module may start with dots (relative import), so it
            # is split directly rather than matched as a name.
            imports.update(_path_parts(match.group('from_module')))
            imports.update(_names_in_clause(match.group('imported_module')))

    return imports

#----------------------------------------------------------------------------#

def _create_option_parser():
    """
    Builds the command-line parser for this script.

    Only the usage text is supplied; no options are added to the parser
    here.
    """
    return optparse.OptionParser(usage=
"""%prog [options] [filename1.py [filename2.py [...]]]

Converts all variables and methods in the given filenames from camel-case
notation to underscore notation. Backs up the old file using a .bak suffix.""")

def main(argv):
    """
    Converts every file named in argv.

    argv -- the command-line arguments, without the program name.

    When no filenames are left after option parsing, the help text is
    printed and the process exits with status 1.
    """
    parser = _create_option_parser()
    _, filenames = parser.parse_args(argv)

    if len(filenames) == 0:
        parser.print_help()
        sys.exit(1)

    for path in filenames:
        from_camel(path)

#----------------------------------------------------------------------------#

# When run as a script, hand the command-line arguments (without the program
# name in sys.argv[0]) to main().
if __name__ == '__main__':
    main(sys.argv[1:])

# vim: ts=4 sw=4 sts=4 et tw=78:
