#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2014 Ivan Melnikov <iv at altlinux dot org>
#
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


"""Check documentation for simple style requirements.

What is checked:
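    - lines should not be longer than 79 characters (the limit can be
      changed with the 'max-line-length' option of the 'doc8' section)
      - exception: line with no whitespace except maybe in the beginning
      - exception: line that contains a 'http://' or 'https://' URL
      - exception: directive lines ('.. name::') together with the blank or
        indented lines that follow them -- longer directives are allowed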
    - no tabulation for indentation
    - no trailing whitespace
"""

import argparse
import collections
import fnmatch
import functools
import os
import re
import sys

from six.moves import configparser


# Glob patterns of the documentation files that get scanned.
FILE_PATTERNS = ['*.rst', '*.txt']
# Default line-length limit, used when the configuration does not set one.
MAX_LINE_LENGTH = 79
# Raw strings keep ``\s`` a regex escape instead of an invalid string escape
# (which newer Python versions warn about).
TRAILING_WHITESPACE_REGEX = re.compile(r'\s$')
STARTING_WHITESPACE_REGEX = re.compile(r'^(\s+)')


def check_max_length(max_line_length, contents):
    """Report the lines of ``contents`` longer than ``max_line_length``.

    ``contents`` is split on newlines and every line is measured. A line is
    never reported when it:

    - belongs to a directive region, i.e. it is a line starting with
      ``.. name::`` or one of the blank/indented lines directly after it;
    - can not be split (no space left once it has been stripped);
    - contains a ``http://`` or ``https://`` URL.

    :param max_line_length: maximum number of characters allowed per line
    :param contents: whole text of the document to check
    :returns: generator of ``(line_number, 'D001', message)`` tuples, where
              line numbers start at 1
    """
    # The dots are escaped on purpose: unescaped they match *any* two
    # characters, so plain text like "ab cd:: ..." looked like a directive.
    directive_start = re.compile(r"^\.\.\s(.*?)::\s*")
    leading_whitespace = re.compile(r"\s")

    def continues_directive(line):
        # Blank and indented lines are part of the directive's body.
        return not line or bool(leading_whitespace.match(line))

    lines = contents.split("\n")

    # Collect the indexes of all lines living inside a directive region so
    # the length check below can skip them with a constant-time lookup.
    excluded = set()
    for start, line in enumerate(lines):
        if not directive_start.match(line):
            continue
        end = start + 1
        while end < len(lines) and continues_directive(lines[end]):
            end += 1
        excluded.update(range(start, end))

    for index, line in enumerate(lines):
        if index in excluded or len(line) <= max_line_length:
            continue
        stripped = line.strip()
        # line can't be split
        if ' ' not in stripped:
            continue
        # URLs can not be wrapped either
        if "http://" in stripped or "https://" in stripped:
            continue
        yield (index + 1, 'D001', 'Line too long')


def check_trailing_whitespace(line):
    """Yield a ``('D002', message)`` pair when ``line`` ends in whitespace."""
    if TRAILING_WHITESPACE_REGEX.search(line) is None:
        return
    yield ('D002', 'Trailing whitespace')


def check_indentation_no_tab(line):
    """Yield a ``('D003', message)`` pair when ``line`` is indented with tabs.

    Only the run of whitespace at the very start of the line is inspected.
    """
    leading = STARTING_WHITESPACE_REGEX.search(line)
    if leading is not None and '\t' in leading.group(1):
        yield ('D003', 'Tabulation used for indentation')


def check_lines(lines, line_checks):
    """Apply every per-line check to every line.

    :param lines: iterable of lines (trailing newlines are removed before
                  the checks see them)
    :param line_checks: callables taking a line and returning an iterable
                        of ``(code, message)`` pairs
    :returns: generator of ``(line_number, code, message)`` tuples, where
              line numbers start at 1
    """
    for number, raw in enumerate(lines, 1):
        text = raw.rstrip('\n')
        for checker in line_checks:
            for code, message in checker(text):
                yield number, code, message


def check_files(filenames, line_checks, content_checks):
    """Run all checks over each file and report what they find.

    For every file the content checks are run first on the whole text, then
    the file is rewound and the line checks are run line by line.

    :param filenames: iterable of paths of the files to open
    :param line_checks: checks handed over to ``check_lines``
    :param content_checks: callables taking the full file text and returning
                           an iterable of ``(line_number, code, message)``
    :returns: generator of ``(filename, line_number, code, message)`` tuples
    """
    for path in filenames:
        with open(path) as handle:
            whole_text = handle.read()
            for checker in content_checks:
                for found in checker(whole_text):
                    line_num, code, message = found
                    yield path, line_num, code, message
            # Rewind so the same handle can be iterated line by line.
            handle.seek(0)
            for found in check_lines(handle, line_checks):
                line_num, code, message = found
                yield path, line_num, code, message


def find_files(pathes, patterns):
    """Yield the files to check found under the given paths.

    A path that is a file is yielded as is (patterns are not applied to it);
    a directory is walked recursively and only the files whose name matches
    one of ``patterns`` are yielded; anything else is reported with an
    'Invalid path' message and skipped.

    :param pathes: iterable of file or directory paths
    :param patterns: ``fnmatch`` style patterns applied to file names
    """
    def wanted(name):
        for pattern in patterns:
            if fnmatch.fnmatch(name, pattern):
                return True
        return False

    for path in pathes:
        if os.path.isfile(path):
            yield path
            continue
        if not os.path.isdir(path):
            print('Invalid path: %s' % path)
            continue
        for root, _dirnames, names in os.walk(path):
            for name in names:
                if wanted(name):
                    yield os.path.join(root, name)


def extract_config(args):
    """Build the checker configuration from ini files in the current directory.

    ``doc8.ini``, ``tox.ini`` and ``pep8.ini`` are read and their ``doc8``
    section is looked up for:

    - ``max-line-length``: integer, falls back to ``MAX_LINE_LENGTH``;
    - ``ignore``: comma separated error codes, falls back to no codes.

    :param args: parsed command line arguments (not used at the moment)
    :returns: dict with the ``max_line_length`` and ``ignore`` (a set) keys
    """
    candidates = [os.path.join(os.getcwd(), name)
                  for name in ("doc8.ini", "tox.ini", "pep8.ini")]
    parser = configparser.RawConfigParser()
    parser.read(candidates)
    not_configured = (configparser.NoSectionError,
                      configparser.NoOptionError)
    cfg = {}
    try:
        cfg['max_line_length'] = parser.getint("doc8", "max-line-length")
    except not_configured:
        cfg['max_line_length'] = MAX_LINE_LENGTH
    try:
        raw_ignores = parser.get("doc8", "ignore")
    except not_configured:
        # Nothing configured: an empty string produces an empty set below.
        raw_ignores = ""
    codes = (piece.strip() for piece in raw_ignores.split(","))
    cfg['ignore'] = set(code for code in codes if code)
    return cfg


def unique_itr(itr):
    """Yield the items of ``itr`` in order, dropping repeated ones.

    Items have to be hashable since the ones already produced are kept in
    a set.
    """
    emitted = set()
    for item in itr:
        if item not in emitted:
            yield item
            emitted.add(item)


def main():
    """Command line entry point.

    Parses the paths given on the command line (the current directory when
    none is given), loads the configuration, runs every check over the files
    found and prints one ``file:line: code message`` line per error whose
    code is not ignored. Exits with status 0 when nothing was printed and
    1 otherwise.
    """
    help_text = ("path to scan for %s files"
                 " (default: os.getcwd())") % ", ".join(FILE_PATTERNS)
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", metavar='path', type=str, nargs='*',
                        help=help_text, default=[os.getcwd()])
    args = parser.parse_args()
    targets = list(unique_itr(args.paths))
    cfg = extract_config(args)
    per_line = [check_trailing_whitespace, check_indentation_no_tab]
    per_content = [
        functools.partial(check_max_length, cfg['max_line_length']),
    ]
    ignored = cfg['ignore']
    failures = 0
    found = check_files(find_files(targets, FILE_PATTERNS),
                        per_line, per_content)
    for error in found:
        # error is (filename, line number, code, message)
        if error[2] not in ignored:
            failures += 1
            print('%s:%s: %s %s' % error)
    sys.exit(1 if failures else 0)

# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
    main()
