#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Chanjo - Clinical sequencing coverage analysis tool

Usage:
  chanjo init
  chanjo convert [<reference>] [--adapter=STR] [--out=FILE] [--force]
  chanjo build [<bed_file>] [--db=STR] [--dialect=STR] [--force]
  chanjo export [--db=STR] [--dialect=STR] [--out=FILE] [--force]
  chanjo annotate <bam_file> [<bed_file>] [--cutoff=INT] [--extend-by=INT]
    [--sample=STR] [--group=STR] [--institute=STR] [--prepend=STR]
    [--threshold=INT] [--config=FILE] [--out=FILE] [--force]
  chanjo import [<bed_file>] [--db=STR] [--dialect=STR] [--json]
  chanjo --help
  chanjo --version

Commands:
  annotate            Annotate intervals in a BED file/stream
  build               Constructs a new skeleton SQL interval store
  convert             Convert reference database to Chanjo BED interval file
  export              Export BED intervals from an existing database
  import              Import coverage annotations to an existing database
  init                Walks you through setting up a config file

Arguments:
  <bam_file>          Path to BAM file
  <bed_file>          BED file with interval definitions, Chanjo-style
  <reference>         Intervals reference file (e.g. CCDS database)

Options:
  -h --help           Show this screen
  -v --version        Show version
  -a --adapter=STR    Adapter for converting reference to BED
                      [default: {--adapter}]
  -c --cutoff=INT     Cutoff for completeness calculation [default: {--cutoff}]
  --config=FILE       Path to YAML/JSON config file
  -d --dialect=STR    Type of database: sqlite/mysql [default: {--dialect}]
  -e --extend-by=INT  Dynamically extend intervals +/- [default: {--extend-by}]
  -f --force          Overwrite existing files/databases without warning
  -g --group=STR      Group Id to associate samples e.g. in trios
  -i --institute=STR  Institute Id to associate groups of samples
  -j --json           Import legacy JSON annotate output file
  -p --prepend=STR    Prepend to each contig Id [default: {--prepend}]
  -s --sample=STR     Sample Id (otherwise set automatically)
  -t --threshold=INT  Base pair threshold for optimizing BAM file reading
                      [default: {--threshold}]
  -o --out=FILE       Define an output file other than "stdout"
  --db=URI            Path/URI of a SQL database [default: {--db}]
"""
from importlib import import_module

from clint.textui import puts
from docopt import docopt

import chanjo
from chanjo import utils, pipelines, config


def main(args):
  """Run the Chanjo sub-command selected on the command line.

  ``args`` is the dictionary produced by ``docopt`` for the usage pattern in
  the module docstring. The first truthy sub-command key (checked in the
  order ``init``, ``convert``, ``build``, ``export``, ``annotate``,
  ``import``) decides which pipeline is launched; if none is set the
  function does nothing.

  Options are read, converted and opened lazily inside each branch so that a
  sub-command never fails on (or pays for) an option it does not use. For
  example a non-integer ``--cutoff`` only matters to ``annotate``.

  Args:
    args (dict): Parsed command line arguments/options keyed by their
      docopt names (e.g. ``'--db'``, ``'<bed_file>'``, ``'annotate'``).

  Raises:
    ValueError: If ``--cutoff``, ``--extend-by`` or ``--threshold`` can't be
      converted to ``int`` when running ``annotate``.
    ImportError: If the adapter module ``chanjo_<adapter>`` can't be imported
      when running ``convert``.
  """
  if args['init']:
    # Print a welcome message
    puts(chanjo.__doc__)

    try:
      # Launch the init pipeline with questionnaire
      config.init_pipeline(__file__)
    except KeyboardInterrupt:
      # Leave the questionnaire quietly instead of dumping a traceback
      puts()
    finally:
      puts()

  elif args['convert']:
    # Dynamically import the converter; adapters live in separate modules
    # named "chanjo_<adapter>"
    converter = import_module('chanjo_' + args['--adapter'])

    # Open the input before the output so a failing import or input open
    # happens before the output file is opened in write mode
    # NOTE(review): presumably ``open_or_stdx`` falls back to stdin/stdout
    # when given ``None`` -- confirm in ``chanjo.utils``
    ccds_stream = utils.open_or_stdx(args['<reference>'])
    out_stream = utils.open_or_stdx(args['--out'], open_args=('w',))

    # Run converter pipeline
    converter.pipeline(ccds_stream, out_stream)

  elif args['build']:
    # Build a new skeleton SQL interval store
    bed_stream = utils.open_or_stdx(args['<bed_file>'])

    # The last positional argument is an empty list; its meaning is defined
    # by ``pipelines.build``
    pipelines.build(bed_stream, args['--db'], args['--dialect'],
      args['--force'], [])

  elif args['export']:
    # Export BED intervals from database
    out_stream = utils.open_or_stdx(args['--out'], open_args=('w',))
    pipelines.export(args['--db'], args['--dialect'], out_stream)

  elif args['annotate']:
    # Validate the numeric options first so bad input is reported before any
    # file is opened
    cutoff = int(args['--cutoff'])
    extension = int(args['--extend-by'])
    bp_threshold = int(args['--threshold'])

    # Load user defined sample Id or generate one
    sample_id = args['--sample'] or utils.id_generator()

    bed_stream = utils.open_or_stdx(args['<bed_file>'])
    out_stream = utils.open_or_stdx(args['--out'], open_args=('w',))

    # Annotate a list of intervals with coverage and completeness
    pipelines.annotate(bed_stream, sample_id, args['--group'],
      args['--institute'], cutoff, args['<bam_file>'], extension,
      args['--prepend'], bp_threshold, out_stream)

  elif args['import']:
    # With ``--json`` the ``<bed_file>`` stream really is a JSON file
    in_stream = utils.open_or_stdx(args['<bed_file>'])

    if args['--json']:
      pipelines.import_json(in_stream, args['--db'], args['--dialect'])
    else:
      pipelines.import_data(in_stream, args['--db'], args['--dialect'])

if __name__ == '__main__':
  # Version banner shown by ``--version``; shared by both parsing passes
  version_banner = 'Chanjo v' + chanjo.__version__

  # Fallback option values; they are interpolated into the ``{--option}``
  # placeholders of the module docstring before docopt parses it
  defaults = dict([
    ('--adapter', 'ccds'),
    ('--cutoff', 10),
    ('--dialect', 'sqlite'),
    ('--extend-by', 0),
    ('--prepend', ''),
    ('--threshold', 17000),
    ('--db', 'coverage.sqlite'),
  ])

  # First pass: parse with the built-in defaults, only to learn whether the
  # user pointed at a config file with ``--config``
  preliminary_args = docopt(__doc__.format(**defaults), version=version_banner)

  # Values found in the config take precedence over the built-in defaults
  defaults.update(utils.read_config(__file__, preliminary_args['--config']))

  # Second pass: parse again with the merged defaults and let docopt check
  # that the command line matches the usage pattern
  args = docopt(__doc__.format(**defaults), version=version_banner)

  main(args)
