#!/usr/bin/env python
# coding: utf-8
"""Chanjo CLI

Usage:
  chanjo annotate <store> using <source> [--cutoff=<int>] [--splice-sites]
         [--dialect=<db>] [--sample=<str>] [--group=<int>]
         [--json=<file> | --pipe] [--force] [--prepend=<string>]
  chanjo read <chrom> <start> <end> using <source> [--cutoff=<int>]
         [--dialect=<db>]
  chanjo peek <store> <gene>... [--sample=<str>] [--group=<int>]
         [--dialect=<db>]
  chanjo build <store> using <ccds_path> [--force] [--dialect=<db>]
  chanjo <store> import <json_path>... [--dialect=<db>]
  chanjo -h | --help
  chanjo --version

Commands:
  annotate            Annotate genes in a SQLite database
  read                Coverage for an interval in a BAM-file
  peek                Peek at coverage metrics for an *annotated* gene
  build               Builds a new skeleton SQLite database
  import              Import annotations after using "--json" option

Arguments:
  <store>             Path to new or existing SQLite database
  <source>            Path to the BAM alignment file (.bam)
  <ccds_path>         Path to CCDS database dump (.txt)
  <gene>              List of HGNC symbols
  <json_path>         List of JSON-files to import
  <chrom>             Chromosome ID
  <start>             Starting interval position (1-based)
  <end>               Ending interval position (1-based)

Options:
  -h --help           Show this screen
  --version           Show version
  -r --read=<file>    Text file with one HGNC symbol per line
  --ccds=<file>       Path to CCDS database dump (build new database)
  -c --cutoff=<int>   Cutoff for completeness [default: 10]
  -d --dialect=<db>   Type of database: 'sqlite' or 'mysql' [default: sqlite]
  -ss --splice-sites  Include splice sites (+/- 2 bases) for each exon
  -s --sample=<str>   Sample ID [default: 0-0-0U]
  -g --group=<int>    Group/Family ID [default: 0]
  -j --json=<file>    Write temp JSON-file for later import (parallel)
  -p --pipe           Write to standard output, see "--json" (parallel)
  -f --force          Force overwrite of existing files
  --prepend=<string>  Prepend a string to each chromosome ID [default: auto]
"""
from __future__ import print_function
from docopt import docopt
import itertools
import json
import sys
from path import path

import chanjo
from chanjo import bam, sql, utils


def fileCheck(*fpaths, **kwargs):
  """
  Checks one or more paths for an existing file before overwrite.

  Terminates the program through ``sys.exit`` with a warning message for the
  first path that already exists. Nothing is checked when ``force`` is set.

  :param fpaths: Paths that are about to be written to
  :param bool force: Whether to skip the actual checking (optional)
  """
  # Standard library replacement for ``path(fpath).exists()``
  import os

  # Get optional "force" keyword argument; nothing to check when it's set
  if kwargs.get("force", False):
    return

  for fpath in fpaths:
    if os.path.exists(fpath):
      # The two literals are joined by the parser *before* ".format" is
      # applied, so the first one must end with a space.
      sys.exit("\033[93mWARNING: {path} already exists. Use '--force' to "
               "overwrite.\033[0m".format(path=fpath))

def import_(db, data):
  """
  Persists one annotation payload to the datastore and derives transcript and
  gene level annotations from the stored exon annotations.

  The payload is the dictionary assembled by the "annotate" command, which is
  also what gets loaded from JSON-files after running Chanjo in parallel.

  :param db: Datastore adapter; must provide ``create``, ``add``,
             ``transcriptStats`` and ``geneStats``
  :param dict data: Payload with the keys "annotations", "sample_id",
                    "group_id", "cutoff", "source" and "splice". Each
                    annotation is indexed as (coverage, completeness,
                    element id).
  """
  exon_annotations = data["annotations"]
  sample = data["sample_id"]
  group = data["group_id"]

  # Register the sample itself together with its meta-data
  sample_entry = db.create(
    "sample",
    sample=sample,
    group=group,
    cutoff=data["cutoff"],
    source=data["source"],
    splice=data["splice"]
  )
  db.add(sample_entry)

  # One "exon_data" entry per exon annotation
  exon_entries = []
  for annotation in exon_annotations:
    exon_entries.append(db.create(
      "exon_data",
      element_id=annotation[2],
      coverage=annotation[0],
      completeness=annotation[1],
      sample_id=sample,
      group_id=group
    ))

  # N.B. Committing here is what makes the stats queries below work
  db.add(exon_entries).commit()

  # Extend the annotations from exons to transcripts ...
  transcript_entries = []
  for stats in db.transcriptStats(sample):
    transcript_entries.append(db.create(
      "transcript_data",
      element_id=stats[0],
      sample_id=sample,
      group_id=group,
      coverage=stats[1],
      completeness=stats[2]
    ))
  db.add(transcript_entries).commit()

  # ... and from there on to genes
  gene_entries = []
  for stats in db.geneStats(sample):
    gene_entries.append(db.create(
      "gene_data",
      element_id=stats[0],
      sample_id=sample,
      group_id=group,
      coverage=stats[1],
      completeness=stats[2]
    ))
  db.add(gene_entries).commit()

def main(args):
  """
  Runs the Chanjo command selected on the command line.

  Dispatches on the command flags in ``args`` ("read", "peek", "build" and
  "import"); when none of them is set the "annotate" command is run.

  :param dict args: Parsed command line arguments, i.e. the dictionary docopt
                    builds from the usage text at the top of this file
  """
  # Prepare input arguments
  cutoff = int(args["--cutoff"])
  group_id = int(args["--group"])
  sample_id = args["--sample"]
  store = args["<store>"]
  source = args["<source>"]
  force = args["--force"]
  ccds_path = args["<ccds_path>"]
  dialect = args["--dialect"]

  # This is the default.
  # In the future, Chanjo will guess based on BAM headers
  if args['--prepend'] == 'auto':
    chrom_prepend = ''
  else:
    # Use whatever the user defined
    chrom_prepend = args['--prepend']

  # Set up adapters; commands without a <store> get an in-memory database
  db = sql.ElementAdapter(store or ":memory:", dialect=dialect)

  # Only commands taking a <source> ("read" and "annotate") get a BAM adapter
  if source:
    cov = bam.CoverageAdapter(source)

  # First command will determine which path to take
  if args["read"]:
    # Reads coverage metrics from a BAM-file across a given interval and prints
    # the results as JSON string.
    chrom = args["<chrom>"]
    start = int(args["<start>"])
    end = int(args["<end>"])

    # Read the BAM-file, calculate coverage, and print to console.
    depths = cov.read(chrom, start, end)
    coverage, completeness = utils.coverageMetrics(depths, cutoff)

    output = {
      "interval": {
        "chrom": chrom,
        "start": start,
        "end": end
      },
      "coverage": coverage,
      "completeness": completeness
    }

    print(json.dumps(output, indent=4))

  elif args["peek"]:
    # Prints out a JSON string of the data you have requested based on genes.
    output = {}

    # Loop over each of the HGNC symbols
    for gene_id in args["<gene>"]:

      output[gene_id] = []

      # Loop over each of the annotations
      for data in db.get("gene", gene_id).data:

        # Print the annotation according to what the user requests (all or
        # one). "0-0-0U" is the default for "--sample", i.e. no sample was
        # picked and every annotation is included.
        if sample_id == "0-0-0U" or sample_id == data.sample_id:
          output[gene_id].append({
            "sample": data.sample_id,
            "coverage": data.coverage,
            "completeness": data.completeness
          })

    print(json.dumps(output, indent=4))

  elif args["build"]:
    # Builds a new database instance with barebones structure and
    # relationships, no annotations. This is useful when you plan to run
    # Chanjo in parallel and need a reference database.

    # Check path
    # NOTE(review): the adapter for <store> has already been set up above by
    # the time this check runs - confirm that doesn't create the file.
    fileCheck(store, force=force)

    # Build up a barebones database structure
    db.quickBuild(ccds_path)

  elif args["import"]:
    # Import one text-file at a time
    for txt_path in args["<json_path>"]:

      # Open the file
      with open(txt_path, "r") as handle:

        data = json.load(handle)
        import_(db, data)

  else:
    # ... which means we're going to annotate some coverage
    if args["--json"]:
      # Check path
      fileCheck(args["--json"], force=force)

    # Splice site check: extend each exon by 2 bases when requested
    extend = 2 if args["--splice-sites"] else 0

    exon = db.get("class", "exon")

    # Collect the annotations chromosome by chromosome. The list grows with
    # whatever "utils.chromosomes" yields instead of assuming exactly 24
    # entries, which would either overflow or leave "None" gaps that can't be
    # flattened below.
    data = []
    for chrom in utils.chromosomes(chrom_prepend):
      exons = db.session.query(exon.start, exon.end, exon.id)\
                .filter_by(chrom=chrom).order_by(exon.start).all()

      # NOTE(review): the meaning of 17000 isn't visible from this file -
      # check "utils.process" before changing it.
      data.append(utils.process(cov, chrom, exons, extend, cutoff, 17000))

    # Flatten data list
    flat_data = itertools.chain.from_iterable(data)

    output = {
      "sample_id": sample_id,
      "group_id": group_id,
      "cutoff": cutoff,
      "source": path(source).abspath(),
      "splice": args["--splice-sites"],
      "annotations": list(flat_data)
    }

    if args["--json"]:
      # We should save a temp version of the exon annotations to a text-file
      # rather than persisting to the datastore. We use JSON.
      with open(args["--json"], "w") as handle:
        json.dump(output, handle)

    elif args["--pipe"]:
      # Print to standard out to enable piping
      print(json.dumps(output, indent=4))

    else:
      # Go straight to importing data to database
      import_(db, output)

if __name__ == "__main__":
  # Version banner printed by docopt for "--version"
  version_banner = "Chanjo {v}".format(v=chanjo.__version__)

  # The module docstring above doubles as the command line specification
  args = docopt(__doc__, version=version_banner)

  main(args)
