#!/usr/bin/python
"""
DuctGenome

Analyze genome(s) and map them to KEGG
"""
from ductape import __version__
from ductape.actions import touchProject, prepareDir
from ductape.common.colorlog import ColorFormatter
from ductape.storage.SQLite.database import Organism, Project, Genome, Kegg
from ductape.terminal import RunThread
import argparse
import logging.handlers
import os
import sys

__author__ = "Marco Galardini"
__prog__ = "dgenome"

################################################################################
# Log setup

logger = logging.getLogger('ductape')

################################################################################
# Methods

def dinit(options, wdir, project):
    from ductape.actions import dInit
    if not dInit(project, wdir, options.name, options.descr):
        logger.warning('You can remove or rename the old project file')
        return False
    else:
        return True

def dadd(options, wdir, project):
    from ductape.actions import dGenomeAdd, dGenomeMutAdd
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    
    org = Organism(project)
    if org.isOrg(options.orgID):
        if not org.isMutant(options.orgID):
            return dGenomeAdd(project, options.orgID, options.file)
        else:
            return dGenomeMutAdd(project, options.orgID, options.file)
    else:
        logger.warning('Organism %s is not present yet!'%options.orgID)
        return False

def daddDir(options, wdir, project):
    from ductape.actions import dGenomeDirAdd
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    return dGenomeDirAdd(project, options.folder, options.e)

def dstart(options, wdir, project):
    from Bio import SeqIO
    from ductape.actions import dGetGenomeSteps
    
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    
    if options.cpu <= 0:
        logger.warning('How can i use %d cpus?'%options.cpu)
        return False
    
    tmp = os.path.join(wdir,'tmp')
    if not prepareDir(wdir, 'tmp'):
        return False
    
    proj = Project(project)
    org = Organism(project)
    gen = Genome(project)
    
    if len(org) == 0:
        logger.warning('No organisms are present yet!')
        logger.warning('Use %s add or %s add-dir!'%(__prog__, __prog__))
        return False
    
    steps = dGetGenomeSteps(project)
    if 'pangenome' in steps or 'map2ko' in steps:
        # Prepare the genomic files
        protdir = os.path.join(tmp,'proteins')
        if not os.path.exists(protdir):
            try:
                os.mkdir(protdir)
            except:
                logger.error('Could not create tmp directory %s'%protdir)
                return False
        infiles = {}
        for organism in org.getAll():
            protfile = os.path.join(protdir, organism.org_id)
            infiles[organism.org_id] = protfile
            SeqIO.write(gen.getRecords(organism.org_id),
                        open(protfile, 'w'), 'fasta')
        #
    for step in steps:
        if step == 'pangenome':
            if options.s:
                logger.warning('Skipping pangenome calculation')
                continue
            if not doPanGenome(project,infiles,options.cpu,options.prefix,options.matrix,options.evalue):
                logger.error('PanGenome could not be calculated!')
                return False
        elif step == 'map2ko':
            if options.g:
                logger.warning('Skipping Kegg mapping')
                continue
            if not doMap2KO(project, infiles, local=options.l, keggdb=options.k):
                logger.error('Genome(s) could not be mapped to ko!')
                return False
            if options.l:
                proj.setGenome('map2ko')
                org.setAllGenomeStatus('map2ko')
            else:
                break
        elif step == 'map2kegg':
            if options.g:
                logger.warning('Skipping Kegg mapping')
                continue
            if not doMap2KEGG(project, options.y):
                logger.error('Genome(s) could not be mapped to kegg!')
                return False
            proj.setGenome('map2kegg')
            org.setAllGenomeStatus('map2kegg')
        else:
            logger.warning('Unrecognized analysis %s'%step)
    
    return True

def dannotate(options, wdir, project):
    from ductape.actions import dGenomeAnnotate
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    
    ret = dGenomeAnnotate(project, options.s)
    if ret:
        logger.info('Information on merged annotations and multiply annotated'+
                    ' orthologs can be obtained with %s export'%__prog__)
        
    return True

def ddeannotate(options, wdir, project):
    from ductape.actions import dGenomeDeAnnotate
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    
    return dGenomeDeAnnotate(project)
         
def dstats(options, wdir, project):
    from ductape.actions import dGenomeStats
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    return dGenomeStats(project, svg=options.svg)

def dexport(options, wdir, project):
    from ductape.actions import dGenomeExport
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    return dGenomeExport(project)

def daddKo(options, wdir, project):
    from ductape.genome.map2KO import OnlineSearch
    
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    
    kegg = Kegg(project)
    gen = Genome(project)
    proj = Project(project)
    org = Organism(project)
    
    kaas = OnlineSearch()
    for filename in options.kofile:
        logger.info('Parsing KAAS output %s'%filename)
        kaas.parseKAAS(filename)
        # TODO: here check if results are empty
        kegg.addDraftKOs( set(kaas.results.values()) )
        prot_ko = [[x,y] for x,y in kaas.results.iteritems()]
        gen.addKOs( prot_ko )
    
    logger.info('Mapped %d proteins to KO'%len(kaas.results))
    proj.setGenome('map2ko')
    org.setAllGenomeStatus('map2ko')
    
    return True
    
def daddPanGenome(options, wdir, project):
    from ductape.actions import dPanGenomeAdd
    if not touchProject(project):
        logger.warning('You can setup a new project by running %s init'%
                       __prog__)
        return False
    return dPanGenomeAdd(project, options.orthfile)

def dremove(options, wdir, project):
    from ductape.actions import dGenomeRemove
    if not touchProject(project):
        logger.warning('Nothing to be removed!\n'+
                       'You can setup a new project by running %s init'%
                       __prog__)
        return False
    return dGenomeRemove(project, options.organisms)

def dclear(options, wdir, project):
    from ductape.actions import dGenomeClear
    if not touchProject(project):
        logger.warning('Nothing to be cleaned up!\n'+
                       'You can setup a new project by running %s init'%
                       __prog__)
        return False
    return dGenomeClear(project)

def doPanGenome(project, infiles, cpu=1, prefix='',
                matrix='BLOSUM80', evalue=1e-10):
    from ductape.genome.pangenome import PanGenomer
    
    pang = PanGenomer(infiles.values(), ncpus=cpu, prefix=prefix,
                       matrix=matrix, evalue=evalue)
    
    if not RunThread(pang):
        return False
    
    gen = Genome(project)
    gen.addPanGenome(pang.orthologs)
    
    logger.info('PanGenome size: %d groups'%len(gen.getPanGenome()))
    logger.info('Core size: %d groups'%gen.getLenCore())
    logger.info('Accessory size: %d groups'%gen.getLenAcc())
    logger.info('Unique size: %d groups'%gen.getLenUni())
    
    return True

def doMap2KO(project, infiles, local=False, keggdb='', cpu=1):
    from ductape.genome.map2KO import LocalSearch, OnlineSearch
    
    org = Organism(project)
    kegg = Kegg(project)
    gen = Genome(project)
    
    if local:
        for org_id, infile in infiles.iteritems():
            komap = LocalSearch(infile, keggdb, ncpus=cpu)
            if not RunThread(komap):
                return False
            org.setGenomeStatus(org_id, 'map2ko')
            kegg.addDraftKOs( set(komap.results.values()) )
            gen.addKOs( komap.results.iteritems() )
            logger.info('%s - mapped %d proteins to KO'%
                        (org_id, len(komap.results)))
    else:
        kaas = OnlineSearch()
        sys.stdout.write(kaas.getExplanation() + '\n')
        sys.stdout.write('When the analysis are finished launch %s add-ko\n'%
                         __prog__)
        
    return True

def doMap2KEGG(project, keeptrying=False):
    from ductape.actionsterm import fetchKegg
    from ductape.kegg.kegg import KoMapper
    
    # Fetch the Kegg DB?
    if not fetchKegg(project, keeptrying):
        logger.error('Could not fetch data from KEGG')
        return False
    
    kegg = Kegg(project)
    kos = [ko.ko_id for ko in kegg.getKO2Analyze()]
    if len(kos) == 0:
        logger.warning('No KO entries to be analyzed!')
        logger.warning('The KO entries could also have been already analyzed')
        return True
    avoid = [kid for kid in kegg.getAllIDs()]
    
    komap = KoMapper(kos,avoid=avoid, keeptrying=keeptrying)
    
    if not RunThread(komap):
        return False
    
    # Details
    kegg.addKOs(komap.result.ko)
    logger.info('Added %d KO IDs'%len(komap.result.ko))
    kegg.addReactions(komap.result.react)
    logger.info('Added %d Re IDs'%len(komap.result.react))
    kegg.addCompounds(komap.result.comp)
    logger.info('Added %d Co IDs'%len(komap.result.comp))
    kegg.addPathways(komap.result.path)
    logger.info('Added %d Path IDs'%len(komap.result.path))
    kegg.addRPairs(komap.result.rpair)
    logger.info('Added %d RPair IDs'%len(komap.result.rpair))
    # Links
    kegg.addKOReacts(komap.result.koreact)
    kegg.addPathComps(komap.result.pathcomp)
    kegg.addPathReacts(komap.result.pathreact)
    kegg.addReactComps(komap.result.reactcomp)
    kegg.addCompReacts(komap.result.compreact)
    kegg.addReactRPairs(komap.result.reactrpair)
    kegg.addRPairReacts(komap.result.rpairreact)
    logger.info('Added Kegg links')
    # HTML maps
    kegg.addPathHtml(komap.result.pathmaps)
    logger.info('Added Kegg maps')
    
    return True

################################################################################
# Options

def getOptions():
    description = "Add and analyze genomes"
    parser = argparse.ArgumentParser(description = description,
                                     prog=__prog__)
    parser.add_argument('-p', metavar='project', action='store',
                        dest='project',
                        default='ductape.db',
                        help='Project file')
    parser.add_argument('-w', metavar='workdir', action='store', dest='wdir',
                        default='.',
                        help='Working directory')
    parser.add_argument('-v', action='count',
                        default=0,
                        help='Increase verbosity level')
    parser.add_argument('--version', action='version',
                        version='%(prog)s '+__version__)
    subparsers = parser.add_subparsers()

    parser_init = subparsers.add_parser('init', help='Initialize the project')
    parser_init.add_argument('-n', action="store",
                             dest='name',
                             default = 'Project',
                             help='Project name')
    parser_init.add_argument('-d', metavar='descr', action="store",
                            dest='descr',
                            default = 'DuctApe project',
                            help='Project description')
    parser_init.set_defaults(func=dinit)

    parser_add = subparsers.add_parser('add',
                           help='Add a single genome')
    parser_add.add_argument('file', action="store",
                            help='Protein fasta file')
    parser_add.add_argument('orgID', action='store',
                            help='Organism ID')
    parser_add.set_defaults(func=dadd)
    
    parser_add_dir = subparsers.add_parser('add-dir',
           help='Add a series of genomes (orgIDs will be guessed)')
    parser_add_dir.add_argument('folder', action="store",
                            help='Folder where the genomes fasta are stored')
    parser_add_dir.add_argument('-e', metavar='extension', action="store",
                            default = 'faa',
                            help='Fasta files extension')
    parser_add_dir.set_defaults(func=daddDir)
    
    parser_add_ko = subparsers.add_parser('add-ko',
                           help='Add a KO map (protein code --> KO code)')
    parser_add_ko.add_argument('kofile', action='store', nargs='+',
                            help='KO map')
    parser_add_ko.set_defaults(func=daddKo)
    
    parser_add_ko = subparsers.add_parser('add-orth',
                       help='Add a pangenome (ortholog code --> protein code)')
    parser_add_ko.add_argument('orthfile', action='store',
                            help='Orthologs file')
    parser_add_ko.set_defaults(func=daddPanGenome)

    parser_start = subparsers.add_parser('start', help='Start the analysis')
    parser_start.add_argument('-n', metavar='cpu', action="store", dest='cpu',
                            type=int,
                            default=1,
                            help='Number of CPUs to be used')
    parser_start.add_argument('-s', action="store_true",
                            default=False,
                            help='Skip pangenome creation')
    parser_start.add_argument('-g', action="store_true",
                            default=False,
                            help='Skip Kegg mapping')
    parser_start.add_argument('-y', action="store_true",
                            default=False,
                            help='Try to fetch Kegg data even while encountering failures')
    parser_start.add_argument('-p', action="store", dest='prefix',
                            default='',
                            help='Orthologous groups prefix')
    parser_start.add_argument('-m', action="store", dest='matrix',
                            default='BLOSUM80',
                            help='BLAST matrix for pangenome [Default: BLOSUM80]')
    parser_start.add_argument('-e', action="store",
                            dest='evalue',
                            type=float,
                            default=1e-10,
                            help='BLAST E-value threshold for pangenome [Default: 1e-10]')
    parser_start.add_argument('-l', action="store_true",
                            default=False,
                            help='Local map2ko')
    parser_start.add_argument('-k', action="store",
                            help='Kegg database location (for local map2ko)')
    parser_start.set_defaults(func=dstart)
    
    parser_annotate = subparsers.add_parser('annotate', help='Transfer and correct the KEGG annotation')
    parser_annotate.add_argument('-s', action="store_true",
                            default=False,
                            help='Skip annotation merging (just check)')
    parser_annotate.set_defaults(func=dannotate)
    
    parser_deannotate = subparsers.add_parser('deannotate', help='Revert transfered annotations')
    parser_deannotate.set_defaults(func=ddeannotate)
    
    parser_stats = subparsers.add_parser('stats', help='Print genomic statistics')
    parser_stats.add_argument('-s', '--svg', action="store_true",
                            default=False,
                            help='Figures in svg format instead of png')
    parser_stats.set_defaults(func=dstats)
    
    parser_export = subparsers.add_parser('export', help='Export genomic data')
    parser_export.set_defaults(func=dexport)

    parser_rm = subparsers.add_parser('rm', help='Remove genome analysis')
    parser_rm.add_argument('organisms', metavar='orgID', nargs='+',
                              action="store",
                            help='Organism(s) to be removed')
    parser_rm.set_defaults(func=dremove)
    
    parser_clear = subparsers.add_parser('clear',
                                         help='Clear all the genomic results')
    parser_clear.set_defaults(func=dclear)
    
    return parser.parse_args()

################################################################################

options = getOptions()

logger.setLevel(logging.DEBUG)

ch = logging.StreamHandler()
if options.v == 0:
    ch.setLevel(logging.INFO)
elif options.v >= 1:
    ch.setLevel(logging.DEBUG)
formatter = ColorFormatter('%(asctime)s - $COLOR%(message)s$RESET','%H:%M:%S')
ch.setFormatter(formatter)
logger.addHandler(ch)

fh = logging.handlers.RotatingFileHandler('ductape.log', maxBytes=2000000)
formatter = logging.Formatter('%(asctime)s - %(name)s - [%(levelname)s] - %(message)s',
                            '%Y-%m-%d %H:%M:%S')
fh.setFormatter(formatter)
logger.addHandler(fh)

wdir = os.path.abspath(options.wdir)
if not os.path.exists(wdir):
    try:
        os.mkdir(wdir)
    except:
        logger.error('Could not create working directory %s'%wdir)
    
project = os.path.join(wdir, options.project)

ret = options.func(options, wdir, project)

touchProject(project)

if not ret:
    sys.exit(1)
