from docstash import Stash

from docpipe.constants import EXTRACT_PHASE, TRANSFORM_PHASE
from docpipe.graph import Node, resolve_dependencies


class Pipeline(object):
    """A named set of processing nodes driven by a configuration mapping.

    The configuration is read as a mapping with up to three sections:
    ``'config'`` (settings copied into every node's configuration, and
    also the source of the ``'stash'`` path and ``'collection'`` name
    used to open the document collection) plus one section per phase
    (``EXTRACT_PHASE`` and ``TRANSFORM_PHASE``), each mapping a node name
    to that node's own settings.
    """

    def __init__(self, name, config=None):
        """Create a pipeline.

        :param name: identifier of the pipeline, shown in ``repr()``.
        :param config: optional mapping of settings. Its top level is
            copied into a new dict, so rebinding keys on the caller's
            mapping afterwards does not affect the pipeline.
        """
        self.config = dict()
        self.name = name
        if config is not None:
            self.config.update(config)

        # Both are created lazily by the properties of the same name.
        self._nodes = None
        self._collection = None

    @property
    def collection(self):
        """The document collection, opened on first access and cached.

        Reads ``'stash'`` (passed to ``Stash`` as ``path``) and
        ``'collection'`` (defaulting to ``'default'``) from the shared
        ``'config'`` section.
        """
        if self._collection is None:
            # A section that is present but empty may be None rather
            # than a dict; treat it the same as a missing section.
            shared = self.config.get('config') or {}
            stash = Stash(path=shared.get('stash'))
            name = shared.get('collection', 'default')
            self._collection = stash.get(name)
        return self._collection

    @property
    def nodes(self):
        """List of ``Node`` objects for all phases, built once and cached.

        Extract-phase nodes come first, then transform-phase nodes. Each
        node receives a copy of the shared ``'config'`` section updated
        with its own settings, so node settings win on key conflicts.
        """
        if self._nodes is None:
            self._nodes = []
            # Present-but-None sections are treated as empty (see
            # ``collection``) instead of failing on ``.items()``.
            shared = self.config.get('config') or {}

            for phase in (EXTRACT_PHASE, TRANSFORM_PHASE):
                section = self.config.get(phase) or {}
                for name, config in section.items():
                    settings = shared.copy()
                    # A node declared without any settings of its own
                    # simply inherits the shared ones.
                    settings.update(config or {})
                    self._nodes.append(Node(self, phase, name, settings))

        return self._nodes

    def extract(self):
        """Call ``extract()`` on every node, in ``nodes`` order."""
        for node in self.nodes:
            node.extract()

    def transform(self):
        """Call ``transform(document)`` on every node for each document.

        Nodes are applied in the order produced by
        ``resolve_dependencies``. The order is resolved once up front, so
        any error raised while resolving it surfaces before the first
        document is processed.
        """
        collection = self.collection
        # The node list does not change between documents, so resolve
        # the execution order once rather than once per document.
        # list() keeps the order reusable even if the resolver is lazy.
        ordered = list(resolve_dependencies(self.nodes))
        # TODO: parallelize
        for document in collection:
            for node in ordered:
                node.transform(document)

    def run(self):
        """Run the extract step followed by the transform step."""
        self.extract()
        self.transform()

    def __repr__(self):
        # Wrap the name in a tuple so that a tuple-valued name is
        # formatted as one value instead of being unpacked as arguments.
        return "<Pipeline('%s')>" % (self.name,)
