import os
import re

from celery import task,chord,group,chain
from celery.signals import task_success
from celery.utils.log import get_task_logger

from django.conf import settings

from .models import *

@task()
def reset_trigrams():
    """Wipe and reload the orthography and trigram tables.

    The data files are the entries of ``settings.TRIGRAM_PATH``, processed
    in sorted filename order.  The three steps run strictly one after the
    other: delete everything, load all spellings, then load the trigrams
    that refer to those spellings.
    """
    files = sorted(os.listdir(settings.TRIGRAM_PATH))
    # The steps are executed inline rather than dispatched as a chain and
    # awaited with ``.get()``: blocking on a subtask result from inside a
    # task can deadlock the worker pool (the parent occupies a slot while
    # waiting for children that need one) and Celery warns against / rejects
    # it.  Calling a task object directly runs it in the current process,
    # so this task still only finishes once the whole reload is done and
    # any failure still propagates to the caller.
    delete_all()
    load_orths(files)
    load_trigrams(files)


@task()
def delete_all():
    """Remove every ``Trigram`` and every ``Orthography`` row."""
    # Trigram objects are built from Orthography instances (see
    # load_trigram_file), so trigrams reference orthographies -- presumably
    # via foreign keys.  Clearing the referencing table first means the
    # Orthography delete has no dependent rows left to cascade through or be
    # blocked by.
    Trigram.objects.all().delete()
    Orthography.objects.all().delete()

@task()
def load_orths(files):
    """Create one ``Orthography`` row per distinct spelling found in ``files``.

    ``files`` is an iterable of file names; each is echoed to stdout and
    handed to ``load_ortho_file``.  The spellings from all files are merged
    into a single set before one ``bulk_create`` call inserts them.
    """
    spellings = set()
    for name in files:
        # Single-argument print(...) behaves the same as the print statement.
        print(name)
        found = load_ortho_file(name)
        spellings.update(found)
    new_rows = [Orthography(spelling=spelling) for spelling in spellings]
    Orthography.objects.bulk_create(new_rows)


@task()
def load_ortho_file(f):
    """Return the set of spellings that occur in one trigram file.

    ``f`` is a file name relative to ``settings.TRIGRAM_PATH``.  For every
    line, the text before the first tab (after stripping surrounding
    whitespace from the line) is split on single spaces and each piece is
    added to the result.
    """
    # Single-argument print(...) behaves the same as the print statement.
    print(f)
    path = os.path.join(settings.TRIGRAM_PATH, f)
    spellings = set()
    with open(path, 'r') as file_handle:
        for line in file_handle:
            # Only the first tab-separated field holds the words.
            ngram = line.strip().split("\t", 1)[0]
            spellings.update(ngram.split(" "))
    return spellings


@task()
def load_trigrams(files):
    """Load the trigrams of every file in ``files``, in the order given.

    Each file name is echoed to stdout before ``load_trigram_file`` is
    called on it.
    """
    for name in files:
        # Single-argument print(...) behaves the same as the print statement.
        print(name)
        load_trigram_file(name)


def load_trigram_file(f):
    """Insert the trigrams listed in one file as ``Trigram`` rows.

    ``f`` is a file name relative to ``settings.TRIGRAM_PATH``.  Each
    non-blank line is read as ``<space-separated words>\\t<count>``: the last
    word becomes ``word``, the second ``word_i_minus_one`` and the first
    ``word_i_minus_two``; ``count`` is parsed with ``int``.  Blank lines are
    skipped.

    Every word must already have an ``Orthography`` row; otherwise the
    exception raised by ``Orthography.objects.get`` propagates.  Rows are
    written with ``bulk_create`` in batches so the pending list stays bounded.
    """
    batch_limit = 100000
    # Per-call memo of spelling -> Orthography.  The same words recur on many
    # lines, so this avoids issuing one SELECT per word per line.
    orth_cache = {}

    def lookup(spelling):
        # Fetch lazily so a missing spelling still fails exactly as a direct
        # ``.get()`` would.
        try:
            return orth_cache[spelling]
        except KeyError:
            orth = Orthography.objects.get(spelling=spelling)
            orth_cache[spelling] = orth
            return orth

    pending = []
    with open(os.path.join(settings.TRIGRAM_PATH, f), 'r') as file_handle:
        for line in file_handle:
            stripped = line.strip()
            if not stripped:
                # e.g. a trailing empty line; nothing to insert.
                continue
            fields = stripped.split("\t")
            words = [lookup(w) for w in fields[0].split(" ")]
            pending.append(Trigram(word=words[-1],
                                   word_i_minus_one=words[1],
                                   word_i_minus_two=words[0],
                                   count=int(fields[1])))
            if len(pending) > batch_limit:
                Trigram.objects.bulk_create(pending)
                pending = []
    # Flush whatever is left over from the last partial batch.
    if pending:
        Trigram.objects.bulk_create(pending)
