import os

from django.db.models import Sum

from .models import FreqDict,Lemma,WordForm,Orthography,Transcription


def checkDict(freqDict):
    """Return True if a FreqDict whose Name equals ``freqDict`` exists.

    Returns False otherwise.
    """
    return FreqDict.objects.filter(Name=freqDict).exists()

def lookupFreq(word):
    """Return the summed Frequency of all distinct Lemmas matching ``word``.

    Lemmas are matched through the ``wordform__Orthographies__Spelling``
    lookup. When nothing matches, the sum of an empty sequence, 0, is
    returned.
    """
    matching_lemmas = Lemma.objects.filter(
        wordform__Orthographies__Spelling=word
    ).distinct()
    return sum(lemma.Frequency for lemma in matching_lemmas)

#def lookupStress(word,freqDict):
#    qs = Spelling.objects.filter(Label=word).order_by('-Word__Frequency')
#    return [q.StressPattern for q in qs]

def lookupCat(word):
    """Return the lower-cased category label of the top Lemma for ``word``.

    Lemmas are matched through the ``wordform__Orthographies__Spelling``
    lookup and ordered by descending Frequency; only the first one is used.

    Returns:
        'NA' when no Lemma matches. Otherwise the Lemma's ``Category.Label``
        in lower case, with the label 'ADV' replaced by 'R' beforehand (so
        'ADV' yields 'r').
    """
    ranked = (
        Lemma.objects
        .filter(wordform__Orthographies__Spelling=word)
        .distinct()
        .order_by('-Frequency')
    )
    # Slice before evaluating so the database returns at most one row
    # instead of every matching Lemma.
    top = list(ranked[:1])
    if not top:
        return 'NA'
    label = top[0].Category.Label
    if label == 'ADV':
        label = 'R'
    return label.lower()

def filterNGrams(ngram_path):
    """Copy n-gram files, keeping only lines made entirely of known words.

    Every file in ``<ngram_path>/original`` is read line by line and a file
    of the same name is written to ``<ngram_path>/trimmed`` (created if it
    does not exist). Each line is stripped and split on tabs: the first
    field is the space-separated n-gram, the second is written back
    unchanged after it. Any further fields are dropped.

    A line is kept only if every word of its n-gram is in the set of
    ``str()`` values of all Orthography objects (presumably their spellings;
    confirm against ``Orthography.__str__``). Lines with fewer than two
    tab-separated fields are skipped.

    Args:
        ngram_path: Directory containing the ``original`` sub-directory.
    """
    known_spellings = set(str(x) for x in Orthography.objects.all())
    orig_path = os.path.join(ngram_path, 'original')
    trim_path = os.path.join(ngram_path, 'trimmed')
    file_names = os.listdir(orig_path)
    # Make sure the output directory exists before opening files inside it.
    if not os.path.isdir(trim_path):
        os.makedirs(trim_path)
    for file_name in file_names:
        with open(os.path.join(orig_path, file_name), 'r') as infile:
            with open(os.path.join(trim_path, file_name), 'w') as outfile:
                for line in infile:
                    fields = line.strip().split("\t")
                    # Without a second column there is nothing to write
                    # after the n-gram; skip instead of raising IndexError.
                    if len(fields) < 2:
                        continue
                    words = fields[0].split(" ")
                    if not all(w in known_spellings for w in words):
                        continue
                    outfile.write('\t'.join([' '.join(words), fields[1]]))
                    outfile.write('\n')



