from django.db import models
from .managers import BulkManager,chunks
import re

# Create your models here.
from .helper import fetch_celex_resource


class WordManager(BulkManager):
    """``BulkManager`` subclass configured for the ``celex_word`` table."""

    # Table name and column list; presumably consumed by BulkManager
    # (see .managers) -- confirm how they are used there.
    tbl_name = 'celex_word'
    cols = [
        'FreqDict_id',
        'Label',
        'Category_id',
        'Frequency',
    ]


class TranscriptionManager(BulkManager):
    """``BulkManager`` subclass configured for the ``celex_transcription`` table."""

    # Table name and column list; presumably consumed by BulkManager
    # (see .managers) -- confirm how they are used there.
    tbl_name = 'celex_transcription'
    cols = [
        'Transcription',
        'Word_id',
    ]

class SpellingManager(BulkManager):
    """``BulkManager`` subclass configured for the ``celex_spelling`` table."""

    # Table name and column list; presumably consumed by BulkManager
    # (see .managers) -- confirm how they are used there.
    tbl_name = 'celex_spelling'
    cols = [
        'Label',
        'Word_id',
    ]



class FreqDict(models.Model):
    """A named frequency dictionary plus the loader that imports its data.

    The ``load*`` methods parse delimited text resources located through
    ``fetch_celex_resource``; ``loadInfo`` stores the parsed data as
    ``SyntacticCategory``, ``Lemma``, ``WordForm``, ``Orthography`` and
    ``Transcription`` rows.
    """
    Name = models.CharField(max_length=100)
    Description = models.CharField(max_length=250, blank=True, null=True)

    def __unicode__(self):
        return u'%s' % self.Name

    def getDir(self):
        """Return ``fetch_celex_resource('<Name>/')`` for this dictionary."""
        return fetch_celex_resource('%s/' % self.Name)

    @staticmethod
    def _readRows(resource, sep):
        """Yield the fields of every data line of a delimited resource file.

        The first line is always treated as the header and discarded.
        Each remaining line is stripped and split on ``sep``; lines that
        are empty after stripping are skipped instead of producing a
        one-field row that would break the column indexing of callers.
        """
        with open(fetch_celex_resource(resource), 'r') as f:
            next(f, None)  # discard the header line
            for line in f:
                line = line.strip()
                if not line:
                    continue
                yield line.split(sep)

    def loadInfo(self):
        """Import categories, lemmas and word forms for this dictionary.

        Lemmas whose category number is greater than 12 are skipped.  A
        word form is stored only when the lemma it points at was loaded by
        this call, because ``WordForm.Lemma`` is a required foreign key.

        Raises:
            KeyError: if a lemma has no ``'CategoryNum'`` entry or a word
                form has no ``'Transcriptions'`` entry (i.e. the resource
                files do not cover the same ids).
            SyntacticCategory.DoesNotExist: if a lemma references a
                category number that ``loadCats`` did not create.
        """
        # NOTE(review): categories, lemmas and word forms are inserted with
        # explicit primary keys, so running this twice against the same
        # database presumably collides with the rows of the first run.
        self.loadCats()
        lemmas = self.loadLemmas()
        wfs = self.loadWF()

        categories = {}  # category number -> SyntacticCategory, fetched once
        new_lemmas = []
        loaded_ids = set()
        for key, info in lemmas.items():
            cat_num = int(info['CategoryNum'])
            if cat_num > 12:
                continue
            if cat_num not in categories:
                categories[cat_num] = SyntacticCategory.objects.get(pk=cat_num)
            lemma_id = int(key)
            # Build unsaved instances only: bulk_create performs the INSERT.
            # (Calling objects.create() here would insert every row twice.)
            new_lemmas.append(Lemma(pk=lemma_id,
                                    FreqDict=self,
                                    Label=info['Word'],
                                    Category=categories[cat_num],
                                    Frequency=int(info['Freq'])))
            loaded_ids.add(lemma_id)
        Lemma.objects.bulk_create(new_lemmas)

        for key, info in wfs.items():
            lemma_id = int(info['IdLemma'])
            if lemma_id not in loaded_ids:
                # The owning lemma was filtered out above.
                continue
            wf = WordForm.objects.create(pk=int(key),
                                         Lemma_id=lemma_id,
                                         Frequency=int(info['Freq']))
            spellings = [Orthography.objects.get_or_create(Spelling=s)[0]
                         for s in info['Spellings']]
            wf.Orthographies.add(*spellings)
            transcriptions = [Transcription.objects.get_or_create(Transcription=t)[0]
                              for t in info['Transcriptions']]
            wf.Transcriptions.add(*transcriptions)

    def loadCats(self):
        """Bulk-create ``SyntacticCategory`` rows from ``Category/cats.txt``.

        The file is tab-separated with a header line; the columns are read
        as primary key, label, description and category type.
        """
        cats = [SyntacticCategory(pk=int(l[0]), Label=l[1], Description=l[2], CategoryType=l[3])
                for l in self._readRows('Category/cats.txt', "\t")]
        SyntacticCategory.objects.bulk_create(cats)

    def loadLemmas(self):
        """Return ``{lemma id: info}`` with spelling and category data merged."""
        lemmas = self.loadOrthLemmas()
        #lemmas = self.loadTransLemmas(lemmas)
        lemmas = self.addCategories(lemmas)
        return lemmas

    def loadWF(self):
        """Return ``{word form id: info}`` with spellings and transcriptions merged."""
        wfs = self.loadOrthWF()
        wfs = self.loadTransWF(wfs)
        return wfs

    def loadOrthLemmas(self):
        """Parse the lemma orthography file.

        Returns a dict keyed by the first field of each row; each value
        holds ``'Word'`` (field 1), ``'Freq'`` (field 2) and ``'Spellings'``,
        the set made of field 1 plus, for every group of four fields after
        the eighth, the group's fourth field with single hyphens removed.
        """
        lemmas = {}
        for l in self._readRows('Orthography/celex-orthography-lemmas.txt', "\\"):
            spellings = [l[1]]
            additional = l[8:]
            if additional:
                spellings.extend(re.sub(r'-(?!-)', r'', x[3])
                                 for x in chunks(additional, 4))
            lemmas[l[0]] = {'Word': l[1], 'Freq': l[2], 'Spellings': set(spellings)}
        return lemmas

    def loadTransLemmas(self, lemmas):
        """Add a ``'Transcriptions'`` set to each entry of ``lemmas``.

        The set holds field 5 of the row plus the second field of every
        group of four fields after the eighth.  Raises ``KeyError`` when a
        row's id is not already present in ``lemmas``.
        """
        for l in self._readRows('Phonology/celex-phonology-lemmas.txt', "\\"):
            trans = [l[5]]
            additional = l[8:]
            if additional:
                trans.extend(x[1] for x in chunks(additional, 4))
            lemmas[l[0]]['Transcriptions'] = set(trans)
        return lemmas

    def addCategories(self, lemmas):
        """Add ``'CategoryNum'`` (field 3 of the syntax file) to each lemma.

        Raises ``KeyError`` when a row's id is not already present in
        ``lemmas``.
        """
        # Fields are split on a backslash like every other resource parsed
        # by this class; splitting on "//" left each line as a single field,
        # so index 3 could never be reached.
        # NOTE(review): the syntax file is looked up under 'Phonology/';
        # confirm that this is where the resource actually lives.
        for l in self._readRows('Phonology/celex-syntax-lemmas.txt', "\\"):
            lemmas[l[0]]['CategoryNum'] = l[3]
        return lemmas

    def loadOrthWF(self):
        """Parse the word form orthography file.

        Returns a dict keyed by the first field of each row; each value
        holds ``'IdLemma'`` (field 3), ``'Freq'`` (field 2) and
        ``'Spellings'``, the set made of field 1 plus the second field of
        every group of five fields after the ninth.
        """
        wfs = {}
        for l in self._readRows('Orthography/celex-orthography-wordforms.txt', "\\"):
            spellings = [l[1]]
            additional = l[9:]
            if additional:
                spellings.extend(x[1] for x in chunks(additional, 5))
            wfs[l[0]] = {'IdLemma': l[3], 'Freq': l[2], 'Spellings': set(spellings)}
        return wfs

    def loadTransWF(self, wfs):
        """Add a ``'Transcriptions'`` set to each entry of ``wfs``.

        The set holds field 5 of the row plus the second field of every
        group of four fields after the ninth.  Raises ``KeyError`` when a
        row's id is not already present in ``wfs``.
        """
        for l in self._readRows('Phonology/celex-phonology-wordforms.txt', "\\"):
            trans = [l[5]]
            additional = l[9:]
            if additional:
                trans.extend(x[1] for x in chunks(additional, 4))
            # Stored once per row, inside the loop, so every word form gets
            # its transcriptions rather than only the last one in the file.
            wfs[l[0]]['Transcriptions'] = set(trans)
        return wfs



#class SegmentType(models.Model):
#    Label = models.CharField(max_length=10)
#    Syllabic = models.NullBooleanField()
#    Obstruent = models.NullBooleanField()
#    Nasal = models.NullBooleanField()
#    Vowel = models.NullBooleanField()
#
#    def guessProperties(self):
#        NasalInd = ['n','m']
#        VowelInd = set(['i','u','o','e','a'])
#        ApproxInd = ['r','l','y']
#        for s in self.Label.lower():
#            if s in VowelInd:
#                self.Vowel = True
#                break
#        else:
#            self.Vowel = False
#        self.save()
#
#    def isSyllabic(self):
#        return self.Syllabic
#
#    def isNasal(self):
#        return self.Nasal
#
#    def isObs(self):
#        return self.Obstruent
#
#    def isVowel(self):
#        return self.Vowel
#
#    def __unicode__(self):
#        return u'%s' % (self.Label,)

class SyntacticCategory(models.Model):
    """A syntactic category with a label and optional descriptive fields."""

    Label = models.CharField(max_length=50)
    # Both of the following may be left empty / NULL.
    Description = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    CategoryType = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

#class CatRelationship(models.Model):
#    Word = models.ForeignKey('Word')
#    Category = models.ForeignKey(SyntacticCategory)
#    Count = models.BigIntegerField(blank=True,null=True)

#    class Meta:
#        ordering = ['-Count']


class Transcription(models.Model):
    """A transcription string with optional stress-pattern and CV-skeleton fields."""

    Transcription = models.CharField(max_length=250)
    # Optional extras; both may be left empty / NULL.
    StressPattern = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    CVSkel = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    #def getCVStruct(self):
    #    if self.CVSkel is not None:
    #        return self.CVSkel
    #    cvstruct = ''
    #    for s in self.Transcription.all():
    #        if s.isVowel():
    #            cvstruct = cvstruct + 'V'
    #        else:
    #            cvstruct = cvstruct + 'C'
    #    self.CVSkel = cvstruct
    #    self.save()
    #    return cvstruct

class Orthography(models.Model):
    """A single spelling, stored as a string of up to 250 characters."""

    Spelling = models.CharField(
        max_length=250,
    )

class WordForm(models.Model):
    """A word form of a ``Lemma`` with its spellings, transcriptions and frequency."""

    # on_delete is spelled out (CASCADE was the implicit default) so the
    # declaration is also accepted by Django versions that require it.
    Lemma = models.ForeignKey('Lemma', on_delete=models.CASCADE)
    Orthographies = models.ManyToManyField(Orthography)
    Transcriptions = models.ManyToManyField(Transcription)
    Frequency = models.FloatField(blank=True, null=True)

    def __unicode__(self):
        # This model has no ``Label`` field of its own, so the label of the
        # owning lemma is used for display.
        return u'%s' % self.Lemma.Label

class Lemma(models.Model):
    """A lemma belonging to a ``FreqDict``, with a syntactic category and frequency."""

    # on_delete is spelled out (CASCADE was the implicit default) so the
    # declarations are also accepted by Django versions that require it.
    FreqDict = models.ForeignKey(FreqDict, on_delete=models.CASCADE)
    Label = models.CharField(max_length=250)
    Category = models.ForeignKey(SyntacticCategory, on_delete=models.CASCADE)
    Frequency = models.FloatField(blank=True, null=True)

    #objects = WordManager()

    def getUR(self):
        """Return the transcriptions of all word forms of this lemma, joined by '.'."""
        # WordForm's foreign key to this model is named ``Lemma``; the
        # transcription text is joined rather than the model's object repr.
        return ".".join(t.Transcription
                        for wf in WordForm.objects.filter(Lemma=self)
                        for t in wf.Transcriptions.all())

    def getSpellings(self):
        """Return the spellings of all word forms of this lemma, joined by ', '."""
        return ", ".join(s.Spelling
                         for wf in WordForm.objects.filter(Lemma=self)
                         for s in wf.Orthographies.all())

    #def getPrimaryCategory(self):
    #    if self.PrimaryCategory is not None:
    #        return self.PrimaryCategory
    #    self.PrimaryCategory = self.catrelationship_set.all()[0].Category.Label
    #    self.save()
    #    return self.PrimaryCategory
