#!/usr/bin/env python
# coding: utf-8

import os, os.path, sys, re, zlib, glob, struct
from .obj_id import ObjId
from .struni import struni

# Lookup table from the 3-bit type code found in a pack entry header
# (see PackFile.read_object) to the name of the object type.
# The codes 0b000 and 0b101 have no entry in this table.
OBJ_TYPES = {
    0b001: 'commit',
    0b010: 'tree',
    0b011: 'blob',
    0b100: 'tag',
    0b110: 'ofs_delta',
    0b111: 'ref_delta',
}


class PackFile(object):
    """
    Reader for the packfiles of a git repository and their version-2 indexes.

    Git's packfile format is well illustrated and explained in
    http://schacon.github.io/gitbook/7_the_packfile.html
    though that page is quite erroneous (e.g. it confuses the & and |
    operators).

    For every ``pack-<sha1>.idx`` found below ``<git_dir>/objects/pack`` the
    index and the matching ``.pack`` file are opened in binary mode and kept
    open for the lifetime of the instance.  Call :meth:`close` (or use the
    instance as a context manager) to release the file handles.

    Attributes:
        idxs:  dict mapping the pack hash to the open index file.
        packs: dict mapping the pack hash to the open pack file.
        poss:  dict mapping the pack hash to a dict with the fanout table
               ('fanout') and the byte positions of the index sections
               ('noff', 'crcoff', 'offoff', 'o64off').

    An instance is truthy when at least one usable index was found.
    """

    # File name pattern of a pack index; group 1 is the pack's hash.
    _IDX_NAME_RE = re.compile(r'pack-([0-9a-f]{40})\.idx$')

    def __init__(self, git_dir_path, debug=False):
        """
        Open all pack/index pairs of the repository at *git_dir_path*.

        :param git_dir_path: path of the ``.git`` directory.
        :param debug: when true, diagnostic output is printed via prtdb().
        :raises IOError/OSError: when an index has no readable pack file;
            every handle opened so far is closed before re-raising.
        """
        self._debug = debug
        self.idxs, self.packs, self.poss = {}, {}, {}
        folder = os.path.join(git_dir_path, 'objects', 'pack')
        if os.path.exists(folder):
            try:
                for idx in glob.glob(os.path.join(folder, 'pack-*.idx')):
                    match = self._IDX_NAME_RE.search(idx)
                    if match is None:
                        # matched the glob but is not named after a sha1
                        continue
                    hsh = match.group(1)
                    self.idxs[hsh] = open(idx, 'rb')
                    self.packs[hsh] = open(
                        os.path.join(folder, "pack-{}.pack".format(hsh)), 'rb')
            except (IOError, OSError):
                self.close()
                raise
        if self.idxs:
            self.get_indizes()
        else:
            self.prtdb('shiet!')

    def get_indizes(self):
        """
        Read header and fanout table of every index and fill ``self.poss``.

        Indexes that are truncated or are not in the version-2 format are
        reported, closed and removed from ``idxs``/``packs``.
        """
        self.poss = {}
        # Iterate over a snapshot: unusable entries are removed on the way.
        for hsh, idx in list(self.idxs.items()):
            idx.seek(0)
            header = idx.read(8)
            if len(header) == 8:
                magic, version = struct.unpack('>4sL', header)
            else:
                magic, version = header, None
            fanout_raw = idx.read(256 * 4)
            if not (magic == b'\xfftOc' and version == 2
                    and len(fanout_raw) == 256 * 4):
                print('unknown file format in {}'.format(hsh))
                self.prtdb(' {0}   {1}'.format(repr(magic), version))
                self._discard(hsh)
                continue
            pos = self.poss[hsh] = {}
            pos['fanout'] = struct.unpack('>256L', fanout_raw)
            total = pos['fanout'][255]               # number of objects in the pack
            pos['noff'] = 258 * 4                    # where object names begin
            pos['crcoff'] = pos['noff'] + total * 20     # where crcs begin
            pos['offoff'] = pos['crcoff'] + total * 4    # where offsets begin
            pos['o64off'] = pos['offoff'] + total * 4    # where 8 byte offsets begin

    def find_offset(self, bid):
        """
        Locate the object *bid* in the opened packs.

        :param bid: object id providing ``d_int`` (used as index into the
            fanout table) and ``sha_parts`` (compared against the name read
            as five big-endian 32-bit integers; must therefore be a tuple).
        :return: ``(pack_hash, offset)`` where *offset* is the byte position
            of the object inside that pack, or ``(None, None)`` if no index
            lists the object.
        """
        self.prtdb(bid.sha_parts)
        for hsh, idx in self.idxs.items():
            pos = self.poss[hsh]
            # fanout[i] counts the names whose first byte is <= i, so the
            # candidates for this id sit between the two neighbouring entries.
            first = pos['fanout'][bid.d_int - 1] if bid.d_int > 0 else 0
            last = pos['fanout'][bid.d_int]
            self.prtdb('first last: {0} - {1}'.format(first, last))
            idx.seek(pos['noff'] + first * 20)
            for roffset in range(first, last):
                ntup = struct.unpack('>5L', idx.read(20))
                self.prtdb('  {0}'.format(ntup))
                if ntup == bid.sha_parts:
                    return hsh, self._pack_offset(idx, pos, roffset)
        self.prtdb('FAILED to find offset for {0}'.format(bid))
        return None, None

    def _pack_offset(self, idx, pos, roffset):
        """Return the pack offset stored for the *roffset*-th index entry."""
        idx.seek(pos['offoff'] + roffset * 4)
        offset = struct.unpack('>L', idx.read(4))[0]
        if offset & 0x80000000:
            # Most significant bit set: the remaining 31 bits are an index
            # into the table of 8 byte offsets (packs larger than 2 GiB).
            idx.seek(pos['o64off'] + (offset & 0x7fffffff) * 8)
            offset = struct.unpack('>Q', idx.read(8))[0]
        return offset

    def read_object(self, bid):
        """
        Read and inflate the object *bid* from its pack.

        :param bid: object id as accepted by find_offset().
        :return: a string of the form ``'<type> <length>\\x00<content>'``,
            i.e. with object type and inflated length prepended like in the
            loose object files; the content is converted with
            ``struni(out, 'utf-8')``.
        :raises KeyError: if the object is in none of the opened packs.
        :raises zlib.error: if the compressed data is invalid or the pack
            ends before the compressed stream is complete.

        NOTE(review): entries of type 'ofs_delta'/'ref_delta' carry a base
        reference between header and compressed data which is not skipped
        here, so such entries presumably fail to inflate -- confirm against
        the pack format documentation.
        """
        hsh, ofs = self.find_offset(bid)
        if hsh is None:
            raise KeyError('object {0} not found in any packfile'.format(bid))
        fh = self.packs[hsh]
        fh.seek(ofs)
        # Variable length header: the high bit of every byte tells whether
        # another header byte follows.
        bites = []
        while True:
            bite = struct.unpack('>B', fh.read(1))[0]
            bites.append(bite)
            if 128 & bite != 128:
                break
        if self._debug:
            self.prtdb([format(b, '08b') for b in bites])
        # First byte: bits 6-4 hold the type, bits 3-0 the lowest size bits;
        # each following byte contributes 7 more size bits.
        typ = OBJ_TYPES.get((bites[0] & 0b01110000) >> 4)
        size = bites[0] & 0b00001111
        shift_for = 4
        for bite in bites[1:]:
            size |= (bite & 127) << shift_for
            shift_for += 7
        self.prtdb('len(bites): {0}  computed size: {1}'.format(len(bites), size))
        dcob = zlib.decompressobj()
        self.prtdb(repr(dcob.unused_data))
        pieces = []
        # unused_data becomes non-empty once bytes behind the end of the
        # compressed stream have been fed in.
        while dcob.unused_data == b'':
            chunk = fh.read(64)
            if not chunk:
                # End of file.  Where the decompressor can tell (attribute
                # 'eof'), an unfinished stream is an error rather than an
                # endless loop.
                if getattr(dcob, 'eof', True):
                    break
                raise zlib.error(
                    'truncated object data for {0}'.format(bid))
            pieces.append(dcob.decompress(chunk))
        out = b''.join(pieces)
        # prepending the object type and (inflated) length like in the object files
        return '{0} {1}\x00{2}'.format(typ, len(out), struni(out, 'utf-8'))

    def close(self):
        """Close all opened index and pack files and forget about them."""
        for hsh in set(self.idxs) | set(self.packs):
            self._discard(hsh)

    def _discard(self, hsh):
        """Close and remove the index/pack pair identified by *hsh*."""
        for table in (self.idxs, self.packs):
            fh = table.pop(hsh, None)
            if fh is not None:
                fh.close()
        self.poss.pop(hsh, None)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __bool__(self): # implicit call python 3 only
        return bool(len(self.idxs))

    __nonzero__ = __bool__ # the python 2 spelling of __bool__

    def prtdb(self, matter):
        """Print *matter* if the instance was created with debug=True."""
        if self._debug:
            print(matter)

if __name__ == '__main__':
    # Manual smoke test: dump one known object from a neighbouring checkout
    # with debug output switched on.
    demo_pack = PackFile('../savreaderwriter/.git', debug=True)
    demo_object = demo_pack.read_object(
        ObjId('f712816799e70135fc93592e0f89cf69a9b0589b'))
    print(demo_object)
    # Kept for debugging the id decomposition:
    # bid = ObjId('f712816799e70135fc93592e0f89cf69a9b0589b')
    # print('  {0}\n  {1}'.format(bid, ''.join(['{0:08x}'.format(n) for n in bid.sha_parts])))
