#!/usr/bin/env python
# coding: utf-8

import os, os.path, sys, re, zlib, glob, struct
from obj_id import ObjId

# Names of the object kinds, keyed by the 3-bit type code that
# PackFile.read_object extracts from bits 4-6 of an entry's first header byte.
OBJ_TYPES = {
    0b001: 'commit',
    0b010: 'tree',
    0b011: 'blob',
    0b100: 'tag',
    0b110: 'ofs_delta',
    0b111: 'ref_delta',
}


class PackFile(object):
    """
    Reader for a single Git packfile together with its version-2 index.

    Git's packfile format is nicely illustrated and explained in
    http://schacon.github.io/gitbook/7_the_packfile.html
    though that page is quite erroneous (e.g. it confuses the & and |
    operators).

    Only a repository with exactly one ``pack*.idx`` and exactly one
    ``pack*.pack`` below ``<git_dir>/objects/pack`` is opened.  In every
    other case (folder missing, several packs, unknown index format) the
    instance evaluates to False and must not be queried.

    The instance owns two open file handles (``idx`` and ``pack``); release
    them with :meth:`close` or by using the instance as a context manager.
    """

    def __init__(self, git_dir_path, debug=False):
        """
        Open the pack and index found below *git_dir_path* and load the index.

        git_dir_path -- path of the ``.git`` directory
        debug        -- when true, diagnostic output is printed via prtdb()
        """
        self._debug = debug
        # Always define both attributes so later code can test for None
        # instead of probing with hasattr().
        self.idx = None
        self.pack = None
        folder = os.path.join(git_dir_path, 'objects', 'pack')
        if not os.path.exists(folder):
            self.prtdb('shiet!')
            return
        pack_idx = glob.glob(os.path.join(folder, 'pack*.idx'))
        pack_pack = glob.glob(os.path.join(folder, 'pack*.pack'))
        if len(pack_idx) == 1:
            self.idx = open(pack_idx[0], 'rb')
        # The original tested len(pack_idx) here, which raised IndexError
        # when an index existed without a matching pack file.
        if len(pack_pack) == 1:
            self.pack = open(pack_pack[0], 'rb')
        if self.__bool__():
            self.get_index()

    def get_index(self):
        """
        Validate the index header and read the fan-out table.

        On success sets ``fanout`` plus the byte positions of the index
        tables (``noff``, ``crcoff``, ``offoff``, ``o64off``) and returns
        None.  On an unknown or truncated header it prints a message, closes
        both files (making the instance falsy) and returns False.
        """
        self.idx.seek(0)
        header = self.idx.read(8)
        magic, version = None, None
        if len(header) == 8:
            magic, version = struct.unpack('>4sL', header)
        # The file is opened in binary mode, so compare against bytes; a str
        # literal never equals bytes on Python 3.
        if not (magic == b'\xfftOc' and version == 2):
            print('unknown file format')
            self.prtdb(' {0}   {1}'.format(repr(magic), version))
            self.close()
            return False
        self.fanout = struct.unpack('>256L', self.idx.read(256 * 4))
        # fanout[255] is the total number of objects in the pack.
        # 258 * 4 == 8 header bytes + 256 fan-out entries of 4 bytes each.
        self.noff = 258 * 4                               # where object names begin
        self.crcoff = self.noff + self.fanout[255] * 20   # where crcs begin
        self.offoff = self.crcoff + self.fanout[255] * 4  # where offsets begin
        self.o64off = self.offoff + self.fanout[255] * 4  # where 8 byte offsets begin

    def find_offset(self, bid):
        """
        Return the byte offset of object *bid* inside the pack file.

        bid -- object id exposing ``d_int`` (used as the fan-out index) and
               ``sha_parts`` (compared against the name read as five
               big-endian 32-bit integers)

        Returns None when the index does not contain the object.
        """
        self.prtdb(bid.sha_parts)
        wanted = tuple(bid.sha_parts)
        # fanout[n] counts the objects whose first byte is <= n, so the
        # candidates for this id are the entries first .. last-1.
        first = self.fanout[bid.d_int - 1] if bid.d_int > 0 else 0
        last = self.fanout[bid.d_int]
        self.prtdb('first last: {0} - {1}'.format(first, last))
        self.idx.seek(self.noff + first * 20)
        roffset = None  # position of the object within the index tables
        for position in range(first, last):
            ntup = struct.unpack('>5L', self.idx.read(20))
            self.prtdb('  {0}'.format(ntup))
            if ntup == wanted:
                roffset = position
                break
        if roffset is None:
            self.prtdb('FAILED to find offset for {0}'.format(bid))
            return None
        self.idx.seek(self.offoff + roffset * 4)
        offset = struct.unpack('>L', self.idx.read(4))[0]
        if offset & 0x80000000:
            # Most significant bit set: the remaining 31 bits index the
            # table of 8-byte offsets used for packs larger than 2 GiB.
            self.idx.seek(self.o64off + (offset & 0x7fffffff) * 8)
            offset = struct.unpack('>Q', self.idx.read(8))[0]
        return offset

    def read_object(self, bid):
        """
        Return the inflated pack entry for *bid*, or None if it is unknown.

        The result is a byte string ``b'<type> <length>\\x00<data>'``, i.e.
        the type and inflated length are prepended like in loose object
        files.

        NOTE: entries of type ofs_delta / ref_delta carry a base reference
        between the header and the zlib stream; that reference is neither
        skipped nor resolved here, so delta entries are not supported.
        """
        ofs = self.find_offset(bid)
        if ofs is None:
            return None
        self.pack.seek(ofs)
        # Variable-length entry header: bit 7 of each byte flags that
        # another header byte follows.
        bites = []
        while True:
            bite = struct.unpack('>B', self.pack.read(1))[0]
            bites.append(bite)
            if 128 & bite != 128:
                break
        if self._debug:
            self.prtdb([format(b, '08b') for b in bites])
        # First byte: bits 4-6 hold the type, bits 0-3 the lowest size bits;
        # every following byte contributes 7 more size bits.
        typ = OBJ_TYPES.get((bites[0] & 0b01110000) >> 4)
        size = bites[0] & 0b00001111
        shift_for = 4
        for extra in bites[1:]:
            size |= (extra & 127) << shift_for
            shift_for += 7
        self.prtdb('len(bites): {0}  computed size: {1}'.format(len(bites), size))
        # Feed small chunks to zlib until the stream ends.  unused_data
        # becomes non-empty once bytes past the stream end were supplied;
        # 'eof' only exists on Python 3, hence the getattr.
        chunks = []
        dcob = zlib.decompressobj()
        while not dcob.unused_data and not getattr(dcob, 'eof', False):
            raw = self.pack.read(64)
            if not raw:
                break  # truncated pack: stop instead of looping forever
            chunks.append(dcob.decompress(raw))
        chunks.append(dcob.flush())
        out = b''.join(chunks)
        # prepending the object type and (inflated) length like in the object files
        return '{0} {1}'.format(typ, len(out)).encode('ascii') + b'\x00' + out

    def close(self):
        """Close the index and pack files; the instance is falsy afterwards."""
        for name in ('idx', 'pack'):
            handle = getattr(self, name, None)
            if handle is not None:
                handle.close()
            setattr(self, name, None)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __bool__(self):  # implicit call python 3 only
        """True only while both the index and the pack file are open."""
        return (getattr(self, 'idx', None) is not None
                and getattr(self, 'pack', None) is not None)

    __nonzero__ = __bool__  # the same truth test on Python 2

    def prtdb(self, matter):
        """Print *matter* when the instance was created with debug=True."""
        if self._debug:
            print(matter)

if __name__ == '__main__':
    # Ad-hoc manual check: open a sibling checkout's pack with debug output
    # enabled and print one object read from it.
    demo_pack = PackFile('../savreaderwriter/.git', debug=True)
    demo_id = ObjId('f712816799e70135fc93592e0f89cf69a9b0589b')
    print(demo_pack.read_object(demo_id))
    # Kept for debugging: show the id next to its sha_parts rendered as hex.
    # bid = ObjId('f712816799e70135fc93592e0f89cf69a9b0589b')
    # print('  {0}\n  {1}'.format(bid, ''.join(['{0:08x}'.format(n) for n in bid.sha_parts])))
