"""A pypi demonstration vehicle.

.. moduleauthor:: Andrew Carter <andrew@invalid.com>

"""

import shelve
import Queue
import os
import time
from collections import defaultdict

from twisted.python import log
from twisted.internet import defer, threads, reactor

from sdfs.exceptions import UnableToStoreFile
from sdfs.enum import DatabaseType
from sdfs.utils import *


# Number of listing entries (directories + files) collected by a scan before
# the batch is added to the work queue.
COMMIT_COUNTER = 10000

# Number of key characters (bytes) used to pick the bucket a key belongs to.
BUCKET_SIZE = 1

# Folder directly below a scanned path that is skipped while scanning.
TEMP_FOLDER = '.tmp/'

# Seconds a deleted entry is kept before it is garbage collected (30 days).
GC_DELETED_FILES = 30 * 24 * 60 * 60

def generate_buckets(bucket_depth=BUCKET_SIZE):
    """Yield every bucket name of ``bucket_depth`` characters.

    A bucket name is built from the 256 single-byte characters
    ``chr(0)`` .. ``chr(255)``; names are produced in ascending order with
    the first character varying slowest.  A depth of 1 or less yields the
    256 single characters.

    :param bucket_depth: number of characters per bucket name.
    """
    alphabet = [chr(code) for code in xrange(256)]

    if bucket_depth <= 1:
        for head in alphabet:
            yield head
        return

    for head in alphabet:
        for tail in generate_buckets(bucket_depth - 1):
            yield head + tail

class FileSystem(object):
    _is_rescanning = False
    
    def __init__(self, paths, dbfile, plugins):
        """Open the shelve database and create the handler working on it.

        :param paths: iterable of ``(path, prefix)`` pairs; ``path`` is a
            real directory and ``prefix`` the virtual location it maps to.
        :param dbfile: filename handed to :func:`shelve.open`.
        :param plugins: plugin objects passed on to the handler.
        """
        database = shelve.open(dbfile)

        self.paths = paths
        self.db = database
        self.fsh = FileSystemHandler(database, paths, plugins)
    
    def close(self):
        """Close the database and ask the handler's worker loop to stop.

        The ``'die'`` sentinel is queued with priority 0, i.e. ahead of the
        default priority 5 used for regular jobs.

        :returns: the Deferred created for the ``'die'`` queue entry.
        """
        # NOTE(review): the shelf is closed before the worker has stopped, so
        # a job that is still running may touch a closed database -- confirm
        # whether closing should wait for the returned Deferred instead.
        self.db.close()
        shutdown = self.fsh.add_to_queue('die', priority=0)
        return shutdown
    
    def list_dir(self, paths, depth=0, show_deleted=False):
        """List the contents of one or more virtual directories.

        :param paths: a single virtual path or a list of them.
        :param depth: how many levels of sub-directories to list in addition
            to the requested ones (0 lists only the requested paths).
        :param show_deleted: include directories and entries flagged as
            ``deleted``; the flag is applied on every nesting level.
        :returns: dict mapping each path to a list of metadata dicts (each
            with ``name`` and ``type`` added and ``virtual_path`` /
            ``actual_path`` removed), or to ``None`` when the path has no
            file list or no directory record.  Deleted directories are left
            out of the result unless ``show_deleted`` is set.
        """
        if not isinstance(paths, list):
            paths = [paths]
        paths = cleanup_path(paths)

        dirlist = {}
        nested_paths = []

        for path in paths:
            filelist_key = keyify(DatabaseType.FILELIST, path)
            dir_key = keyify(DatabaseType.DIRECTORY, path)

            # A file list without its directory record is treated like an
            # unknown path instead of raising KeyError.
            if filelist_key not in self.db or dir_key not in self.db:
                dirlist[path] = None
                continue

            if not show_deleted and self.db[dir_key].get('deleted', False):
                continue

            entries = []
            for item_key in self.db[filelist_key]:
                # The first character of a key is its DatabaseType.
                item_type = item_key[0]
                # Records read from the shelf are copies, so stripping
                # fields below does not alter the stored record.
                metadata = self.db[item_key]

                if not show_deleted and metadata.get('deleted', False):
                    continue

                if item_type == DatabaseType.DIRECTORY and depth:
                    nested_paths.append(metadata['virtual_path'])
                elif item_type == DatabaseType.FILE:
                    # Never expose the real on-disk location to callers.
                    del metadata['actual_path']

                metadata['name'] = metadata['virtual_path'].split('/')[-1]
                del metadata['virtual_path']
                metadata['type'] = item_type

                entries.append(metadata)

            dirlist[path] = entries

        if depth:
            # Propagate show_deleted so nested levels follow the same rule
            # as the top level.
            dirlist.update(self.list_dir(nested_paths, depth - 1, show_deleted))
        return dirlist
    
    def add_file(self, actual_path, age=None):
        """Register a single real file in the database.

        :param actual_path: real path of the file; it must start with one of
            the configured paths followed by ``/``.
        :param age: timestamp stored for the entry; a falsy value means
            "use the handler's current time".
        :raises UnableToStoreFile: if no configured path contains the file.
        """
        handler = self.fsh

        root = None
        for candidate, candidate_prefix in self.paths:
            if actual_path.startswith('%s/' % candidate):
                root = (candidate, candidate_prefix)
                break
        if root is None:
            raise UnableToStoreFile()
        path, prefix = root

        virtual_file = handler.vrify(actual_path, path, prefix)
        # Everything before the last '/' is the containing virtual folder.
        virtual_dir = virtual_file.rpartition('/')[0]

        age = age or handler.get_time()

        keys_touched, keys_to_dir_mapping = handler.create_subfolders(virtual_dir, age)
        keys_touched.update(
            handler.add_new_file(virtual_dir, virtual_file, actual_path, age))

        keys_to_dir_mapping[virtual_dir].add(
            keyify(DatabaseType.FILE, virtual_file))

        handler.handle_keys_to_dir_mapping(keys_to_dir_mapping)
        handler.update_modified(keys_touched, age)
        handler.add_to_buckets(keys_touched)
    
    def get_metadata(self, path):
        """Return the metadata stored for a virtual path.

        A file record is looked up first, then a directory record.

        :param path: virtual path; it is normalised with ``cleanup_path``.
        :returns: the metadata dict with ``name`` added (and ``actual_path``
            removed for files), or ``None`` when neither record exists.
        """
        path = cleanup_path(path)

        for item_type in (DatabaseType.FILE, DatabaseType.DIRECTORY):
            key = keyify(item_type, path)
            if key not in self.db:
                continue

            record = self.db[key]
            if item_type == DatabaseType.FILE:
                del record['actual_path']
            # Last path component is the display name.
            record['name'] = record['virtual_path'].rpartition('/')[2]
            return record

        return None
    
    def set_metadata(self, item_type, virtual_path, key, value):
        """Set one field on an existing database record.

        :param item_type: type passed to ``keyify`` to build the record key.
        :param virtual_path: virtual path of the record.
        :param key: name of the metadata field to set.
        :param value: value to store in that field.
        :raises KeyError: if the record does not exist.
        """
        record_key = keyify(item_type, virtual_path)

        # Read, modify, write back: the record is assigned to the shelf
        # again so the change is stored.
        record = self.db[record_key]
        record[key] = value
        self.db[record_key] = record
    
    def get_file(self, path):
        """Return the real path stored for a virtual file path.

        :param path: virtual path; it is normalised with ``cleanup_path``.
        :returns: the record's ``actual_path``; if there is no record (or it
            is empty) the falsy lookup result itself, normally ``None``.
        """
        record = self.db.get(keyify(DatabaseType.FILE, cleanup_path(path)), None)
        if not record:
            return record
        # Return the path even when it is falsy; the former "and/or"
        # expression handed out the whole metadata dict in that case.
        return record['actual_path']
    
    @defer.inlineCallbacks
    def rescan(self):
        """Run a complete rescan unless one is already in progress.

        :returns: a Deferred (via ``inlineCallbacks``) that fires when the
            rescan has finished, or immediately when one is already running.
        """
        if self._is_rescanning:
            log.msg('Currently rescanning, not doing that again')
            return

        self._is_rescanning = True
        try:
            yield self.fsh.complete_rescan()
        finally:
            # Always clear the flag; otherwise one failed rescan would block
            # every later rescan request.
            self._is_rescanning = False


# Runs its main loop (thread_handler) in a worker thread; database-related jobs are queued to it.
class FileSystemHandler(object):
    def __init__(self, db, paths, plugins):
        """Store the collaborators and start the worker loop in a thread.

        :param db: the opened database (dict-like) shared with FileSystem.
        :param paths: iterable of ``(path, prefix)`` pairs to scan.
        :param plugins: plugin objects notified about files.
        """
        self.db = db
        self.paths = paths
        self.plugins = plugins
        self.queue = Queue.PriorityQueue()

        # The loop runs until a 'die' entry is taken from the queue.
        threads.deferToThread(self.thread_handler)
    
    def add_to_queue(self, f, *args, **kwargs):
        """Queue ``f(*args, **kwargs)`` for the worker loop.

        :param f: callable to run, or the string ``'die'`` to stop the loop.
        :param priority: keyword-only, removed from ``kwargs`` before the
            call; lower numbers are taken from the queue first (default 5).
        :returns: a Deferred that the worker loop fires with the job result.
        """
        # NOTE(review): entries with the same priority are ordered by the
        # remaining tuple items, not by insertion order -- confirm no caller
        # relies on FIFO ordering.
        done = defer.Deferred()
        job = (kwargs.pop('priority', 5), done, f, args, kwargs)
        self.queue.put(job)
        return done
    
    def thread_handler(self):
        """
        Handles all the jobs thrown into the queue.

        Runs until a ``'die'`` entry is taken from the queue.  Every other
        entry is called with its arguments; the entry's Deferred is fired
        from the reactor thread with the return value, or with a Failure
        describing the exception the job raised.
        """
        import traceback
        from twisted.python import failure

        while True:
            priority, d, f, args, kwargs = self.queue.get()

            if f == 'die':
                if d:
                    # There is no job result for a shutdown request; the
                    # former code referenced an undefined/stale ``result``.
                    reactor.callFromThread(d.callback, None)
                return

            try:
                result = f(*args, **kwargs)
            except:
                # Deliberately broad: the worker must survive any failing
                # job and still fire that job's Deferred.
                traceback.print_exc()
                # Capture the Failure here, while the exception is active in
                # this thread; errback() without arguments would run in the
                # reactor thread where no exception is being handled.
                reason = failure.Failure()
                if d:
                    reactor.callFromThread(d.errback, reason)
            else:
                if d:
                    reactor.callFromThread(d.callback, result)
    
    def handle_plugins_for_file(self, db_hash):
        metadata = self.db[db_hash]
        r, f = os.path.split(metadata['actual_path'])
        vr = '/'.join(metadata['virtual_path'].split('/')[:-1])
        
        for p in self.plugins:
            if p.filetypes.match(f):
                p.found_file(r, vr, f)
    
    def commit_plugins(self):
        plugins = self.plugins
        
        for plugin in plugins:
            if hasattr(plugin, 'done_scanning'):
                plugin.done_scanning()
    
    def add_new_file(self, vr, vrf, fr, add_time):
        """Create or refresh the database record of a file.

        A record is (re)created when none exists or the existing one carries
        a ``deleted`` field; an existing live record only gets its
        ``actual_path`` updated when that changed.

        :param vr: virtual path of the containing directory.
        :param vrf: virtual path of the file.
        :param fr: real path of the file.
        :param add_time: timestamp stored as ``date`` and ``modified``.
        :returns: set of directory keys of all ancestors of the file; empty
            unless a record was (re)created.
        """
        file_key = keyify(DatabaseType.FILE, vrf)
        existing = self.db.get(file_key, None)
        keys_touched = set()

        is_live = bool(existing) and 'deleted' not in existing
        if is_live:
            if existing['actual_path'] != fr:
                refreshed = self.db[file_key]
                refreshed['actual_path'] = fr
                self.db[file_key] = refreshed
            return keys_touched

        self.db[file_key] = {
            'actual_path': fr,
            'size': os.path.getsize(fr),
            'virtual_path': vrf,
            'date': add_time,
            'modified': add_time
        }

        # Every ancestor directory, deepest first, counts as touched.
        parts = [part for part in vr.split('/') if part]
        for depth in range(len(parts), 0, -1):
            keys_touched.add(
                keyify(DatabaseType.DIRECTORY, '/'.join(parts[:depth])))

        return keys_touched
    
    def delete_dead_hashes(self, hashes, remove_time=None):
        """Flag records as deleted and bump ``modified`` on their ancestors.

        The keys are added to the ``'d'`` (deleted) buckets, each record
        gets a ``deleted`` timestamp, and every directory key along each
        record's virtual path is passed to :meth:`update_modified`.

        :param hashes: collection of database keys to flag; it is iterated
            more than once.
        :param remove_time: timestamp used for the ``modified`` update; a
            falsy value means "now".  The ``deleted`` field always receives
            the current time.
        :returns: number of records flagged.
        """
        self.add_to_buckets(hashes, 'd')

        # One clock reading for the whole batch.
        deleted_at = self.get_time()
        keys_touched = set()
        missing_files_found = 0

        for dead_hash in hashes:
            # Single read of the record; it is written back after flagging.
            metadata = self.db[dead_hash]
            metadata['deleted'] = deleted_at
            self.db[dead_hash] = metadata
            missing_files_found += 1

            parts = [x for x in metadata['virtual_path'].split('/') if x]
            while parts:
                keys_touched.add(keyify(DatabaseType.DIRECTORY, '/'.join(parts)))
                parts.pop()

        self.update_modified(keys_touched, remove_time or deleted_at)
        return missing_files_found
    
    def create_subfolders(self, prefix, add_time):
        """Make sure every directory along ``prefix`` has a record.

        Directories are visited deepest first.  Existing ones are reported
        as touched; missing ones are created with ``add_new_directory``.

        :param prefix: virtual directory path; surrounding ``/`` is ignored.
        :param add_time: timestamp for newly created directory records.
        :returns: tuple ``(keys_touched, keys_to_dir_mapping)`` -- the set
            of already existing directory keys and the merged parent-path
            to key-set mapping of the directories created.
        """
        keys_touched = set()
        keys_to_dir_mapping = defaultdict(set)

        parts = prefix.strip('/').split('/')
        if parts[0] == '':
            # Empty prefix: nothing to create.
            return keys_touched, keys_to_dir_mapping

        for depth in range(len(parts), 0, -1):
            vr = '/'.join(parts[:depth])
            dir_key = keyify(DatabaseType.DIRECTORY, vr)
            if dir_key in self.db:
                keys_touched.add(dir_key)
            else:
                merge_set_dicts(keys_to_dir_mapping,
                                self.add_new_directory(vr, add_time))

        return keys_touched, keys_to_dir_mapping
    
    def add_new_directory(self, vr, add_time):
        """Create the record of a directory unless a live one exists.

        :param vr: virtual path of the directory.
        :param add_time: timestamp stored as ``date`` and ``modified``.
        :returns: mapping ``{parent virtual path: set([directory key])}``;
            empty when nothing was created or ``vr`` is the empty path.
        """
        mapping = defaultdict(set)
        dir_key = keyify(DatabaseType.DIRECTORY, vr)

        existing = self.db.get(dir_key, None)
        if existing and not existing.get('deleted', None):
            # A live record is already there; leave it alone.
            return mapping

        self.db[dir_key] = {
            'date': add_time,
            'modified': add_time,
            'virtual_path': vr
        }

        if vr:
            parent_vr = vr.rpartition('/')[0]
            mapping[parent_vr].add(dir_key)
            # Make sure the directory has a file list entry of its own.
            self.add_files_to_directory(vr, set())

        return mapping
    
    def vrify(self, r, path, prefix):
        vr = r[len(path):]
        vr = '/'.join(vr.split(os.sep))
        vr = ('%s/%s' % (prefix.strip('/'), vr.strip('/')))
        vr = vr.strip('/')
        return vr
    
    def add_files_to_directory(self, vr, file_keys):
        """Add keys to the stored file list of a directory.

        The file list entry is created when it does not exist yet.

        :param vr: virtual path of the directory.
        :param file_keys: set of keys to add; it is left unmodified.
        """
        dir_key = keyify(DatabaseType.FILELIST, vr)
        # Build a new set instead of ``file_keys |= ...`` so the caller's
        # set is not changed as a side effect.
        self.db[dir_key] = self.db.get(dir_key, set()) | file_keys
    
    def add_to_buckets(self, hashes, postfix=''):
        """Add keys to the buckets they belong to.

        The bucket of a key is given by the ``BUCKET_SIZE`` characters that
        follow its first character.

        :param hashes: iterable of database keys.
        :param postfix: suffix appended to the bucket key; this file uses
            ``''``, ``'n'`` and ``'d'``.
        """
        grouped = {}
        for h in hashes:
            grouped.setdefault(h[1:1+BUCKET_SIZE], set()).add(h)

        for bucket in grouped:
            key = '%s%s%s' % (DatabaseType.BUCKET, bucket, postfix)
            stored = self.db.get(key, set())
            self.db[key] = stored | grouped[bucket]
    
    def update_modified(self, hashes, add_time):
        for key in hashes:
            val = self.db.get(key, None)
            if not val:
                continue
            val['modified'] = max(add_time, val['modified'])
            self.db[key] = val
    
    def handle_keys_to_dir_mapping(self, keys_to_dir_mapping):
        """Store every key set of the mapping in its directory's file list.

        :param keys_to_dir_mapping: dict mapping a virtual directory path to
            the set of keys to add to that directory.
        """
        for vr in list(keys_to_dir_mapping):
            self.add_files_to_directory(vr, keys_to_dir_mapping[vr])
    
    def clean_deleted_for_zombies(self):
        """Remove zombies from the deleted buckets.

        A zombie here is a path that was deleted and has returned to life:
        its key sits in a deleted (``'d'``) bucket and in the matching live
        bucket at the same time.  Every deleted bucket is rewritten without
        the keys of its live bucket.

        :raises KeyError: if a live bucket does not exist yet.
        """
        for i in generate_buckets():
            live_key = '%s%s' % (DatabaseType.BUCKET, i)
            zombie_key = '%sd' % live_key

            deleted_keys = self.db.get(zombie_key, set())
            live_keys = self.db[live_key]
            self.db[zombie_key] = deleted_keys - live_keys
    
    def garbage_collect(self):
        """Permanently remove records deleted more than GC_DELETED_FILES ago.

        For every deleted (``'d'``) bucket each expired record is removed
        from its parent's file list, from the database and from the bucket.
        For a directory the directory's own file list is removed as well, so
        that no file list is left without its directory record.
        """
        expiration_time = self.get_time() - GC_DELETED_FILES
        for i in generate_buckets():
            bucket = '%s%sd' % (DatabaseType.BUCKET, i)

            removed_keys = set()
            for key in self.db.get(bucket, set()):
                metadata = self.db[key]
                deleted_at = metadata.get('deleted')
                # Records without a deletion time are not deleted (any
                # more); leave them alone instead of aborting the pass.
                if deleted_at is None or deleted_at > expiration_time:
                    continue

                vr = metadata['virtual_path']
                parent_vr = '/'.join(vr.split('/')[:-1])
                parent_hash = keyify(DatabaseType.FILELIST, parent_vr)

                if parent_hash in self.db:
                    self.db[parent_hash] -= set([key])

                # The first character of a key is its DatabaseType.
                if key[0] == DatabaseType.DIRECTORY:
                    own_filelist = keyify(DatabaseType.FILELIST, vr)
                    if own_filelist in self.db:
                        del self.db[own_filelist]

                del self.db[key]
                removed_keys.add(key)

            if removed_keys:
                self.db[bucket] -= removed_keys
    
    @defer.inlineCallbacks
    def complete_rescan(self):
        """Rescan every configured path and reconcile the database.

        Steps, in order: prepare the buckets, walk every path in its own
        thread, wait for all queued batches, run the plugins on new files,
        flag vanished entries as deleted, clean zombies out of the deleted
        buckets, garbage collect, commit the plugins and sync the database.
        All steps except the walks are run through the work queue.

        :returns: a Deferred (via ``inlineCallbacks``) firing when done.
        """
        log.msg("Starting complete rescan")
        def rescan_setup(): # execute in threadhandler
            log.msg("Creating buckets in database")
            initial = False
            for i in generate_buckets():
                key = '%s%s' % (DatabaseType.BUCKET, i)
                # The 'n' bucket collects the keys found by this scan.
                self.db['%sn' % key] = set()
                if key not in self.db: # initial scan
                    self.db[key] = set()
                    initial = True
            
            if initial:
                log.msg('Initial scan, creating root')
                self.db[keyify(DatabaseType.DIRECTORY, '')] = {'virtual_path': '', 'name': ''}
                self.db[keyify(DatabaseType.FILELIST, '')] = set()
        
        yield self.add_to_queue(rescan_setup)
        
        log.msg("Preparing to run scan for each path")
        defer_list = []
        for path, prefix in self.paths:
            defer_list.append(threads.deferToThread(self.optimized_walk, path, prefix))
        
        # Each DeferredList result is a (success, value) pair; the value of a
        # walk is the list of Deferreds of the batches it queued.
        # NOTE(review): a failed walk would put a Failure in defers[1] --
        # confirm how that should be handled.
        defer_list = yield defer.DeferredList(defer_list)
        yield defer.DeferredList([d for defers in defer_list for d in defers[1]])
        log.msg("Done scanning paths.")
        
        def handle_plugins_for_new_files():
            # Give every plugin access to the database.
            for p in self.plugins:
                setattr(p, 'db', self.db)
            
            # New = found by this scan ('n' bucket) but not in the live bucket.
            for i in generate_buckets():
                for new_hash in self.db['%s%sn' % (DatabaseType.BUCKET, i)] - self.db['%s%s' % (DatabaseType.BUCKET, i)]:
                    if new_hash[0] != DatabaseType.FILE:
                        continue
                    
                    self.handle_plugins_for_file(new_hash)
        
        def cleanup_files():
            missing_files_found = 0
            for i in generate_buckets():
                # Dead = in the live bucket but not found by this scan.
                missing_files_found += self.delete_dead_hashes(self.db['%s%s' % (DatabaseType.BUCKET, i)] - self.db['%s%sn' % (DatabaseType.BUCKET, i)])
                # The scan result becomes the live bucket; the 'n' bucket is dropped.
                self.db['%s%s' % (DatabaseType.BUCKET, i)] = self.db['%s%sn' % (DatabaseType.BUCKET, i)]
                del self.db['%s%sn' % (DatabaseType.BUCKET, i)]
            return missing_files_found

        log.msg('Handling plugins interested in new data')
        yield self.add_to_queue(handle_plugins_for_new_files)
        
        missing_files_found = yield self.add_to_queue(cleanup_files)
        log.msg("Removed %s files. Final search plugin commit." % missing_files_found)
        
        yield self.add_to_queue(self.clean_deleted_for_zombies)
        log.msg('Cleaned up deleted for zombies')
        
        yield self.add_to_queue(self.garbage_collect)
        log.msg('Garbage collected')
        
        yield self.add_to_queue(self.commit_plugins)
        log.msg('Committed plugins')
        
        yield self.add_to_queue(self.db.sync)
        log.msg('Final DB Sync, all done.')
    
    def get_time(self):
        return int(time.time())
    
    def optimized_walk(self, path, prefix):
        """Walk ``path`` and queue its listing in batches for the database.

        Directory listings are collected until they hold at least
        ``COMMIT_COUNTER`` entries (directories + files); each batch is then
        added to the work queue, where ``handle_items`` stores it.
        Everything inside the ``TEMP_FOLDER`` directly below ``path`` is
        skipped.

        :param path: real directory to walk.
        :param prefix: virtual location the directory is mounted at.
        :returns: list with the Deferred of every batch queued.
        """
        log.msg("Starting to scan '%s' with prefix '%s'" % (path, prefix))

        def handle_items(path, prefix, items):
            # Runs as a queued job: stores one batch of os.walk() results.
            add_time = self.get_time()
            hashes_found = []
            keys_to_dir_mapping = defaultdict(set)

            # make sure all prefix subfolders exist
            existing_keys, prefix_mapping = self.create_subfolders(prefix, add_time)
            keys_touched = set(existing_keys)
            merge_set_dicts(keys_to_dir_mapping, prefix_mapping)

            # dump everything into the database
            for r, dirs, files in items:
                # must map folders to parent folder
                vr = self.vrify(r, path, prefix)

                hashes_found.append(keyify(DatabaseType.DIRECTORY, vr))
                merge_set_dicts(keys_to_dir_mapping,
                                self.add_new_directory(vr, add_time))

                for f in files:
                    fr = os.path.join(r, f)
                    if not os.path.isfile(fr):
                        continue

                    vrf = '%s/%s' % (vr, f)
                    keys_touched |= self.add_new_file(vr, vrf, fr, add_time)

                    file_key = keyify(DatabaseType.FILE, vrf)
                    hashes_found.append(file_key)
                    keys_to_dir_mapping[vr].add(file_key)

            # update modified time
            self.update_modified(keys_touched, add_time)

            # everything found by this scan goes into the 'n' buckets
            self.add_to_buckets(hashes_found, 'n')

            # add new files to folders
            self.handle_keys_to_dir_mapping(keys_to_dir_mapping)

        batch_deferreds = []
        pending = []
        pending_size = 0
        skipped_prefix = '/%s' % TEMP_FOLDER

        for r, dirs, files in os.walk(path):
            # Compare the part of the walked path below ``path``.
            if (r[len(path):] + '/').startswith(skipped_prefix):
                continue

            pending.append((r, dirs, files))
            pending_size += len(dirs) + len(files)

            if pending_size < COMMIT_COUNTER:
                continue

            batch_deferreds.append(
                self.add_to_queue(handle_items, path, prefix, pending))
            pending = []
            pending_size = 0

        # Queue whatever is left over from the last, incomplete batch.
        if pending:
            batch_deferreds.append(
                self.add_to_queue(handle_items, path, prefix, pending))

        log.msg("Done scanning '%s'" % path)
        return batch_deferreds
