from functools import wraps
from Queue import Queue
from threading import Thread

def async_prefetch_wrapper(iterable, buffer=10):
    """
    Wrap an iterable so that its items are produced by a background thread.

    A daemon worker thread iterates over ``iterable`` and pushes every item
    into a bounded queue (created as ``Queue(buffer)``) to limit memory
    consumption; this generator pops the items from that queue in order.

    Because this is a generator function, nothing runs (and no thread is
    started) until the first item is requested.

    :param iterable: any iterable; it is iterated in the worker thread.
    :param buffer: size passed to ``Queue``, i.e. how many prefetched items
        may wait in the queue at once.
    :returns: a generator yielding the items of ``iterable`` in order.
    :raises: any exception raised while iterating ``iterable``; it is
        re-raised here after the items produced before it have been yielded.

    Note: if the consumer abandons this generator before it is exhausted, the
    worker can stay blocked on a full queue. It is a daemon thread, so it
    does not keep the interpreter alive.
    """
    done = object()  # unique end-of-stream marker, only ever compared by identity
    failure = []     # the worker stores the exception that ended iteration here

    def worker(q, it):
        try:
            for item in it:
                q.put(item)
        except BaseException as exc:
            # Hand the error over to the consumer instead of dying silently.
            failure.append(exc)
        finally:
            # Always signal the end, otherwise the consumer would block on
            # queue.get() forever when the source iterator fails.
            q.put(done)

    # launch a thread to fetch the items in the background
    queue = Queue(buffer)
    it = iter(iterable)
    thread = Thread(target=worker, args=(queue, it))
    thread.daemon = True
    thread.start()

    # pull the items off the queue as requested
    while True:
        item = queue.get()
        # Identity test: "==" would invoke the item's own __eq__, which can
        # return a truthy value (ending the stream early) or a value that
        # has no truth value at all.
        if item is done:
            if failure:
                raise failure[0]
            return
        yield item

def async_prefetch(func):
    """
    Decorator that makes a generator function prefetch in the background.

    The returned callable forwards its arguments to ``func`` and passes the
    resulting iterable to ``async_prefetch_wrapper`` (using that function's
    default buffer size). The metadata of ``func`` is copied onto the
    returned callable via ``functools.wraps``.
    """
    def prefetching(*positional, **named):
        source = func(*positional, **named)
        return async_prefetch_wrapper(source)

    return wraps(func)(prefetching)


'''
def tests():

    # First I'll do a simple test that creates a bunch of files and MD5-hashes them.
    # Here's the code:

    from glob import glob
    from hashlib import md5

    def test_setup():
        files = []
        lines = 1000000
        for i in xrange(100):
            filename = "tempfile%d.txt"%i
            files.append(filename)
            with open(filename, "w") as f:
                f.write( ("%d\n"%i)*lines )
        return files

    def test_cleanup():
        for f in glob("tempfile*.txt"):
            os.remove(f)

    def test():
        files = test_setup()
        for c in contents(files):
            md5(c).digest()
        test_cleanup()

    from timeit import Timer
    t = Timer("test()", "gc.enable()")
    print t.repeat(5, 1)

    def contents(files):
        for filename in files:
            with open(filename, "rb") as f:
                content = f.read()
            yield content

    contents_prefetch = async_prefetch(contents)

# Here are the results

#without async prefetch	11.282730626378491	5.430316841944997	3.947590615567062	4.129772568860009	4.102539568576454
#with async prefetch	6.155376451476375	3.790340392424177	3.384881807604039	3.436732252283459	3.415469144821479
#without async prefetch and no md5	6.315582636899382	3.342140062493976	3.197938983865267	3.102182470118409	3.2230784219782134
#Not a spectacular improvement, but not bad.
#The best speedup we can hope for is reducing the total runtime by N*min(fetch time, work time).
#You can see from the 3rd row that the total runtime is still dominated by IO, i.e. this is a case with a tiny workload vs slow IO.

# Let's try it by replacing md5 with zlib compression.

# without async prefetch. zlib compress	12.757559200959898	12.518354357886267	16.015608687696343	12.331753337365505	12.05284226067839
#with async prefetch. zlib compress	10.578236130569667	9.156545245275586	12.359309772610764	9.072958714026505	8.881738391331858
#not bad.

# So this technique can give you a speedup when you have producer style generators that take time to produce elements.

if __name__ == '__main__':
    tests()
'''
