#!/usr/bin/env python

import os
import sys
import shutil
import logging
import glob
from tempfile import mkdtemp
import boto
import boto.s3.connection
from contextlib import contextmanager
from datetime import datetime, timedelta
from backports import lzma


def setup_logger():
    """Return the 'cdn-log-archiver' logger, configured to log to the console.

    The numeric log level is read from the ``DEBUG`` environment variable
    (for example ``DEBUG=10`` for ``logging.DEBUG``) and defaults to
    ``logging.INFO``.  A value that is not an integer raises ``ValueError``.

    The function is safe to call repeatedly: the console handler is attached
    only once, so repeated calls do not produce duplicated log lines.
    """
    # Parse the level once so the logger and its handler can never disagree.
    level = int(os.environ.get('DEBUG', logging.INFO))

    logger = logging.getLogger('cdn-log-archiver')
    logger.setLevel(level)

    # logging.getLogger() hands back the same object for the same name, so an
    # unconditional addHandler() would stack one more handler per call.
    if not logger.handlers:
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(console)

    # Keep already-attached handlers in step with the current level.
    for handler in logger.handlers:
        handler.setLevel(level)

    return logger


@contextmanager
def temp_directory():
    """Context manager yielding the path of a freshly created temp directory.

    The directory and everything inside it is removed when the ``with`` block
    exits, including when the block exits by raising an exception.
    """
    temp_dir = mkdtemp()
    try:
        yield temp_dir
    finally:
        # Without try/finally an exception in the with-body would skip the
        # cleanup and leave the directory behind.
        shutil.rmtree(temp_dir)


def compress_logs(archive_file, temp_dir):
    """Concatenate every file in *temp_dir* into one xz archive.

    :param archive_file: path of the ``.xz`` file to create.  It must live
        outside *temp_dir*, otherwise it would match the glob below and be
        fed into itself.
    :param temp_dir: directory whose entries (as matched by ``*``) are
        appended to the archive back to back, with no separator.

    Files are added in sorted name order so the archive content is
    deterministic rather than dependent on directory listing order.
    """
    with lzma.open(archive_file, 'w') as archive:
        for log_file in sorted(glob.glob("{0}/*".format(temp_dir))):
            # Binary mode: the xz stream takes bytes, and the logs must not be
            # decoded or have their newlines translated on the way through.
            with open(log_file, 'rb') as log:
                # Stream in chunks instead of loading a whole log into memory.
                shutil.copyfileobj(log, archive)


def write_logs_to_temp(target_logs, dest):
    """Download each key in *target_logs* to a file inside *dest*.

    :param target_logs: iterable of key objects exposing ``name`` and
        ``get_contents_to_filename(path)``.
    :param dest: path of an existing directory to download into.

    The local file name is the key name with its first nine characters
    removed.
    """
    # Nine is the length of the 'incoming/' prefix that main() searches
    # under, so slicing leaves just the part of the key after that folder.
    prefix_length = 9

    # A plain loop: the calls are made for their side effect only, so there
    # is no reason to build a list of their return values.
    for log_key in target_logs:
        local_path = '{0}/{1}'.format(dest, log_key.name[prefix_length:])
        log_key.get_contents_to_filename(local_path)


def archive_logs(bucket, target_logs, save_to, date):
    """Download *target_logs*, compress them and upload the archive.

    :param bucket: bucket object used to create the destination key
        (``bucket.new_key``).
    :param target_logs: key objects of the raw logs to archive.
    :param save_to: key prefix ("folder") under which the archive is stored.
    :param date: string used as the archive's base name; the uploaded key is
        ``<save_to>/<date>.xz``.
    :returns: whatever ``set_contents_from_filename`` returns for the upload;
        the caller treats it as the number of bytes written.

    All intermediate files live inside one private temporary directory, so
    nothing is written to a predictable path in the shared temp location and
    a single directory removal cleans up both the raw logs and the archive.
    """
    archive_name = "{0}.xz".format(date)

    with temp_directory() as temp_dir:
        # Raw logs go in their own sub-directory so the archive can sit next
        # to it without being picked up as an input by compress_logs().
        raw_dir = os.path.join(temp_dir, 'raw')
        os.mkdir(raw_dir)
        archive_file = os.path.join(temp_dir, archive_name)

        write_logs_to_temp(target_logs, raw_dir)
        compress_logs(archive_file, raw_dir)

        new_archive = bucket.new_key(os.path.join(save_to, archive_name))
        return new_archive.set_contents_from_filename(archive_file)


def remove_raw_logs(bucket, target_logs):
    """Delete every key in *target_logs* from *bucket*, one call per key.

    :param bucket: object exposing ``delete_key(key)``.
    :param target_logs: iterable of the keys to delete.
    """
    # A plain loop: delete_key() is called for its side effect only, so
    # collecting its return values into a list would be wasted work.
    for log_key in target_logs:
        bucket.delete_key(log_key)


def collect_logs(bucket, look_for):
    """Return a list of the keys in *bucket* whose name starts with *look_for*.

    :param bucket: object exposing ``list(prefix=...)`` that yields key
        objects with a ``name`` attribute.
    :param look_for: key-name prefix to match.
    :returns: a ``list`` (not a lazy iterator), so callers can take its
        length and iterate it more than once.
    """
    # Passing the prefix lets the listing be narrowed at the source instead
    # of walking every key in the bucket.  The startswith() check is kept as
    # a guard in case the backend returns keys outside the prefix.
    return [key for key in bucket.list(prefix=look_for)
            if key.name.startswith(look_for)]


def main():
    """Archive one day of raw logs from the bucket and delete the originals.

    Configuration comes from the environment:

    * ``S3_HOST``, ``ACCESS_KEY``, ``SECRET_KEY``, ``PYPI_LOG_BUCKET`` --
      required; a missing one raises ``KeyError``.
    * ``PYPI_LOG_DATE`` -- optional day to archive as ``YYYY-MM-DD``;
      defaults to yesterday (local time).

    Keys under ``incoming/<date>`` are compressed into
    ``archive/<year>/<month>/<date>.xz``.  The raw keys are deleted only
    after the upload reports that bytes were written.

    :returns: process exit status -- ``0`` on success, ``1`` when no logs
        were found or nothing was written.
    """
    s3_host = os.environ['S3_HOST']
    access_key = os.environ['ACCESS_KEY']
    secret_key = os.environ['SECRET_KEY']
    bucket_name = os.environ['PYPI_LOG_BUCKET']

    logger = setup_logger()

    # Look for the date to archive in the env, default to yesterday
    requested_date = os.environ.get('PYPI_LOG_DATE')
    if requested_date:
        target_date = datetime.strptime(requested_date, '%Y-%m-%d')
    else:
        target_date = datetime.today() - timedelta(days=1)

    # Formatted once; used for both the search prefix and the archive name.
    date_label = target_date.strftime('%Y-%m-%d')

    look_for = 'incoming/{0}'.format(date_label)
    # Year and month are used as plain integers (month is not zero-padded).
    save_to = 'archive/{0}/{1}'.format(target_date.year,
                                       target_date.month)

    logger.debug('Working with bucket {0}'.format(bucket_name))
    logger.debug('Looking for keys like {0}'.format(look_for))
    logger.debug('Will save archive to {0}'.format(save_to))

    conn = boto.connect_s3(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        host=s3_host,
        calling_format=boto.s3.connection.OrdinaryCallingFormat()
    )

    logger.debug('Connection: {0}'.format(conn))

    bucket = conn.get_bucket(bucket_name)
    target_logs = collect_logs(bucket, look_for)

    logger.debug('What we are working with:\n {0}'.format(target_logs))

    if not target_logs:
        # Logger.warn() is a deprecated alias; warning() is the real method.
        logger.warning('No logs found at {0}'.format(look_for))
        return 1

    bytes_written = archive_logs(bucket, target_logs, save_to, date_label)

    # Covers both a None result and a non-positive byte count.
    if not bytes_written or bytes_written <= 0:
        logger.error('Nothing written to {0}/{1}'.format(bucket_name, save_to))
        return 1

    logger.info('{0} bytes written to {1}/{2}'.format(bytes_written,
                                                      bucket_name, save_to))

    # Only reached once the archive upload has reported success.
    remove_raw_logs(bucket, target_logs)
    return 0


# Script entry point: run the archiver and hand main()'s return value to the
# shell as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
