#!/usr/bin/env python

import os
import sys
import shutil
import logging
from tempfile import mkdtemp
import tarfile
import boto
import boto.s3.connection
from contextlib import contextmanager
from datetime import datetime, timedelta


def setup_logger():
    logger = logging.getLogger('cdn-log-archiver')
    logger.setLevel(int(os.environ.get('DEBUG', logging.INFO)))

    console = logging.StreamHandler()
    console.setLevel(int(os.environ.get('DEBUG', logging.INFO)))

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    console.setFormatter(formatter)
    logger.addHandler(console)

    return logger


@contextmanager
def temp_directory():
    temp_dir = mkdtemp()
    yield temp_dir
    shutil.rmtree(temp_dir)


def tar_logs(tar_file, tar_dir):
    archive = tarfile.open(tar_file, 'w:gz')
    archive.add(tar_dir, arcname='')
    archive.close()


def write_logs_to_temp(target_logs, dest):
    [l.get_contents_to_filename('{0}/{1}'.format(dest, l.name[9:])) for l in
        target_logs]


def archive_logs(bucket, target_logs, save_to, date):
    bytes_written = 0
    with temp_directory() as temp_dir:
        tar_file = os.path.join(temp_dir, "../{0}.tar.gz".format(date))
        write_logs_to_temp(target_logs, temp_dir)
        tar_logs(tar_file, temp_dir)

        new_archive = bucket.new_key(os.path.join(save_to,
                                     os.path.basename(tar_file)))

        bytes_written = new_archive.set_contents_from_filename(tar_file)
        os.remove(tar_file)

    return bytes_written


def remove_raw_logs(bucket, target_logs):
    [bucket.delete_key(l) for l in target_logs]


def collect_logs(bucket, look_for):
    return [k for k in bucket.list() if k.name.startswith(look_for)]


def main():
    s3_host = os.environ['S3_HOST']
    access_key = os.environ['ACCESS_KEY']
    secret_key = os.environ['SECRET_KEY']
    bucket_name = os.environ['PYPI_LOG_BUCKET']

    logger = setup_logger()

    # Look for the date to archive in the env, default to yesterday
    if os.environ.get('PYPI_LOG_DATE', None):
        target_date = datetime.strptime(os.environ.get('PYPI_LOG_DATE'),
                                        '%Y-%m-%d')
    else:
        target_date = (datetime.target_today() - timedelta(days=1))

    look_for = 'incoming/{0}'.format(target_date.strftime('%Y-%m-%d'))
    save_to = 'archive/{0}/{1}'.format(target_date.year,
                                       target_date.month)

    logger.debug('Working with bucket {0}'.format(bucket_name))
    logger.debug('Looking for keys like {0}'.format(look_for))
    logger.debug('Will save archive to {0}'.format(save_to))

    conn = boto.connect_s3(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        host=s3_host,
        calling_format=boto.s3.connection.OrdinaryCallingFormat()
    )

    logger.debug('Connection: {0}'.format(conn))

    bucket = conn.get_bucket(bucket_name)
    target_logs = collect_logs(bucket, look_for)

    logger.debug('What we are working with:\n {0}'.format(target_logs))

    if len(target_logs) == 0:
        logger.warn('No logs found at {0}'.format(look_for))
        return 1

    bytes_written = archive_logs(bucket, target_logs, save_to,
                                 target_date.strftime('%Y-%m-%d'))

    if bytes_written and bytes_written > 0:
        logger.info('{0} bytes written to {1}/{2}'.format(bytes_written,
                                                    bucket_name, save_to))

        remove_raw_logs(bucket, target_logs)
        return 0
    else:
        logger.error('Nothing written to {0}/{1}'.format(bucket_name, save_to))
        return 1


if __name__ == '__main__':
    sys.exit(main())
