from subprocess import Popen
import os
import inspect
import logging
import time
import datetime
import boto.ec2.autoscale
import boto.ec2.elb
from qds_ops.utils.fork import Fork
from qds_ops.entities.rds import RDS
from qds_ops.tparty.knife import Knife
from qds_ops.utils.deploy_utils import DeployUtils


class RollingDeploy:
    """Rolling deploy of one tier through an AWS autoscaling group.

    The deploy marks the existing nodes for graceful shutdown, raises the
    desired capacity of the autoscaling group so that fresh nodes come up,
    waits until at least half of the new nodes have registered with chef
    (and, for the "web" tier, are in service in the ELB) and finally runs
    the graceful-shutdown chef recipe on the older nodes.

    Constructing an instance already talks to AWS and to the database: the
    autoscaling limits and the number of busy delayed-job slots are
    snapshotted in ``__init__``.
    """

    # Number of CPUs per supported EC2 instance type; used to derive how many
    # delayed-job slots a single node offers.
    _INSTANCE_CPU_MAP = {
        'm1.large': 2,
        'm1.medium': 1,
        'm1.small': 1,
        'm1.xlarge': 4,
        'm2.2xlarge': 4,
        'm2.4xlarge': 8,
        'm2.xlarge': 2,
    }

    def __init__(self, host, passwd, user, db, elb, env_str, pemfile, stop_djs_at_start, access, secret, as_group, tier,
                 worker):
        """Store the deploy parameters and snapshot the current cluster state.

        :param elb: name of the load balancer checked by ``monitor_elb``.
        :param env_str: environment name passed to chef / knife / RDS helpers.
        :param pemfile: key file handed to ``DeployUtils.run_chef_command``.
        :param stop_djs_at_start: when true, run the ``shutdown_djs`` chef
            recipe on the existing nodes before new nodes are started.
        :param access: AWS access key id.
        :param secret: AWS secret access key.
        :param as_group: name of the autoscaling group to scale.
        :param tier: tier name; ``"web"`` additionally triggers ELB monitoring.
        :param worker: delayed-job queue name used in the SQL queries.

        ``host``, ``passwd``, ``user`` and ``db`` are only stored on the
        instance; nothing in this class reads them afterwards.
        """
        self.region = "us-east-1"
        self.host = host
        self.worker = worker
        self.passwd = passwd
        self.user = user
        self.db = db
        self.elb = elb
        self.env = env_str
        self.pemfile = pemfile
        self.stopdjs = stop_djs_at_start
        self.access = access
        self.secret = secret
        self.as_group = as_group
        self.tier = tier
        self.as_conn = self.get_as()
        self.slots_per_node = self.get_slots_per_node()
        self.min_nodes = self.get_min_nodes()
        self.max_nodes = self.get_max_nodes()
        self.busy_slots = self.get_busy_slots()
        self.total_nodes_before_push = self.get_total_nodes()

    def _get_group(self):
        """Return the autoscaling group object for ``self.as_group``.

        :raises IndexError: if the API returns no group with that name.
        """
        groups = self.as_conn.get_all_groups([self.as_group])
        if not groups:
            raise IndexError("autoscaling group '%s' not found" % self.as_group)
        return groups[0]

    def _new_nodes_required(self):
        """Return ``(optimal_nodes, new_nodes_reqd)`` for the snapshotted load.

        ``optimal_nodes`` is the number of nodes needed to hold the busy slots
        plus one spare node's worth of slots. Floor division keeps the result
        an integer on both Python 2 and Python 3.
        """
        optimal_nodes = int((self.busy_slots + self.slots_per_node) // self.slots_per_node)
        new_nodes_reqd = max(self.min_nodes, optimal_nodes)
        return optimal_nodes, new_nodes_reqd

    def get_busy_slots(self):
        """Return the number of delayed jobs currently locked on this queue."""
        # NOTE(review): the queue name is interpolated straight into the SQL;
        # this is only safe while ``worker`` comes from trusted configuration.
        sql_query = "select count(*) as busy_slots from `delayed_jobs` WHERE (locked_by is NOT NULL AND locked_at > SUBTIME(CURRENT_TIMESTAMP,'36:00:00') AND failed_at IS NULL AND queue IN ('%s'))" % self.worker
        return RDS.run_query(self.env, False, sql_query)['busy_slots']

    def get_as(self):
        """Open an autoscale connection in ``self.region``."""
        return boto.ec2.autoscale.connect_to_region(self.region, aws_access_key_id=self.access,
                                                    aws_secret_access_key=self.secret)

    def get_slots_per_node(self):
        """Return the delayed-job slots per node (two per CPU).

        The instance type is read from the launch configuration of the
        autoscaling group.

        :raises KeyError: if the instance type is not in the CPU map.
        """
        logging.info("As name is: " + self.as_group)
        lc_name = self._get_group().launch_config_name
        lc = self.as_conn.get_all_launch_configurations(names=[lc_name])[0]
        instance_type = lc.instance_type
        try:
            cpu_per_instance = self._INSTANCE_CPU_MAP[instance_type]
        except KeyError:
            raise KeyError("no CPU count known for instance type '%s'" % instance_type)
        return 2 * cpu_per_instance

    def get_dj_wait_time(self):
        """Return how long, in whole seconds, the oldest runnable job has waited.

        Returns 0 when the query finds no waiting job.
        """
        sql_query = "SELECT MIN(run_at) AS min_run_at FROM delayed_jobs JOIN (SELECT COUNT(*) AS num_jobs, delayed_jobs.account_id, COALESCE(account_limits.limit, 20) AS account_limit FROM delayed_jobs LEFT JOIN account_limits ON delayed_jobs.account_id = account_limits.account_id AND account_limits.metric_name = 'Max concurrent commands' WHERE queue='%s' AND failed_at IS NULL AND locked_by IS NOT NULL and locked_at > SUBTIME(CURRENT_TIMESTAMP,'36:00:00') GROUP BY delayed_jobs.account_id HAVING num_jobs < account_limit) valid_accounts ON delayed_jobs.account_id = valid_accounts.account_id WHERE queue = '%s' AND locked_by IS NULL AND failed_at IS NULL AND run_at <= CURRENT_TIMESTAMP ORDER BY min_run_at ASC LIMIT 1" % (
            self.worker, self.worker)
        oldest_run_at = RDS.run_query(self.env, False, sql_query)['min_run_at']
        if oldest_run_at is None:
            return 0
        # NOTE(review): assumes the database timestamps and the local clock
        # share a timezone - confirm.
        waited = datetime.datetime.now() - oldest_run_at
        # total_seconds() includes whole days (``.seconds`` wraps at 24h); a
        # negative delta caused by clock skew is reported as no wait at all.
        return max(0, int(waited.total_seconds()))

    def get_max_nodes(self):
        """Return the autoscaling group's ``max_size``."""
        return self._get_group().max_size

    def get_total_nodes(self):
        """Return the autoscaling group's ``desired_capacity``."""
        return self._get_group().desired_capacity

    def get_min_nodes(self):
        """Return the autoscaling group's ``min_size``."""
        return self._get_group().min_size

    def can_do_rolling_deploy(self):
        """Return True if the group has headroom for the required new nodes.

        Logs the figures the decision is based on. Returns False (after
        logging a hint) when ``max_size`` leaves too little room.
        """
        wait_time = self.get_dj_wait_time()
        logging.info("Wait time is: " + str(wait_time))
        optimal_nodes, new_nodes_reqd = self._new_nodes_required()
        new_nodes_possible = self.max_nodes - self.total_nodes_before_push
        logging.info("min nodes = " + str(self.min_nodes))
        logging.info("total nodes = " + str(self.total_nodes_before_push))
        logging.info("max nodes = " + str(self.max_nodes))
        logging.info("busy slots = " + str(self.busy_slots))
        logging.info("slots per node = " + str(self.slots_per_node))
        logging.info("optimal nodes = " + str(optimal_nodes))
        logging.info("new nodes needed = " + str(new_nodes_reqd))
        logging.info("new nodes possible = " + str(new_nodes_possible))
        if new_nodes_possible >= 1 and new_nodes_possible >= new_nodes_reqd:
            return True
        logging.info("cannot create reqd new nodes, please increase max size of autoscaling group")
        return False

    def get_as_instances(self):
        """Return the instance ids currently listed in the autoscaling group."""
        return [i.instance_id for i in self._get_group().instances]

    def deploy(self):
        """Run the full rolling deploy.

        :raises Exception: if the autoscaling group lacks headroom, or if the
            new instances do not appear in the group in time.
        """
        if not self.can_do_rolling_deploy():
            raise Exception("Cannot do rolling deploy")
        logging.info("Can do Rolling Deploy")

        # Tag the nodes of this tier with shutdown_gracefully=yes so the final
        # chef run below can select them.
        base_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        out = Fork.check_output(
            ["knife", "exec", base_path + "/../scripts/set_node_attr.rb", self.env, "shutdown_gracefully", "yes",
             self.tier], stderr=None)
        logging.info(out)
        if self.stopdjs:
            chef_command = "sudo chef-client -j /etc/shutdown_djs -E %s" % (self.env)
            logging.info("Running chef command '%s' on older nodes" % chef_command)
            # NOTE(review): the return value of run_chef_command is not
            # checked; confirm whether a failure should abort the deploy.
            DeployUtils.run_chef_command(chef_command, self.env, self.tier, self.pemfile, "")

        started_at = datetime.datetime.now()
        new_node_ids = self.start_new_nodes()

        self.monitor(new_node_ids)

        if self.tier == "web":
            self.monitor_elb(new_node_ids)

        # total_seconds() so that waits of a day or more are not wrapped.
        secs = int((datetime.datetime.now() - started_at).total_seconds())
        logging.info("Total seconds for new nodes to come up: " + str(secs))

        logging.info("Killing Djs on older nodes")
        chef_command = "sudo chef-client -j /etc/graceful_shutdown -E %s" % self.env
        logging.info("Running chef command '%s' on older nodes" % chef_command)
        DeployUtils.run_chef_command(chef_command, self.env, self.tier, self.pemfile,
                                     "AND shutdown_gracefully:yes")
        logging.info("Killed DJs on older nodes")

    def start_new_nodes(self):
        """Raise the desired capacity and wait for the new instances.

        :returns: list of instance ids that were not in the group before.
        :raises Exception: if the instances have not appeared after
            ``WAIT_TIME`` seconds.
        """
        WAIT_TIME = 120
        CHECK_INTERVAL = 30

        old_instances = set(self.get_as_instances())
        logging.info("Old Instances in autoscaling group are: " + str(list(old_instances)))

        _, new_nodes_reqd = self._new_nodes_required()
        self.as_conn.set_desired_capacity(self.as_group, self.total_nodes_before_push + new_nodes_reqd)
        logging.info("Increased desired capacity by " + str(new_nodes_reqd))

        logging.info("waiting for new instances to appear in the asg")
        elapsed = 0
        while elapsed < WAIT_TIME:
            logging.info("Sleeping for %s secs" % CHECK_INTERVAL)
            time.sleep(CHECK_INTERVAL)
            new_instances = self.get_as_instances()
            logging.info("current instances are: " + str(new_instances))
            list_diff = set(new_instances).difference(old_instances)
            # ">=" rather than "==": if more instances than requested show up
            # (e.g. some were already pending), the requirement is still met.
            if len(list_diff) >= new_nodes_reqd:
                logging.info("all new instances are now available")
                logging.info(str(list_diff))
                return list(list_diff)
            elapsed += CHECK_INTERVAL

        raise Exception("required instances not in asg even after %s seconds" % WAIT_TIME)

    def monitor_elb(self, new_node_ids):
        """Block until at least half of ``new_node_ids`` are InService in the ELB.

        Polls every ``SLEEP_TIME`` seconds with no upper time limit. Returns
        immediately when ``new_node_ids`` is empty.
        """
        SLEEP_TIME = 30
        if not new_node_ids:
            logging.info("no new nodes to monitor in the elb")
            return
        elb_conn = boto.ec2.elb.connect_to_region(
            self.region,
            aws_access_key_id=self.access,
            aws_secret_access_key=self.secret
        )
        logging.info("Waiting for at least half of the nodes to be in service in the elb")
        try:
            while True:
                logging.info("sleeping for %s seconds" % SLEEP_TIME)
                time.sleep(SLEEP_TIME)
                healths = elb_conn.describe_instance_health(self.elb, new_node_ids)
                # Key the states by instance id instead of relying on the
                # response having the same order and length as the request.
                state_by_id = dict((h.instance_id, h.state) for h in healths)
                in_elb = 0
                for node_id in new_node_ids:
                    elb_state = state_by_id.get(node_id, "unknown")
                    logging.info("id=%s elb_state=%s" % (node_id, elb_state))
                    if elb_state == "InService":
                        in_elb += 1
                if in_elb * 1.0 / len(new_node_ids) >= 0.5:
                    break
        finally:
            # Release the connection even if a poll raises.
            elb_conn.close()
        logging.info("at least half of the instances are healthy in the elb")

    def monitor(self, new_node_ids):
        """Block until at least half of ``new_node_ids`` are registered with chef.

        Polls every ``SLEEP_TIME`` seconds with no upper time limit and logs
        each node's EC2 state and DNS name. Returns immediately when
        ``new_node_ids`` is empty.
        """
        SLEEP_TIME = 30
        if not new_node_ids:
            logging.info("no new nodes to monitor in chef")
            return
        ec2_conn = boto.ec2.connect_to_region(
            self.region,
            aws_access_key_id=self.access,
            aws_secret_access_key=self.secret
        )
        knife = Knife()
        try:
            while True:
                logging.info("waiting for at least half of the instances to register with chef")
                logging.info("sleeping for %s secs before checking status" % SLEEP_TIME)
                time.sleep(SLEEP_TIME)
                instances = ec2_conn.get_only_instances(new_node_ids)
                # Look instances up by id; the response is not assumed to be
                # in request order or to contain every requested id.
                instance_by_id = dict((inst.id, inst) for inst in instances)
                in_chef = 0
                for node_id in new_node_ids:
                    instance = instance_by_id.get(node_id)
                    state = getattr(instance, "state", "unknown")
                    dns = getattr(instance, "public_dns_name", "unknown")
                    if knife.search(self.env, self.tier, ["name:%s" % node_id]):
                        registered_with_chef = "yes"
                        in_chef += 1
                    else:
                        registered_with_chef = "no"
                    logging.info(
                        "id=%s state=%s dns=%s registered_with_chef=%s" % (node_id, state, dns, registered_with_chef))
                if in_chef * 1.0 / len(new_node_ids) >= 0.5:
                    break
        finally:
            # Release the connection even if a poll raises.
            ec2_conn.close()
        logging.info("at least half of the added instances have registered with chef")

