from subprocess import Popen
import os
import inspect
import logging
import time
import datetime
from qds_ops.utils.fork import Fork
from qds_ops.entities.rds import RDS
import boto.ec2.autoscale
from qds_ops.utils.deploy_utils import DeployUtils

class RollingDeploy:
  """Rolling deploy of one tier that runs inside an EC2 autoscaling group.

  The deploy (see `deploy`) marks the existing nodes of the tier for graceful
  shutdown, grows the autoscaling group by one node, waits for that node to
  register, runs the graceful-shutdown chef run on the older nodes and finally
  grows the group again according to the number of busy delayed-job slots.

  Attributes set by the constructor, besides the plain copies of its arguments:
    as_conn        -- boto autoscale connection (see `get_as`).
    slots_per_node -- int, delayed-job slots per node (see `slots_per_node`).
  """

  # AWS region used for both the autoscale and the EC2 connection.
  REGION = "us-east-1"

  # CPU count this tool assumes for each supported EC2 instance type.
  INSTANCE_CPU_MAP = {
    'm1.small': 1,
    'm1.medium': 1,
    'm1.large': 2,
    'm1.xlarge': 4,
    'm2.xlarge': 2,
    'm2.2xlarge': 4,
    'm2.4xlarge': 8,
  }

  # Delayed-job slots per CPU.
  SLOTS_PER_CPU = 2

  # Polling interval / upper bound while waiting for the autoscaling group to
  # list the freshly requested instance.
  NEW_INSTANCE_POLL_SECS = 5
  NEW_INSTANCE_TIMEOUT_SECS = 1800

  # Attempts / pause used by get_instance_dns.
  DNS_LOOKUP_ATTEMPTS = 5
  DNS_LOOKUP_RETRY_SECS = 20

  def __init__(self, host, passwd, user, db, elb, env_str, pemfile, stop_djs_at_start, access, secret, as_group, tier, worker):
    """Store the deploy settings and connect to the autoscaling API.

    host, passwd, user, db -- stored as-is; not used by this class itself.
    elb               -- load balancer passed to the health check for the "web" tier.
    env_str           -- environment name, stored as `self.env`.
    pemfile           -- key file handed to the chef command runner.
    stop_djs_at_start -- when equal to True, delayed jobs are stopped on the
                         old nodes before the new node is requested.
    access, secret    -- AWS credentials.
    as_group          -- name of the autoscaling group.
    tier              -- tier being deployed (e.g. "web").
    worker            -- delayed_jobs queue name used in the SQL queries.

    Note: the constructor performs network calls (autoscale connection and
    launch configuration lookup).
    """
    self.host = host
    self.worker = worker
    self.passwd = passwd
    self.user = user
    self.db = db
    self.elb = elb
    self.env = env_str
    self.pemfile = pemfile
    self.stopdjs = stop_djs_at_start
    self.access = access
    self.secret = secret
    self.as_group = as_group
    self.tier = tier
    self.get_as()
    self.slots_per_node()

  def _get_group(self):
    """Return the boto description of `self.as_group` (one API call per use)."""
    return self.as_conn.get_all_groups([self.as_group])[0]

  def get_busy_slots(self):
    """Return the number of delayed jobs of `self.worker` that are currently locked.

    Only jobs locked within the last 36 hours and not failed are counted.
    """
    # NOTE(review): the queue name is interpolated into the SQL text; it is
    # assumed to come from trusted deploy configuration, not from user input.
    sql_query = "select count(*) as busy_slots from `delayed_jobs` WHERE (locked_by is NOT NULL AND locked_at > SUBTIME(CURRENT_TIMESTAMP,'36:00:00') AND failed_at IS NULL AND queue IN ('%s'))" % self.worker
    return RDS.run_query(self.env, False, sql_query)['busy_slots']

  def get_as(self):
    """Open the autoscale connection and store it as `self.as_conn`."""
    self.as_conn = boto.ec2.autoscale.connect_to_region(self.REGION, aws_access_key_id=self.access, aws_secret_access_key=self.secret)

  def slots_per_node(self):
    """Compute the slots per node and store them in `self.slots_per_node`.

    The value is SLOTS_PER_CPU times the CPU count of the instance type found
    in the group's launch configuration.

    NOTE: the assignment replaces this bound method on the instance with an
    int, so it can only be called once per instance (the constructor does so).
    This is kept for backward compatibility with code reading the attribute.

    Raises KeyError when the instance type is not in INSTANCE_CPU_MAP.
    """
    print("As name is: " + self.as_group)
    lc_name = self._get_group().launch_config_name
    lc = self.as_conn.get_all_launch_configurations(names=[lc_name])[0]
    instance_type = lc.instance_type
    if instance_type not in self.INSTANCE_CPU_MAP:
      raise KeyError("Unsupported instance type '%s' in launch configuration '%s'" % (instance_type, lc_name))
    self.slots_per_node = self.SLOTS_PER_CPU * self.INSTANCE_CPU_MAP[instance_type]

  def get_dj_wait_time(self):
    """Return how long, in whole seconds, the oldest runnable job has been waiting.

    Returns 0 when the query finds no waiting job. The full elapsed time is
    used (including days), so waits of 24 hours or more are not truncated.
    """
    sql_query = "SELECT MIN(run_at) AS min_run_at FROM delayed_jobs JOIN (SELECT COUNT(*) AS num_jobs, delayed_jobs.account_id, COALESCE(account_limits.limit, 20) AS account_limit FROM delayed_jobs LEFT JOIN account_limits ON delayed_jobs.account_id = account_limits.account_id AND account_limits.metric_name = 'Max concurrent commands' WHERE queue='%s' AND failed_at IS NULL AND locked_by IS NOT NULL and locked_at > SUBTIME(CURRENT_TIMESTAMP,'36:00:00') GROUP BY delayed_jobs.account_id HAVING num_jobs < account_limit) valid_accounts ON delayed_jobs.account_id = valid_accounts.account_id WHERE queue = '%s' AND locked_by IS NULL AND failed_at IS NULL AND run_at <= CURRENT_TIMESTAMP ORDER BY min_run_at ASC LIMIT 1" % (self.worker, self.worker)
    min_run_at = RDS.run_query(self.env, False, sql_query)['min_run_at']
    if min_run_at is None:
      return 0
    # NOTE(review): compares the local clock with a database timestamp; this
    # presumably relies on both using the same time zone -- confirm.
    waited_secs = int((datetime.datetime.now() - min_run_at).total_seconds())
    if waited_secs < 0:
      # run_at lies in the future relative to the local clock; a negative wait
      # is meaningless, so report it and treat the job as not waiting.
      logging.warning("Oldest waiting job has run_at %s, which is ahead of the local clock; treating wait time as 0", min_run_at)
      return 0
    return waited_secs

  def get_max_nodes(self):
    """Return the max size of the autoscaling group."""
    return self._get_group().max_size

  def get_total_nodes(self):
    """Return the desired capacity of the autoscaling group."""
    return self._get_group().desired_capacity

  def get_min_nodes(self):
    """Return the min size of the autoscaling group."""
    return self._get_group().min_size

  def can_do_rolling_deploy(self):
    """Check the preconditions of a rolling deploy.

    Returns None when the deploy may proceed.

    Raises Exception when jobs have been waiting for more than one second, or
    when the group's max size leaves too little headroom for the nodes needed
    to take over the busy slots.
    """
    wait_time = self.get_dj_wait_time()
    logging.info("Wait time is: %s", wait_time)
    if wait_time > 1:
      raise Exception("Rolling deploy stopped as wait time is > 1")
    busy_slots = self.get_busy_slots()
    # Explicit floor division: node counts must stay integers on any Python version.
    new_nodes_reqd = (busy_slots + self.slots_per_node) // self.slots_per_node
    max_nodes = self.get_max_nodes()
    total_nodes = self.get_total_nodes()
    logging.info("Total nodes: %s max_nodes: %s Busy Slots: %s Slots per node: %s", total_nodes, max_nodes, busy_slots, self.slots_per_node)
    buffer_nodes = max_nodes - total_nodes
    logging.info("New nodes reqd are: %s and buffer nodes are: %s", new_nodes_reqd, buffer_nodes)
    # buffer nodes < 1 means we cannot even start a master
    if buffer_nodes < new_nodes_reqd or buffer_nodes < 1:
      raise Exception("To do rolling deploy please increase the max size of autoscaling group.")

  def get_as_instances(self):
    """Return the instance ids currently listed in the autoscaling group."""
    return [i.instance_id for i in self._get_group().instances]

  def _wait_for_new_instance(self, old_instances):
    """Poll the group until an instance id not in `old_instances` appears; return it.

    Raises Exception when nothing new shows up within NEW_INSTANCE_TIMEOUT_SECS,
    instead of polling forever.
    """
    known = set(old_instances)
    deadline = time.time() + self.NEW_INSTANCE_TIMEOUT_SECS
    while True:
      logging.info("Sleeping for %s secs", self.NEW_INSTANCE_POLL_SECS)
      time.sleep(self.NEW_INSTANCE_POLL_SECS)
      current = self.get_as_instances()
      logging.info("New Instances are: " + str(current))
      fresh = set(current) - known
      if fresh:
        return fresh.pop()
      if time.time() >= deadline:
        raise Exception("Timed out after %s secs waiting for a new instance in autoscaling group %s" % (self.NEW_INSTANCE_TIMEOUT_SECS, self.as_group))

  def deploy(self):
    """Run the rolling deploy.

    Steps: verify preconditions, flag the current nodes with
    shutdown_gracefully=yes, optionally stop delayed jobs on them, grow the
    group by one, wait for the new node to register (and, for the "web" tier,
    to become healthy), run the graceful-shutdown chef run on the flagged
    nodes, then grow the group by as many further nodes as needed and allowed.

    Raises Exception when the preconditions fail or the new instance never
    appears in the autoscaling group.
    """
    self.can_do_rolling_deploy()
    logging.info("Can do Rolling Deploy")
    base_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    out = Fork.check_output(["knife", "exec", base_path + "/../scripts/set_node_attr.rb", self.env, "shutdown_gracefully", "yes", self.tier], stderr=None)
    print(out)
    # Kept as an equality test on purpose: switching to plain truthiness would
    # change behaviour for non-boolean flag values (e.g. non-empty strings).
    if self.stopdjs == True:
      chef_command = "sudo chef-client -j /etc/shutdown_djs -E %s" % (self.env)
      logging.info("Running chef command '%s' on older nodes" % chef_command)
      DeployUtils.run_chef_command(chef_command, self.env, self.tier, self.pemfile, "")

    old_instances = self.get_as_instances()
    logging.info("Old Instances in autoscaling group are: " + str(old_instances))
    self.as_conn.set_desired_capacity(self.as_group, self.get_total_nodes() + 1)
    logging.info("Increased desired capacity by 1")
    started_at = datetime.datetime.now()
    new_instance = self._wait_for_new_instance(old_instances)

    logging.info("Waiting for instance to come up. Sleeping for 60 secs")
    time.sleep(60)
    dns = self.get_instance_dns(new_instance)
    logging.info("New instance is: " + str(new_instance) + ". New instance ip is: " + str(dns) +
          ". Waiting for 1 min before checking the status of the instance.")
    time.sleep(60)
    DeployUtils.wait_till_node_registers(self.env, self.tier, new_instance)
    if self.tier == "web":
      DeployUtils.wait_till_node_healthy(self.access, self.secret, self.elb, new_instance)

    secs = int((datetime.datetime.now() - started_at).total_seconds())
    logging.info("Total seconds for master to come up: " + str(secs))
    logging.info("Killing Djs on older nodes")
    chef_command = "sudo chef-client -j /etc/graceful_shutdown -E %s" % self.env
    logging.info("Running chef command '%s' on older nodes" % chef_command)
    DeployUtils.run_chef_command(chef_command, self.env, self.tier, self.pemfile, "AND shutdown_gracefully:yes")
    logging.info("Killed DJs on older nodes")

    # Figure out how many more nodes are required.
    busy_slots = self.get_busy_slots()
    # Explicit floor division so the capacity sent to AWS is always an integer.
    new_nodes_reqd = busy_slots // self.slots_per_node
    # The master is already started, so subtract 1 from min nodes.
    new_nodes_reqd = max(self.get_min_nodes() - 1, new_nodes_reqd)

    max_nodes = self.get_max_nodes()
    total_nodes = self.get_total_nodes()
    buffer_nodes = max_nodes - total_nodes

    # Never request more than the group's max size allows.
    new_nodes_to_start = min(buffer_nodes, new_nodes_reqd)
    logging.info("Staring new nodes: " + str(new_nodes_to_start))
    if new_nodes_to_start > 0:
      self.as_conn.set_desired_capacity(self.as_group, total_nodes + new_nodes_to_start)

  def get_instance_dns(self, new_instance):
    """Return the public DNS name of instance id `new_instance`, or None.

    The lookup is best effort: it is attempted DNS_LOOKUP_ATTEMPTS times with
    DNS_LOOKUP_RETRY_SECS between attempts; any exception is logged and, once
    all attempts are used up, None is returned instead of raising.
    """
    ec2_conn = boto.ec2.connect_to_region(self.REGION, aws_access_key_id=self.access, aws_secret_access_key=self.secret)
    for attempts_left in range(self.DNS_LOOKUP_ATTEMPTS - 1, -1, -1):
      try:
        return ec2_conn.get_only_instances([new_instance])[0].public_dns_name
      except Exception as e:
        # Deliberately broad: the DNS name is only used for logging by deploy().
        logging.warning("Exception happened while fetching instance dns name: %s", e)
        if attempts_left > 0:
          logging.info("Retrying in %s secs", self.DNS_LOOKUP_RETRY_SECS)
          time.sleep(self.DNS_LOOKUP_RETRY_SECS)
        else:
          logging.error("Failed to fetch dns of new instance.")
    return None
