#!/usr/bin/env ruby
require 'rubygems'
require 'aws/auto_scaling'
require 'aws/ec2'
require 'aws/elb'
require 'socket'
require 'yaml'
require 'dbi'

# Performs a rolling deploy of a tier that runs in an AWS auto scaling group
# behind an ELB.
#
# AWS credentials and the auto scaling group name are read from the
# `settings` table of the application database. Load is estimated from the
# `delayed_jobs` table (queue 'qpal_worker').
class RollingDeploy
  # host     - database host
  # passwd   - database password
  # user     - database user
  # db       - database name
  # elb_name - name of the load balancer new instances must join
  # env_str  - chef environment name, passed to knife
  # pemfile  - ssh identity file used by `knife ssh`
  #
  # Connects to the database and loads the aws.* settings into @val.
  # Raises DBI::DatabaseError when the connection cannot be established.
  def initialize(host, passwd, user, db, elb_name, env_str, pemfile)
    @env_str = env_str
    @host = host
    @passwd = passwd
    @user = user
    @db = db
    @elb_name = elb_name
    @pemfile = pemfile
    get_db_conn_handle
    rows = fetch_rows("SELECT var,value from settings where " \
      "var IN ('aws.root_access_key', 'aws.root_secret_key', 'aws.as_group_name')")
    @val = {}
    rows.each { |row| @val[row[0]] = row[1] }
  end

  # Returns the memoized DBI connection, opening it on first use.
  #
  # A connection failure is logged and re-raised. Returning a placeholder
  # value here would only defer the failure to the first query, where it
  # would surface as a NoMethodError on nil.
  def get_db_conn_handle
    return @db_conn_handle unless @db_conn_handle.nil?
    @db_conn_handle = DBI.connect("DBI:Mysql:#{@db}:#{@host}", @user, @passwd)
  rescue DBI::DatabaseError
    puts "#{Time.now.utc}: Failed to connect to DB"
    raise
  end

  # Returns the (memoized) load balancer named @elb_name.
  def get_elb
    @elb ||= AWS::ELB.new(
      :access_key_id => @val['aws.root_access_key'],
      :secret_access_key => @val['aws.root_secret_key']
    ).load_balancers[@elb_name]
  end

  # Returns the (memoized) auto scaling group named by the
  # 'aws.as_group_name' setting.
  def get_as
    @as ||= AWS::AutoScaling.new(
      :access_key_id => @val['aws.root_access_key'],
      :secret_access_key => @val['aws.root_secret_key']
    ).groups[@val['aws.as_group_name']]
  end

  # Returns, in whole seconds, how long the oldest runnable job has been
  # waiting, or 0 when there is none.
  #
  # A job counts as runnable when it is in the 'qpal_worker' queue, is not
  # locked, has not failed, is already due (run_at <= now) and belongs to an
  # account whose number of jobs locked within the last 24 hours is below
  # its 'Max concurrent commands' limit (20 when no limit row exists).
  def get_dj_wait_time
    day_ago = DBI::Type::Timestamp.parse(Time.now.utc - 24 * 60 * 60)
    now = DBI::Type::Timestamp.parse(Time.now.utc)
    info = fetch_rows("SELECT MIN(run_at) AS min_run_at 
                       FROM delayed_jobs JOIN (SELECT COUNT(*) AS num_jobs, 
                         delayed_jobs.account_id, COALESCE(account_limits.limit, 20) AS account_limit 
                         FROM delayed_jobs LEFT JOIN account_limits ON delayed_jobs.account_id = account_limits.account_id 
                         AND account_limits.metric_name = 'Max concurrent commands' 
                         WHERE queue='qpal_worker' AND failed_at IS NULL AND locked_by IS NOT NULL and locked_at > '#{day_ago}'
                         GROUP BY delayed_jobs.account_id 
                         HAVING num_jobs < account_limit) valid_accounts 
                       ON delayed_jobs.account_id = valid_accounts.account_id 
                       WHERE queue = 'qpal_worker' AND locked_by IS NULL AND failed_at IS NULL 
                       AND run_at <= '#{now}' 
                       ORDER BY min_run_at ASC LIMIT 1")
    return 0 unless info.size == 1
    # MIN() over zero rows yields a single row holding NULL.
    oldest_run_at = info[0] && info[0][0]
    oldest_run_at.nil? ? 0 : (Time.now.utc - oldest_run_at).to_i
  end

  # Maximum size configured on the auto scaling group.
  def get_max_as_nodes
    get_as.max_size
  end

  # Current desired capacity of the auto scaling group.
  def get_total_as_nodes
    get_as.desired_capacity
  end

  # Minimum size configured on the auto scaling group.
  def get_min_as_nodes
    get_as.min_size
  end

  # Returns the number of job slots assumed per node: 8 for "m1.xlarge"
  # launch configurations, 6 for every other instance type. The value is
  # memoized and logged on every call.
  def get_available_slots_per_node
    if @available_slots.nil?
      instance_type = get_as.launch_configuration.instance_type
      @available_slots = instance_type == "m1.xlarge" ? 8 : 6
    end
    # Log after the lookup so the very first call reports a real value.
    puts("Available Slots are: #{@available_slots}")
    @available_slots
  end

  # Returns the number of 'qpal_worker' jobs that are locked (locked within
  # the last 24 hours) and have not failed.
  def get_busy_slots
    day_ago = DBI::Type::Timestamp.parse(Time.now.utc - 24 * 60 * 60)
    info = fetch_rows("select count(*) from `delayed_jobs` WHERE (locked_by is NOT NULL AND locked_at > '#{day_ago}' AND failed_at IS NULL AND queue IN ('qpal_worker'))")
    info.size == 1 ? info[0][0].to_i : 0
  end

  # Decides whether a rolling upgrade may start right now.
  #
  # Returns false when jobs are already waiting for a slot, or when the
  # auto scaling group does not have enough headroom (max size minus desired
  # capacity) to start the replacement nodes; true otherwise.
  def can_do_rolling_upgrade?
    wait_time = get_dj_wait_time
    puts("Wait time is: #{wait_time}")
    return false if wait_time > 0

    busy_slots = get_busy_slots
    slots_per_node = get_available_slots_per_node
    # Integer division: floor(busy / slots) + 1, i.e. always at least one node.
    new_nodes_reqd = (busy_slots + slots_per_node) / slots_per_node

    max_nodes = get_max_as_nodes
    total_nodes = get_total_as_nodes
    puts("Total nodes: #{total_nodes}, max_nodes: #{max_nodes}")

    buffer_nodes = max_nodes - total_nodes
    puts("New nodes reqd are: #{new_nodes_reqd} and buffer nodes are: #{buffer_nodes}.")
    # buffer_nodes < 1 means we cannot even start a master.
    if buffer_nodes < new_nodes_reqd || buffer_nodes < 1
      # Only give this advice when the upgrade is actually refused.
      puts("To do rolling deploy please increase the max size of autoscaling group.")
      return false
    end

    true
  end

  # Returns the ids of the instances currently in the auto scaling group.
  def get_as_instances
    get_as.auto_scaling_instances.map(&:id)
  end

  # Runs the rolling upgrade:
  #
  # 1. marks the current nodes with shutdown_gracefully=yes (knife exec),
  # 2. raises the desired capacity by one and waits for the new instance to
  #    appear in the group and become "InService" on the ELB,
  # 3. runs chef-client with /etc/graceful_shutdown on the marked nodes,
  # 4. raises the desired capacity further to cover the busy slots and the
  #    group's minimum size, limited by the remaining headroom.
  #
  # Returns false without changing anything when can_do_rolling_upgrade?
  # refuses; otherwise returns the result of the final capacity change, or
  # nil when no additional nodes were needed.
  #
  # NOTE: both waits poll without an upper bound; a node that never appears
  # or never becomes healthy keeps this method running until interrupted.
  def rolling_upgrade
    return false unless can_do_rolling_upgrade?

    # mark current nodes
    run_command("knife exec ./set_node_attr.rb #{@env_str} shutdown_gracefully yes")

    # figure out current instances
    old_instances = get_as_instances
    puts("Old instances are: #{old_instances.inspect}")

    # start master
    get_as.set_desired_capacity(get_total_as_nodes + 1)
    @new_instance = nil
    @new_instance = wait_for_new_instance(old_instances)
    puts("New instance is: #{@new_instance}. Waiting for 3 mins before checking if this instance has attached to elb or not")
    sleep 180
    wait_for_elb_in_service(@new_instance)

    # now shut down everything on old nodes
    puts "Killing Djs on older nodes"
    old_nodes_selector = "chef_environment:#{@env_str} AND qubole_tier:web AND shutdown_gracefully:yes"
    run_command("knife ssh '#{old_nodes_selector}' 'sudo chef-client -j /etc/graceful_shutdown' -x ec2-user -i #{@pemfile} -a ec2.public_hostname --no-host-key-verify")
    puts "Killed DJs on older nodes"

    # figure out how many more nodes are required
    new_nodes_reqd = get_busy_slots / get_available_slots_per_node
    # master is already started, so subtract 1 from min nodes
    new_nodes_reqd = [get_min_as_nodes - 1, new_nodes_reqd].max

    buffer_nodes = get_max_as_nodes - get_total_as_nodes
    new_nodes_to_start = [buffer_nodes, new_nodes_reqd].min
    puts "Staring new nodes: #{new_nodes_to_start}"
    if new_nodes_to_start > 0
      get_as.set_desired_capacity(get_total_as_nodes + new_nodes_to_start)
    end
  end

  private

  # Prepares and executes +sql+ and returns all rows as an array. The
  # statement handle is finished even when execution raises.
  def fetch_rows(sql)
    sth = get_db_conn_handle.prepare(sql)
    sth.execute
    sth.to_a
  ensure
    sth.finish if sth
  end

  # Runs +command+ through the shell and returns its output. A non-zero exit
  # status is reported but does not abort the deploy.
  def run_command(command)
    output = `#{command}`
    status = $?
    unless status.nil? || status.success?
      puts "WARNING: command failed (exit status #{status.exitstatus}): #{command}"
    end
    output
  end

  # Polls the auto scaling group every 5 seconds until it contains an
  # instance id that is not in +old_instances+, and returns that id.
  def wait_for_new_instance(old_instances)
    new_instance = nil
    while new_instance.nil?
      sleep 5
      new_instances = get_as_instances
      puts("New instances are: #{new_instances.inspect}")
      new_instance = (new_instances - old_instances)[0]
    end
    new_instance
  end

  # Polls the ELB every 10 seconds until +instance_id+ reports "InService".
  def wait_for_elb_in_service(instance_id)
    health = ""
    while health != "InService"
      sleep 10
      health = get_elb.instances[instance_id].elb_health.state
      puts "Current elb health of new instance is: #{health}"
    end
  end
end

# Command-line entry point.
#
# Expects seven positional arguments. Note that the database password comes
# before the database user.
if __FILE__ == $0
  if ARGV.size < 7
    # Fail early with a usage line instead of deep inside the DB connect.
    abort "Usage: #{$0} ENV_STR DB_HOST DB_PASSWD DB_USER DB_NAME ELB_NAME PEMFILE"
  end
  env_str, host, passwd, user, db, elb, pemfile = ARGV
  RollingDeploy.new(host, passwd, user, db, elb, env_str, pemfile).rolling_upgrade
end

