#!/usr/bin/env python
# coding: utf-8

#
# Initial release Nov. 5, 2009
# v6 release Jan. 20, 2009
# http://cal.freeshell.org
#
# Refactor, update and Python package
# by Socketubs (http://socketubs.net/)
# 09-08-12
#

import os
import re
import string
import sys
import time
from docopt import docopt
import requests

# Usage text handed to docopt() in the __main__ guard; docopt derives the
# command-line interface (arguments, options and defaults) from this string.
doc = """4chandownloader.py, download 4chan thread images.

Usage:
  4chandownloader.py <url> <path> [--delay=<int>] [--thumbs]
  4chandownloader.py -h | --help
  4chandownloader.py -v | --version

Options:
  --thumbs            Download thumbnails
  --delay=<int>       Delay between thread checks [default: 10]
  -h --help           Show help
  -v --version        Show version
"""

def _basename(url):
    """Return the last slash-separated component of *url* (the file name)."""
    return url.rsplit('/', 1)[-1]


def _save_url(url, dest):
    """Fetch *url* and write the raw response body to *dest* in binary mode."""
    r = requests.get(url)
    # 'wb': r.content is bytes; text mode fails on Python 3 and mangles
    # binary data on Windows.
    with open(dest, 'wb') as out:
        out.write(r.content)


def _download_with_retry(url, dest, retry_wait, label):
    """Save *url* to *dest*, retrying once after *retry_wait* seconds.

    *label* ("Image" / "Thumbnail") only customises the failure message.
    A failure of the second attempt propagates to the caller.
    """
    try:
        _save_url(url, dest)
    except (requests.exceptions.RequestException, EnvironmentError) as e:
        print("%s download failed, retrying in %s seconds (%s)" % (label, retry_wait, e))
        time.sleep(retry_wait)
        _save_url(url, dest)
    # Small pause between downloads to go easy on the server.
    time.sleep(0.25)


def main(args):
    """Poll a 4chan thread and archive its images, thumbnails and HTML.

    Args:
        args: mapping (as produced by docopt) with the keys
            ``'<url>'``     thread URL to scrape,
            ``'<path>'``    base directory for the archive,
            ``'--delay'``   seconds between thread checks (10 when missing),
            ``'--thumbs'``  truthy to also download thumbnails.

    Each pass downloads images not yet on disk, rewrites the page so image
    references point at the local copies, saves it as ``<id>.html`` inside
    the thread directory and stores any file-hoster links in ``dl.txt``.

    Returns:
        None, once the thread answers with HTTP 404; otherwise the function
        keeps polling until interrupted.

    Raises:
        Whatever the second attempt of a failed request or file write raises;
        only the first failure is retried.
    """
    # Regular expressions (raw strings so the escapes reach `re` untouched)
    imgurl = re.compile(r'\w+\.4chan\.org/\w+/src/\d+\.(?:jpg|gif|png|jpeg)')
    thumb = re.compile(r'thumbs\.4chan\.org/\w+/thumb/\d+s\.(?:jpg|gif|png|jpeg)')
    tname = re.compile(r'/\d+')
    # Board segment is \w+ (as in `thumb`) so multi-letter boards match too.
    treg = re.compile(r'//\d\.thumbs\.4chan\.org/\w+/')
    # The '?' of "/?d=" must be escaped, otherwise it makes the '/' optional
    # and real links are never matched.
    rs = re.compile(
        r'http://rapidshare\.com/files/\d+/.*\.(?:rar|zip|avi|wmv|part\d+\.rar|\d+)'
        r'|http://megaupload\.com/\?d=........'
        r'|http://megaporn\.com/\?d=........')

    # Initiate variables
    thread = args.get('<url>')
    directory = args.get('<path>')
    delay = args.get('--delay')
    thumbs = args.get('--thumbs')

    # Convert once, up front, so a malformed --delay fails before any I/O.
    delay_s = 10 if delay is None else int(delay)
    file_retry_wait = delay_s / 4

    # Directory name: the numeric path segment(s) of the thread URL.
    dname = ', '.join(segment[1:] for segment in tname.findall(thread))
    dirname = os.path.join(directory, dname)
    thumbdir = os.path.join(dirname, "thumb")

    print("Downloading to: " + dirname)
    # Create directories if they don't exist
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    if thumbs and not os.path.exists(thumbdir):
        os.makedirs(thumbdir)

    # Start
    while True:
        print("Scraping: " + thread)

        # Get page (one retry; a second failure propagates)
        try:
            response = requests.get(thread)
        except requests.exceptions.RequestException as e:
            print("Request failed, retrying in %s seconds (%s)" % (delay_s, e))
            time.sleep(delay_s)
            response = requests.get(thread)

        # A deleted/pruned thread must not overwrite the archive saved so far.
        if response.status_code == 404:
            print("Thread not found (404), stopping: " + thread)
            return

        msg = response.text

        # Find all pictures, thumbnails and download links (sets drop duplicates)
        kwl = set(imgurl.findall(msg))
        rsl = set(rs.findall(msg))
        tl = set(thumb.findall(msg))

        # Save pictures
        for item in kwl:
            fname = _basename(item)
            dest = os.path.join(dirname, fname)
            if os.path.isfile(dest):
                print(str(fname) + " Exists... Trying next file.")
                continue
            print("Downloading: %s" % item)
            _download_with_retry("https://%s" % item, dest, file_retry_wait, "Image")

        # Download thumbnails
        if thumbs:
            for item in tl:
                fname = _basename(item)
                dest = os.path.join(thumbdir, fname)
                if os.path.isfile(dest):
                    print("%s (thumbnail) Exists... Trying next file." % fname)
                    continue
                print("Downloading thumbnail: %s" % item)
                _download_with_retry("https://%s" % item, dest, file_retry_wait, "Thumbnail")

        # Replace URLs with local image locations
        for item in kwl:
            fname = _basename(item)
            msg = msg.replace(item, fname)
            # Protocol-relative references ("//host/.../file") have collapsed
            # to "//file" at this point; point them at the local copy.
            msg = msg.replace("//" + fname, os.path.join(dirname, fname))

        if thumbs:
            for item in tl:
                fname = _basename(item)
                # NOTE(review): chr(92) is a backslash, so the link is a
                # Windows-style "thumb\<file>"; kept as in the original.
                local_ref = chr(34) + "thumb" + chr(92) + fname + chr(34)
                # NOTE(review): "http:\\iqdb.org" is the text "http:\iqdb.org";
                # possibly meant "http://" -- confirm against real page markup.
                msg = msg.replace("http:\\iqdb.org/?url=" + item, local_ref)
                msg = msg.replace("http://www.google.com/searchbyimage?image_url=" + item, local_ref)
            if tl:
                # Strip the thumbnail host/board prefix, leaving "thumb/<file>".
                msg = treg.sub("", msg)

        # Bytes go to a binary handle; non-ASCII characters are dropped.
        with open("%s.html" % os.path.join(dirname, dname), "wb") as outp:
            outp.write(msg.encode('ascii', 'ignore'))

        # Save download links to a text file if they exist
        if not rsl:
            print("Nothing to download.")
        else:
            print("Downloads found!")
            # Joined path: plain concatenation dropped the file next to the
            # thread directory as "<dir>dl.txt".
            with open(os.path.join(dirname, "dl.txt"), "w") as foutrs:
                for link in rsl:
                    foutrs.write(str(link) + "\n")

        # Wait to execute code again
        print("Waiting %s seconds before retrying" % delay_s)
        time.sleep(delay_s)

if __name__ == '__main__':
    # Script entry point: parse the command line against the usage text
    # and hand the resulting option mapping straight to the scraper.
    main(docopt(doc, version=0.1))
    