import datetime

# Bring the names used below (PyDruid, bard_url and the aggregation /
# post-aggregation / filter helpers) into scope before their first use.
# The file's own pydruid imports only appear further down, so running the
# script from the top raised a NameError on the PyDruid(...) line.
# NOTE(review): bard_url is assumed to come from pydruid.bard, the only
# non-client pydruid module this file imports -- confirm.
from pydruid.client import *
from pydruid.bard import *

# Client bound to the 'demo' bard endpoint.
query = PyDruid(bard_url('demo'), 'druid/v2')

# Data source details for the wikipedia test queries.
dataSource = 'wikipedia_editstream'
dimension = "page"
intervals = "2013-06-14/pt1h"

#------------- test timeBoundary -----------------
# Run a timeBoundary query against the configured data source and show the
# value the client returns.
bound = query.time_boundary(datasource=dataSource)

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print('timeboundary')
print(bound)

#------------- test timeseries -----------------
# Hourly timeseries with two aggregations ("count", "rows") and two
# post-aggregations computed from those aggregated fields.
counts = query.timeseries(
    datasource=dataSource,
    granularity="hour",
    intervals=intervals,
    aggregations={"count": doublesum("count"), "rows": count("rows")},
    post_aggregations={
        'percent': (Field('count') / Field('rows')) * Const(100),
        'diff': Field('count') - Field('rows'),
    }
)

# The data frame is exported from the client object, not from `counts`.
df = query.export_pandas()

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print('timeseries')
print(df)
print(counts)


#------------- test topN -----------------
# Top 2 values of the configured dimension ranked by the "count" metric,
# restricted to rows whose 'language' dimension equals 'en'.
top = query.topn(
    datasource=dataSource,
    granularity='all',
    intervals=intervals,
    aggregations={"count": doublesum("count")},
    # Reuse the `dimension` setting ("page") defined with the data source
    # details instead of repeating the literal here.
    dimension=dimension,
    metric='count',
    filter=Dimension('language') == 'en',
    threshold=2
)

# The data frame is exported from the client object, not from `top`.
dftop = query.export_pandas()

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print('topN')
print(dftop)
print(top)

#------------- test groupBy -----------------
# Point the client at the 'metrics' bard endpoint and its data source.
query = PyDruid(bard_url('metrics'), 'druid/v2/?pretty')
dataSource = 'mmx_metrics'

# Filter: metric == "query/time" AND service == "druid/prod/bard".
is_query_time = Dimension("metric") == "query/time"
is_prod_bard = Dimension("service") == "druid/prod/bard"
filters = is_query_time & is_prod_bard

# One interval: starts five minutes before the current (naive) UTC time and
# carries a PT5M duration suffix.
window_start = datetime.datetime.utcnow() - datetime.timedelta(minutes=5)
intervals = [window_start.isoformat() + '/PT5M']

# Per-minute "count" sums grouped by host and service.
foo = query.groupby(
    datasource=dataSource,
    intervals=intervals,
    granularity="minute",
    dimensions=['host', 'service'],
    aggregations={"count": doublesum("count")},
    filter=filters,
)

# -------- segment meta data ---------
# Fresh client on the default bard endpoint, then a segment metadata query
# for one hour of the 'twitterstream' data source.
query = PyDruid(bard_url(), 'druid/v2')
meta = query.segment_metadata(datasource='twitterstream', intervals='2013-10-04/pt1h')

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(meta)

# ------ examples -----
from pydruid.client import *
from pydruid.bard import *

# topn query
from pydruid.client import *

# Client on the default bard endpoint.
query = PyDruid(bard_url(), 'druid/v2')

# Top 10 'user_mention_name' values by "count" for one day of twitterstream,
# filtered on language, hashtag and time zone, and excluding the
# 'No Mention' placeholder value.
top = query.topn(
    datasource='twitterstream',
    granularity='all',
    intervals='2014-03-03/p1d',  # utc time of 2014 oscars
    aggregations={'count': doublesum('count')},
    dimension='user_mention_name',
    filter=(
        (Dimension('user_lang') == 'en')
        & (Dimension('first_hashtag') == 'oscars')
        & (Dimension('user_time_zone') == 'Pacific Time (US & Canada)')
        & ~(Dimension('user_mention_name') == 'No Mention')
    ),
    metric='count',
    threshold=10
)

# The data frame is exported from the client object, not from `top`.
df = query.export_pandas()

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(df)

# timeseries query

from pydruid.client import *
from pylab import plt
from pydruid.bard import *


# Client on the default bard endpoint.
query = PyDruid(bard_url(), 'druid/v2')

# Daily sums of tweet length and tweet count over four weeks, plus their
# ratio as a post-aggregation, for tweets whose first hashtag is 'sochi2014'.
sochi_only = Dimension('first_hashtag') == 'sochi2014'
ts = query.timeseries(
    datasource='twitterstream',
    granularity='day',
    intervals='2014-02-02/p4w',
    aggregations={
        'length': doublesum('tweet_length'),
        'count': doublesum('count'),
    },
    post_aggregations={
        'avg_tweet_length': (Field('length') / Field('count')),
    },
    filter=sochi_only,
)
df = query.export_pandas()


def _date_part(stamp):
    """Return the portion of *stamp* that precedes its first 'T'."""
    return stamp.partition('T')[0]


# Trim each timestamp string down to the text before the 'T' separator.
df['timestamp'] = df['timestamp'].map(_date_part)

# Plot the average tweet length per timestamp and show the figure.
plt.figure(figsize=(4, 3))
plot_options = dict(
    x='timestamp',
    y='avg_tweet_length',
    ylim=(80, 140),
    rot=20,
    title='Sochi 2014',
)
df.plot(**plot_options)
plt.ylabel('avg tweet length (chars)')
plt.subplots_adjust(bottom=0.3, left=0.2)
plt.show()


# groupby
from igraph import *
from cairo import *
from pandas import concat

# Filter: reply_to_name is not the "Not A Reply" placeholder AND
# user_location == "California".
is_reply = ~(Dimension("reply_to_name") == "Not A Reply")
in_california = Dimension("user_location") == "California"

# Hourly "count" sums per (user_name, reply_to_name) pair over twelve hours.
group = query.groupby(
    datasource='twitterstream',
    granularity='hour',
    intervals='2013-10-04/pt12h',
    dimensions=["user_name", "reply_to_name"],
    filter=is_reply & in_california,
    aggregations={"count": doublesum("count")},
)

# The data frame is exported from the client object, not from `group`.
df = query.export_pandas()

# Assign every distinct name (from either column) an integer id, then add
# the id columns next to the original name columns.
all_names = concat([df['user_name'], df['reply_to_name']])
names = all_names.unique()
nameLookup = {name: position for position, name in enumerate(names)}

df['user_name_lookup'] = df['user_name'].map(nameLookup.get)
df['reply_to_name_lookup'] = df['reply_to_name'].map(nameLookup.get)

# Build an undirected igraph graph with one vertex per distinct name.
g = Graph(len(names), directed=False)
g.vs["name"] = names

# Each row contributes one edge between its two looked-up name ids.
edges = zip(df['user_name_lookup'], df['reply_to_name_lookup'])
g.add_edges(edges)

# Lay the graph out with Fruchterman-Reingold and render it to tweets.png.
layout = g.layout_fruchterman_reingold()
plot(
    g,
    "tweets.png",
    layout=layout,
    vertex_size=2,
    bbox=(400, 400),
    margin=25,
    edge_width=1,
    vertex_color="blue",
)

# Show the default bard URL. Single-argument print(...) behaves the same on
# Python 2 and Python 3, whereas `print x` is a SyntaxError on Python 3.
print(bard_url())

