# #START_LICENSE###########################################################
#
# Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.
# email: jhcepas@gmail.com
#
# This file is part of the Environment for Tree Exploration program (ETE).
# http://ete.cgenomics.org
#
# ETE is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ETE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ETE.  If not, see <http://www.gnu.org/licenses/>.
#
# #END_LICENSE#############################################################


import unittest
import random
import sys
import numpy

from ete2 import *
from ete2.coretype.tree import asRphylo, asETE

# test datasets
nw_simple1 = '((A, (B,C)),D);'
nw_simple2 = '((D, (B,C)),A);'
nw_simple3 = '((B, (A,C)),D);'
nw_simple4 = '((A, (B,C)),D);'

nw_simple5 = '(H,(A,(B,C,D)),D,T,S,(U,Y));'
nw_simple6 = '(H,(A,(B,(C),(T))),D);'

nw_full = '(Ddi0002240:1.457468,Dme0014628:1.235133,(Aga0007658:1.752559,(Cin0011239:0.728210,((Fru0004507:0.184484,((Dre0008391:0.000000,Dre0008390:0.000000)1.000000:0.002729,Dre0008392:0.010931)1.000000:0.122420)1.000000:0.142530,((Xtr0044988:0.422481,(Gga0000982:0.000000,Gga0000981:0.000000)1.000000:0.109228)1.000000:0.035488,(Mdo0014718:0.129337,((Mms0024821:0.027982,Rno0030248:0.029287)1.000000:0.074667,((Cfa0016700:0.031643,Bta0018700:0.047366)1.000000:0.007962,(Ptr0000001:0.005433,((Hsa0010730:0.018311[&&NHX:flag=Red:mood=bad],Hsa0000001:0.003656)1.000000:0.007173,Hsa0010711:0.002730)1.000000:0.014995)1.000000:0.052577)1.000000:0.010070)1.000000:0.035671)1.000000:0.074417)1.000000:0.034385)1.000000:0.184380[&&NHX:flag=Black])1.000000:0.219467)1.000000:0.317782[&&NHX:flag=White]);'
nw_dflt = '(Ddi0002240:1.457468,Dme0014628:1.235133,(Aga0007658:1.752559,(Cin0011239:0.728210,((Fru0004507:0.184484,((Dre0008391:0.000000,Dre0008390:0.000000)1.000000:0.002729,Dre0008392:0.010931)1.000000:0.122420)1.000000:0.142530,((Xtr0044988:0.422481,(Gga0000982:0.000000,Gga0000981:0.000000)1.000000:0.109228)1.000000:0.035488,(Mdo0014718:0.129337,((Mms0024821:0.027982,Rno0030248:0.029287)1.000000:0.074667,((Cfa0016700:0.031643,Bta0018700:0.047366)1.000000:0.007962,(Ptr0000001:0.005433,((Hsa0010730:0.018311,Hsa0000001:0.003656)1.000000:0.007173,Hsa0010711:0.002730)1.000000:0.014995)1.000000:0.052577)1.000000:0.010070)1.000000:0.035671)1.000000:0.074417)1.000000:0.034385)1.000000:0.184380)1.000000:0.219467)1.000000:0.317782);'
nw_topo = '(Ddi0002240,Dme0014628,(Aga0007658,(Cin0011239,((Fru0004507,((Dre0008391,Dre0008390),Dre0008392)),((Xtr0044988,(Gga0000982,Gga0000981)),(Mdo0014718,((Mms0024821,Rno0030248),((Cfa0016700,Bta0018700),(Ptr0000001,((Hsa0010730,Hsa0000001),Hsa0010711))))))))));'
nw_dist = '(Ddi0002240:1.457468,Dme0014628:1.235133,(Aga0007658:1.752559,(Cin0011239:0.728210,((Fru0004507:0.184484,((Dre0008391:0.000000,Dre0008390:0.000000):0.002729,Dre0008392:0.010931):0.122420):0.142530,((Xtr0044988:0.422481,(Gga0000982:0.000000,Gga0000981:0.000000):0.109228):0.035488,(Mdo0014718:0.129337,((Mms0024821:0.027982,Rno0030248:0.029287):0.074667,((Cfa0016700:0.031643,Bta0018700:0.047366):0.007962,(Ptr0000001:0.005433,((Hsa0010730:0.018311,Hsa0000001:0.003656):0.007173,Hsa0010711:0.002730):0.014995):0.052577):0.010070):0.035671):0.074417):0.034385):0.184380):0.219467):0.317782);'
nw2_full = '((((((((YGR138C:0.038472,YPR156C:0.033397)1.000000:0.050097,YOR230W:0.220261)1.000000:0.114267,(YAL018C:0.035830,YBR287W:0.024912,YCL075W:0.030383,YDR055w:0.044474,YOR358W:0.045875)1.000000:0.030762)1.000000:0.061899,(YBR006W:0.012167,YBR241C:0.045848,YCR021c:0.011703,YCR061W:0.034785,YDL024c:0.030633,YDR298C:0.011081,YER141w:0.052598,YER158c:0.088493,YGR028W:0.015282,YGR149W:0.025905,YIR038C:0.021086,YJL155C:0.011531,YLR297W:0.052342,YLR423C:0.023426,YOL083W:0.046727,YOR049C:0.029090,YPL087W:0.015005)1.000000:0.010338)1.000000:0.022530,(YBR052C:0.044257,YBR054W:0.022772,YBR056W:0.033843,YBR183W:0.027453,YCL040w:0.076683,YCL042W:0.186523,YDL021W:0.007351,YDR032c:0.034443,YDR342C:0.024799,YDR343C:0.077931,YER053c:0.042805,YER150w:0.028763,YGR194C:0.019262,YGR244C:0.019371,YHR092C:0.064983,YIL111W:0.085602,YIR039C:0.061777,YJL079C:0.068661,YJL164C:0.066530,YJR073C:0.004770,YKL035W:0.114557,YMR250W:0.026205,YMR297W:0.057263,YNL160W:0.020050,YOR136W:0.031351,YOR273C:0.045478,YOR347C:0.017452,Ydr021w:0.029241,YPL154C:0.026716)1.000000:0.014156)1.000000:0.036360,((YBL078C:0.018348,YBR072W:0.008034,YBR139W:0.030263,YBR149W:0.017972,YBR169C:0.001920,YBR204C:0.017318,YDL022w:0.025509,YDL023c:0.042978,YDL091c:0.013934,YDR001C:0.014137,YDR077W:0.044424,YDR171W:0.011951,YDR178W:0.013526,YDR231C:0.005223,YDR258C:0.019661,YDR513W:0.016566,YDR529C:0.019541,YEL024w:0.020349,YFL014W:0.019925,YFR015C:0.027107,YFR033C:0.019186,YGL006W:0.019943,YGL187C:0.011290,YGL191W:0.030190,YGR008C:0.017479,YGR019W:0.017015,YGR088W:0.015580,YGR174C:0.011180,YHL021C:0.023666,YIL087C:0.011636,YIL107C:0.005697,YKL085W:0.003164,YKL103C:0.039112,YKL148C:0.022439,YKL150W?1:0.015616,YKR016W:0.017934,YKR067W:0.024008,YLL026w:0.019043,YLL041c:0.014446,YLR178C:0.029435,YLR258W:0.052186,YLR294C:0.024676,YLR299W:0.032933,YLR304C:0.045758,YLR327C:0.007435,YLR345W:0.005768,YLR395C:0.045549,YML100W:0.043110,YML120C:0.018385,YML128C:0.016054,YMR105C:0.024179,YMR110C:0.017258,YMR133W:0.023707,YMR196W:0.036526,YNL015W:0.019308,YNL055C:0.025667,YNL100W:0.004518,YNL144C:0.004901,YOL048C:0.026640,YOL053C:0.019814,YOR215C:0.010501,YOR317W:0.018442,YOR374W:0.011387,YPL078C:0.070312,YPL230W:0.020924,YPR149W:0.010801,YDR258C:0.014695)1.000000:0.008367,(YAL060W:0.015064,YBL038W:0.014364,YBL064C:0.028571,YBR269C:0.016747,YCL035C:0.025807,YDL004W:0.007085,YDR031w:0.003761,YDR074W:0.069217,YDR272W:0.011113,YDR272W:0.013582,YDR277C:0.032421,YDR516C:0.040793,YEL039c:0.016632,YER035w:0.024800,YER067w:0.038474,YER182w:0.011733,YFR053C:0.252371,YGL037C:0.047136,YGL121C:0.022573,YGL199C:0.011657,YGR248W:0.012191,YHR104W:0.015812,YIL113W:0.013517,YIL136W:0.039322,YIL162W:0.067884,YJL144W:0.022571,YJL151C:0.017277,YJR096W:0.019693,YKL026C:0.016814,YKL142W:0.033154,YKL151C:0.027220,YKL193C:0.017688,YLL023C:0.013887,YLR080W?1:0.010144,YLR270W:0.017368,YLR356W?1:0.007843,YML004C:0.047041,YMR030W:0.012315,YMR090W:0.048551,YMR170C:0.010852,YMR195W:0.035760,YNL045W:0.020669,YNL115C:0.048069,YNL173C:0.025198,YNL200C:0.026012,YNL274C:0.005603,YNL305C:0.034282,YNR073C:0.022308,YOL071W:0.026411,YOR052C:0.024637,YOR161C:0.021991,YOR220W:0.040045,YPL004C:0.024521)1.000000:0.013616)1.000000:0.006005)1.000000:0.021741,((((YAL034C:0.018415,YBL043W:0.024425,YBL049W:0.022513,YBR046C:0.031770,YCR039c:0.006232,YCR091w:0.026004,YDL079C:0.094125,YDL085w:0.032467,YDL204w:0.022855,YDL218w:0.016136,YDL223c:0.074370,YDR003W:0.073250,YDR018c:0.053591,YDR313C:0.025321,YIL097W:0.012089,YJL141C:0.035049,YKL093W:0.014752,YLR164W?1:0.011162,YLR168C:0.017070,YMR139W:0.035376,YNL093W:0.025831)1.000000:0.015975,((YBL048W:0.007604,YBL075C:0.014956,YBR067C:0.016580,YBR101C:0.007598,YBR147W:0.006150,YDR070c:0.010301,YGR236C:0.008648,YGR238C:0.010611,YGR243W:0.004473,YJL067W:0.007740,YKL016C:0.009956,YKL217W:0.009919,YLR216C:0.041156,YLR217W:0.018315,YLR219W:0.006671,YLR295C:0.017458,YMR107W:0.003236,YNL037C:0.011856,YNL134C:0.001122,YNL194C:0.003001,YNL202W:0.013719,YOR031W:0.014812)1.000000:0.006651,(YBR116C:0.033659,YDL020C:0.022419,YDR043C:0.056132,YDR262w:0.008193,YEL065w:0.266647,YGR146C:0.007376,YGR256W:0.035848,YJR155W:0.008535,YKL163W:0.036797,YNL036W:0.057108,YOR027W:0.016952,YPL135W:0.014251)1.000000:0.024637)1.000000:0.005255)1.000000:0.007999,(YOL109W:0.104113,YAL067C:0.023929,YBR053C:0.029405,YBR203W:0.013784,YBR214W:0.010883,YBR280C:0.017570,YDL169C:0.023945,YDL245C:0.040120,YDR148C:0.019495,YDR255C:0.031336,YDR329C:0.018032,YDR533C:0.001250,YER098w:0.018484,YER142c:0.030633,YER175c:0.004409,YGL045W:0.032391,YGL059W:0.050804,YGR043C:0.016315,YGR052W:0.044183,YGR066C:0.008994,YGR130C:0.022106,YGR201C:0.026083,YGR289C:0.013920,YHR195W:0.038417,YJL153C:0.033018,YJL161W:0.025820,YJR008W:0.027243,YJR080C:0.030356,YKR058W:0.010740,YLL001w:0.008319,YLR271W:0.028940,YMR271C:0.020724,YMR311C:0.031845,YMR322C:0.021606,YNR007C:0.020500,YOL032W:0.017985,YOL053W:0.040060,YOR178C:0.004028,YOR289W:0.017451,YOR386W:0.025749,YPL185W:0.017283,YPL186C:0.009140,YPR150W:0.025491)1.000000:0.011110)1.000000:0.012866,((YBL015W:0.018473,YBL099W:0.046953,YBL100C:0.046467,YBR039W:0.027280,YBR132C:0.057252,YDL181W:0.052427,YDR059C:0.028415,YDR085C:0.055567,YEL011w:0.020230,YER079w:0.013469,YFL030W:0.019404,YGL259W:0.019404,YGR070W:0.028021,YGR142W:0.002517,YGR250C:0.017097,YHL024W:0.016718,YHR051W:0.020669,YIL101C:0.040969,YJL103C:0.015318,YJR048W:0.013049,YJR121W:0.018256,YKL109W:0.013086,YKL141W:0.018280,YLR038c:0.046595,YLR149C:0.034217,YLR312C:0.025271,YML054C:0.014777,YMR031C:0.041817,YMR056C:0.024105,YMR081C:0.019086,YMR136W:0.078413,YMR181C:0.059683,YMR191W:0.007052,YNL052W:0.018461,YNR001C:0.008023,YOL117W:0.037811,YOL126C:0.058282,YOL153C:0.019880,YOR035C:0.045863,YOR065W:0.023359,YOR100C:0.021039,YPL165C:0.012048,YPL201C?1:0.009305,YPL223C:0.013347,YPR020W:0.014464,YPR098C:0.027269,YPR184W:0.035196)1.000000:0.007269,((YAL054C:0.010770,YDR125C:0.021905,YDR216W:0.027122,YEL012w:0.014583,YER024w:0.016562,YGL062W:0.266984,YIL125W:0.016436,YJL089W:0.010550,YJL163C:0.007033,YKL187C:0.012379,YLR259C:0.059779,YLR311C:0.011440,YNL009W:0.025277,YOL084W?1:0.005872,YPL262W:0.002576,YPR030W:0.011058)1.000000:0.010088,(YBL045C:0.015491,YBR001C:0.059987,YBR117C:0.033327,YBR298C:0.021627,YCR005c:0.008913,YDL199c:0.022880,YGL153W:0.006091,YGR110W:0.019326,YHL032C:0.008845,YHR096C:0.012855,YJL045W:0.007726,YJL137C:0.016000,YJR095W:0.035130,YKR076W:0.013664,YLR174W:0.025886,YLR267W:0.009587,YML042W:0.009641,YMR114C:0.242923,YMR206W:0.019201,YOR019W:0.035474,YOR391C:0.009738)1.000000:0.005262)1.000000:0.013775)1.000000:0.014916)1.000000:0.021186)1.000000:1.753931,((((((YCR054C:0.153934,YKL061W:0.133755,YLR109W:0.142228)1.000000:0.198112,(YBL054W:0.050037,YBR034C:0.038518,YGR177C:0.059524,YKL078W:0.026939,YKL106W:0.046488,YKR024C:0.035324,YLR380W:0.028761,YMR310C:0.034731,YOR271C:0.006213,YPL256C:0.059639)1.000000:0.012497)1.000000:0.065443,(YBR247C:0.050191,YBR267W:0.034203,YCLX02C:0.011558,YCR058C:0.051217,YDL151c:0.037685,YDR384C:0.011912,YDR465C:0.012999,YFL022C:0.009532,YGR159C:0.012299,YGR280C:0.013983,YHR206W:0.043975,YIL019W:0.086266,YIL065C:0.027422,YJL194W:0.096096,YKR056W:0.007362,YLL012W:0.031657,YLR073C:0.016216,YLR214W:0.039115,YLR293C:0.015673,YMR093W:0.009965,YMR259C:0.016481,YNR038W:0.015690,YOL019W:0.028424,YOL101C:0.023597,YOR272W:0.033415,YOR294W:0.033731,YPL043W:0.024688,YPL126W:0.011288,YPR144C:0.013358)1.000000:0.010101)1.000000:0.019870,((YBL068W:0.038979,YBR158W:0.008382,YBR186W:0.116086,YCR055C:0.061205,YCR065w:0.003695,YDL037c:0.048015,YDL042C:0.022897,YDL182w:0.039913,YDR508C:0.030289,YER145c:0.024536,YER165w:0.005328,YGR245C:0.009444,YJL069C:0.017205,YLR397C:0.033973,YMR108W:0.024272,YMR309C:0.017863,YNL124W:0.016342,YNL189W:0.033389,YOL007C:0.062976,YOL130W?1:0.068678)1.000000:0.017449,((YAL043C:0.085408,YDR006C:0.131879,YHR205W:0.038850,YJL195C:0.040006,YPL061W:0.070549)1.000000:0.031474,(YDR101C:0.068051,YKL185W:0.029377,YNL061W:0.047111,YOR091W:0.057478)1.000000:0.047015)1.000000:0.102596)1.000000:0.046055)1.000000:0.039969,(YAL003W:0.027682,YAL025C:0.008438,YAL046C:0.016620,YAL059W:0.051270,YAR071W:0.027037,YAR074C:0.005698,YBL024W:0.008170,YBL039C:0.039903,YBR069C:0.243928,YBR079C:0.025916,YBR089W:0.031468,YBR093C:0.043355,YBR142W:0.051410,YBR238C:0.051070,YBR266C:0.043018,YCL046W:0.019256,YCL054W:0.041245,YCR053w:0.025386,YDL050c:0.008063,YDL051W:0.022365,YDL052C:0.053016,YDL063c:0.018176,YDL084w:0.016863,YDL111c:0.008781,YDL122W:0.043325,YDL131w:0.108001,YDL140C:0.024567,YDL148c:0.046571,YDL152w:0.055074,YDL153c:0.017418,YDL167C:0.009333,YDL213c:0.017884,YDR024w:0.148026,YDR144C:0.010083,YDR165W:0.015923,YDR206W:0.005912,YDR324C:0.042943,YDR341C:0.015004,YDR361C:0.015177,YDR398W:0.018739,YDR492W:0.019353,YDR502C:0.023435,YDR527W:0.025343,YEL033w:0.020575,YEL040w:0.048325,YEL046c:0.015069,YER002w:0.014535,YER006w:0.017578,YER025w:0.014587,YER036c:0.016587,YER043c:0.030110,YER049w:0.045735,YER052c:0.022026,YER070w:0.021812,YER110c:0.022076,YGR103W:0.034426,YGR123C:0.049640,YGR124W:0.006737,YGR145W:0.025280,YGR155W:0.054731,YGR160W:0.013018,YGR162W:0.020365,YGR264C:0.018057,YHR046C:0.180512,YHR052W:0.010195,YHR064C:0.016477,YHR070W:0.036840,YHR215W:0.030425,YIL053W:0.012927,YIL066C:0.008671,YIL091C:0.003393,YJL050W:0.016488,YJL109C:0.060257,YJL122W:0.017642,YJR003C:0.022002,YJR016C:0.014266,YJR041C:0.020306,YJR054W:0.037849,YKL076C:0.038156,YKL082C:0.012586,YKL181W:0.005029,YKL191W:0.099536,YLL008w:0.016875,YLL021w:0.024558,YLR068W:0.079458,YLR084C:0.035147,YLR180W:0.009062,YLR196W:0.046001,YLR221C:0.028516,YLR223C:0.057444,YLR243W:0.010906,YLR244C:0.037764,YLR280C:0.020274,YLR355C:0.025286,YLR409C:0.009258,YLR435W:0.092123,YLR449W:0.018710,YML123C:0.018546,YMR011W:0.032472,YMR014W:0.007539,YMR037C:0.063027,YMR049C:0.011294,YMR058W:0.010664,YMR080C:0.040422,YMR128W:0.033444,YMR129W:0.046421,YMR146C:0.021802,YMR217W:0.041486,YMR229C:0.004043,YMR239C:0.006788,YMR241W:0.012776,YMR246W:0.023201,YMR290C:0.003882,YNL002C:0.029441,YNL013C:0.037125,YNL060C:0.001669,YNL062C:0.014338,YNL075W:0.012303,YNL110C:0.007903,YNL112W:0.034459,YNL120C:0.018624,YNL123W:0.022098,YNL132W:0.003370,YNL141W:0.009783,YNL174W:0.101658,YNL182C:0.007662,YNL207W:0.014884,YNL216W:0.170324,YNL221C:0.015522,YNL256W:0.011304,YNL308C:0.045905,YNL313C:0.026778,YNL327W:0.008828,YNR025C:0.048977,YNR043W:0.020504,YNR046W:0.055271,YNR054C:0.005370,YNR075W:0.013830,YOL021C:0.010752,YOL080C:0.031182,YOR116C:0.029862,YOR146W:0.053670,YOR206W:0.051387,YOR233W:0.036870,YOR335C:0.009945,YOR341W:0.023363,YOR355W:0.015312,YOR361C:0.022609,YPL012W:0.003133,YPL019C:0.014463,YPL030W:0.011898,YPL032C:0.047324,YPL093W:0.009715,YPL183C:0.087818,YPL211W:0.006930,YPL226W:0.013388,YPR009W:0.025512,YPR034W:0.010374,YPR110C:0.009930,YPR136C:0.133899,YPR143W:0.021514,YPR145W:0.025839)1.000000:0.005840)1.000000:0.024107,((YHR007C:0.024321,YAL023C:0.037896,YAL036C:0.027457,YAR073W:0.007629,YBL076C:0.007966,YBL087C:0.016367,YBR048W:0.037560,YBR092C:0.027230,YBR121C:0.027699,YBR143C:0.022332,YBR156C:0.004017,YBR249C:0.000577,YCL053C:0.012758,YDL014W:0.011777,YDL060w:0.008162,YDL082w:0.006478,YDL145C:0.025230,YDL208W:0.003093,YDR023W:0.039428,YDR037W:0.023439,YDR060w:0.005655,YDR062W:0.013960,YDR064W:0.002168,YDR321W:0.006361,YDR365C:0.006326,YDR399W:0.003641,YDR447C:0.015148,YDR449C:0.001937,YDR450W:0.016031,YDR471W:0.015488,YEL026w:0.019446,YEL054c:0.008555,YER056c:0.022337,YER060w:0.036023,YER129w:0.011346,YFL045C:0.053502,YGL008C:0.012515,YGL029W:0.067475,YGL078C:0.045179,YGL092W:0.045425,YGL103W:0.009550,YGL225W:0.027201,YGR034W:0.012058,YGR060W:0.013105,YGR061C:0.009325,YGR214W:0.007962,YHL033C:0.014192,YHR019C:0.019476,YHR049W:0.036925,YHR089C:0.010167,YHR128W:0.008165,YHR203C:0.007600,YHR216W:0.002384,YIL052C:0.013691,YIL069C:0.012335,YIL133C:0.004129,YJL111W:0.017878,YJL138C:0.009234,YJL177W:0.002417,YJL183W:0.005059,YJR071W:0.008702,YJR105W:0.058597,YJR123W:0.003913,YKL009W:0.009543,YKL057C:0.034921,YKL081W:0.007191,YKL156W:0.011042,YKR013W:0.029496,YKR025W:0.021172,YKR094C:0.008062,YLL004w:0.026716,YLL044W:0.008058,YLR009W:0.012315,YLR048w:0.004366,YLR056w:0.028151,YLR061W:0.003944,YLR083c:0.016167,YLR129w:0.052379,YLR134w:0.018072,YLR175W:0.025815,YLR179C:0.014679,YLR185W:0.007868,YLR186W:0.055986,YLR212C:0.006866,YLR339C:0.013427,YLR384C:0.043396,YLR413W:0.007107,YLR432W:0.000390,YLR448W:0.013913,YML059C:0.068679,YMR199W:0.180049,YMR202W:0.013830,YMR205C:0.011290,YMR242C:0.002943,YMR308C:0.012121,YMR318C:0.009922,YMR321C:0.007602,YNL065W:0.040820,YNL087W:0.011444,YNL096C:0.016268,YNL111C:0.009092,YNL175C:0.021305,YNL235C:0.017678,YNL247W:0.010779,YNL301C:0.006244,YNL303W:0.026882,YNR009W:0.069305,YNR051C:0.005803,YNR053C:0.011495,YNR067C:0.038533,YOL010W:0.013445,YOL014W:0.050775,YOL120C:0.011740,YOL061W:0.014505,YOL077C:0.027831,YOR063W:0.004912,YOR096W:0.009238,YOR153W:0.010446,YOR234C:0.008998,YOR277C:0.023939,YOR293W:0.013614,YOR309C:0.007759,YPL034W:0.035088,YPL143W:0.030537,YPL160W:0.018401,YPL198W:0.002841,YPL243W:0.014395,YPL245W:0.004341,YPL266W:0.015510,YPR033C:0.028234,YPR044C:0.016689,YPR074C:0.006226,YPR112C:0.012512,YPR125W:0.030210,YHR098C:0.005812)1.000000:0.005387,((YAL012W:0.037345,YAL038W:0.007617,YBL027W:0.013554,YBR106W:0.006914,YBR189W:0.010072,YBR191W:0.005150,YDL075W:0.007897,YDL083C:0.031836,YDL136w:0.006651,YDL191W:0.011001,YDL210W:0.015110,YDR366C:0.015185,YDR417C:0.003784,YDR418W:0.001455,YDR500C:0.007442,YER074w:0.008117,YER117w:0.008649,YER131w:0.002672,YFR031BC:0.012496,YGL030W:0.003864,YGL076C:0.013045,YGL123W:0.012111,YGL135W:0.018213,YGL147C:0.004678,YGR118W:0.013782,YGR148C:0.005154,YGR285C:0.024308,YHL001W:0.021713,YHL015W:0.009606,YHR010W:0.002032,YHR141C:0.007074,YHR208W:0.038459,YIL018W:0.000289,YIL039W:0.015756,YIL148W:0.016059,YJL080C:0.017726,YJL136C:0.009858,YJL148W:0.025592,YJL188C?1:0.009377,YJL189W:0.030581,YJR063W:0.036817,YJR145C:0.011576,YKL006W:0.005959,YKR043C:0.016532,YKR057W:0.009924,YKR059W:0.011284,YLL045c:0.014089,YLL047W:0.023354,YLR029c:0.010768,YLR044c:0.011219,YLR060w:0.020979,YLR062C:0.006945,YLR076C:0.010549,YLR198C:0.003174,YLR249W:0.022251,YLR325C:0.009715,YLR340W:0.008365,YLR344W:0.010317,YLR367W:0.003094,YLR388W:0.021736,YML063W:0.006051,YMR121C:0.017411,YMR131C:0.026306,YNL119W:0.014916,YNL162W:0.016264,YNL302C:0.004123,YNR050C:0.058206,YOL121C:0.009887,YOL127W:0.013974,YOL040C:0.007057,YOR224C:0.013626,YOR254C:0.017923,YOR310C:0.006634,YOR312C:0.003510,YPL079W:0.030247,YPL131W:0.022146,YPL142C:0.015716,YPL220W:0.009552,YPL244C:0.022966,YPR102C:0.008204,YPR137W:0.017679)1.000000:0.002907,((YBR032W:0.016097,YBR181C:0.009522,YBR187W:0.018681,YDR012W:0.011279,YGL102C:0.009205,YGR027C:0.003690,YJL157C:0.024018,YJL190C:0.007303,YLR058c:0.073551,YLR264W:0.018569,YLR300W:0.020115,YLR342W:0.034792,YLR359W:0.022991,YLR372W:0.011058,YMR300C:0.032100,YMR305C:0.014402,YNL069C:0.028621,YOR133W:0.004537,YOR167C:0.014198,YOR182C:0.023853,YOR326W:0.024632,YPL080C:0.064427,YPL081W:0.012279,YPL090C:0.012152)1.000000:0.006473,(YOR298W:0.233587,((YDR019C:0.040149,YKL096W:0.037806)1.000000:0.064948,(YKL108W:0.033111,YPR113W:0.038607)1.000000:0.063282)1.000000:0.016535)1.000000:0.229462)1.000000:0.024472)1.000000:0.008239)1.000000:0.017711)1.000000:0.082401);'

Ago0000003="MATDNFYRMMILLEEPHKQHSTDGRKIENASHDFVDELLLPIQVDDLEVLNTWFDKFDEEICIPNEGHIKYEISSDGLIVLILDKEISEVIGLVRDFVAANQLDEDSSSDAK"
fasta_example = """
>Ago0000001
MFPSSATLLSFYAATSYVAALYVFPGRRIYGHASRNDPKAIWYGMKAVGCGVLANLLIIPWLQSRLAS
DGFSFVDCFFRLGLVPGAYAHFRGLHWDTLAYATDILRALSILGSLYAADLLDSAAYYLLVPDTSPVVDLVDRLSCTTGLRNYVFGPITEELVYTSMVLQNYRLLQPTISRAMLLLATPMFFGVAHVHHARQLLATGHRPAQVALTSSFQILYTTLFGTFT
NYIYYHTAGNLWACILLHAVCNYLSFPSLSSDVFADYCAKVPPALRALWKMRLLHAWGYTYRLCLLCGLLAFLDGIRTFSSSAGDLFLDA
>Ago0000002
MKADYTARASKTRGGQKVGGVRRESAGQGMSMSEDEFQAAQVRQRRSKSCAGPLEGVQERVLRAHTRQAKEKRKSEVLIAAQSLDYELQNVKNLKRISIGSMDLLMDPEMEFRMNGAAVSPSASPRNSLTEESLEWFQLSAEQEQGGELPLHAESIDAHSSAGGSSDTARDELLTSASSVVPSEELSCSHPLSPPAAAKKSTLMSSRSKLGRQHTDTTHTKQATGEPFS
SNLLWVRADQHPNVKPENYLELVHDTLNNLRIGARHGRGDSPVQQSNTPVANGESSTASARSLVRKQSRLRKSFTEVEAIEESQFTDSIGSEIPVGKRMRVSSLKEITEELTRISNNAGLTDSDAITLARTLGIGSQSADEHATFSECVPSAEAEENEYASSILAKNGLAIPARSSLRRSKFTTYRIRSAGSDSSPPENKSTSSSSLAAAYERNSRSPKMQKAAFKGSH
PRLSGENTSLAPQSPNSINDIYDHYNTSDTDDGSPELQGSPVTHTSDASFTSSPNIASGEHCARNAHKISPISVSQLSATSSSSTSPGSSKSAESTKSWTFDQKSPLRSITLNSSTKGHKISAEKKKGWSWFGNSRRPSSESVLLPGTDSEGCPMDESQPIQSEEYPLKHPISPVSDETPRRGNHSRNRHQTSSPEELEIGWTETSLTTLDGDTEQDGKPQHPKTKEKL
EKKFMKIFKKRSGTGLSQDTPNQGEESSLKATSRIRRSRESKRQTDEPSASLTTSVSLNNHNNGRKSVSISRPTWQRTESSYNGDKGKGSITSLQPAVSVTSSRDSHEQTRQDPQPQSLVTKMDGSSSNGVTVSSQRAGFSKKGRSKTSHSRSKLSTSHDIAPVDNRPSDVDQKSTEANNTLSSRAEDSHPSLDPGKSALGPGEIAHSLPPRKLRFDDVLRPEKPNSPM
KFTPSAFGFPLPPLTVSTVIMFDHRLPIYVERAIYRLSHLKLSDPKRELRQQVLLSNFMYSYLNLVNHSLYLQQIEEDKNQGIQFESTDNMASSSGVVPEKW


 > Ago0000003"""+"\t"+"""Comment1"""+"\t"+"""Comment2

MATDNFYR MMILLEEPHK QHSTDGRKIENASHDFVDELLLPIQVDDLEVLNTWFDKFDEEICIPNEGHIKYEISSDGLIVLILDKEISEVIGLVRDFVAANQLDEDSSSDAK

 > Ago0000004
MVSKTLPLYSKATLQKHTDRTSCWVSVGNRKIYDVSQFLDEHPGGDQYILDYAGKDITAVLKDKLIHEHTEAAYEILDESYLVGYLATEEEEIKLLTNEKHVMEVTPENLDTTTFVKELPAEEVLSVATDFGTDYTKHHFLDLNKPLLMQVLRGNFTRDFYIDQIHRPRHYGKGSAPLFGNFLEPLSKTVWWVVPMVWYPVVLYYLTRALQNMPAHLALTCFAAGVFVW
TLIEYSLHRFLFHFDDNMPESNIAFTVHFLLHGVHHYLPMDKYRLVMPPALFVVLCAPFYRLVFSIFPEYCACGCFAGGLFGYVCYDVTHYFLHHHKLPPFMRKLKKYHLEHHYKNYELGFGVTSWYWDKVFGTYLASNSPVSRPKCE
>Ago0000005
MYIKKVIIRGFKTYKNKTEIDNFSPHHNVVVGFNGSGKSNFFAAIRFVLSDDYTNLKREERRSLIYQGTSSVMSGYVEIVFHGAENRTLLGAQDGGVIHIRRTVGLKKDEYMINNKNASRSDVQRLLESAGFSTSNPYNIVPQGRIVSLTNAQNRERLQLLEEVIGAKSFERKLKESLQKMETTEKNREKIRIELEEVEAKLNELDEERKELEKYNSLDRKRKMCQFAL
YDRELNEVTSMVEKLDGEYTNTLVLSEQYIQELEKRESLIETLTKSLNQLGSELKMKESTDLQQAKDSELELAKHLADLNVKYEELISQNNALKEQSASNSESLLAIRSQIAVKEQQLARLSPRFEQLTIEEAAMKAEFKALQQRQRDLLAKRGKYSQFRNKAERDAWIDQELSILKEELQCSSIALTSISEERDSLRIKLTTLDDQIMELNDSAHGPGINAELEDVQQ
ELTVLKKAHLFKIDERKQLWRSEQKIQSVLESLVDDVKRAEGTLSETMDRSLATGLKNVSEIAQRLNLPEGSVFGPLGELIKISEKYKACAEVVGGTSLFHVVVDTENTAALLMQELYNSKGGRVTFIPLNRVHVDSNIVYPSNDEHHCTPLIKKIKYDPKFERAIKHVFGKTIVVKDLNQGTKLAKQFRLNAITLDGDKADSRGVLTGGFHDHHKQKRLDSMRDLKSL
KKEQQGNKSQLEEVKEKLHSIDQEIDELNDKIKKSMSRREMILTQVEAVNIKLEKAKRERFLLEETMVQLISKEEKAKINQKLLQDKLDMYTEDLSRDFDTELTLTEREELDEIAKKLPDLENLLNTTTDALSSVVVKIDSLKAELDSKLKPQAKELEDQPNEIMSTTAIQNLQEHIDAVEDERKTLLERKSTVDNEVQKISEIIDTLKSRQEEEEKSLEKANSQQRAL
LKKLDNYQKEAEKSMLRKMTLSTRRDELQQKIRDIGLLPDDSADKYHNMSSSELLKELSSINDKISKMTNVNKRALENFKKFDDKQKDVMKRAKELDESKESIEKLIDKLKKQKVEAVENTFKKVSENFTQLFEKMVPRGTGKLVIHRRENEPSKPSKRQQKKRKRQETEDVHFNDDQDENSSQDSIYSGVSIEVSFNSKKDEQVHVEQLSGGQKTVCAIALILAIQMV
DPAPFYLFDEIDAALDKQYRTAVAATVKQLSSQAQFICTTFRGDMIAVADRFYRVNFENKISTVVEVTKAEALNFVTGREKNENVI

> Ago0000006
MYKLQVVLVPPSIETELPVVGLPAGSLDNSGFAQGHAQPISTSSIAGGRGEFSSGIGNTSANAVYLSGLVGAHIRRARGRKFLHFTKPTNSLYELADEIVEKCAKMYPGLAEEPEIVTLQDVNECDLDPDFVVKDVFNMDNTVRVLLRNDLDEAAGDRTIYMKKRKLNTGAAGVPAAGGSVQAAVLNVAKKRASIKTSALRVSTPLANQIYPPPTKKQVNSDFEDDDVA
DKSILPPPPPQSPPIRISSGMDQKRINMNDNAVSKSETVDPNKSRQQRLPSGTPMRPVSMVETPNRVSLTGPTVLSESTVSQKSTATPIITNIRITSGMLRIPEPRLSEVEKELKEGPASPAVDLPARPSRIPMKKPYNPSMQQDEDLSSSSSSTEDNVPAVPYENELERSGPTMATRQSSSTIADDQGSPTKKSRFEKNNIGLVELPSPRKSSLEKKVSKLNKGLASD
KGHEDSTGGITRKDHFSDEESEASQNGSVVVNRPETQREKSFQKSELLKIFNSKRFDLPPRFKNSASEDEPTSSNQSRKKKPYVTVLNKDIDNSSPDPRNIIPRRTQRHAAQKAAQSISSGTSRSNVFSGEEKNKYSEENQGDNIASDDNEGVYVHESNALKKLNVHPLKESVVQDTLTGAQDINNPVSSRTVHPSISEHAVTAAASGTLSSNPEYRVAPIVTPTLMSS
AVPGKPEAILSKQGDPPASTALKPQLAPARKVESTQKKQAGKKSDTNQPSRTNSKKTGLPADTAVPSSQKTVRARAAQIDAQIKQTATSSQSPVAESQSSVLNSEEAVADKSEVDNNSSTQDKSLQQKTSKRKTPVKRTANADKKAVTLKRSSNSKGKEKDTEKGKERIKEKEKEKEKEKEKEKEKVYQTPEFVESSDDERADAKTPLEKHSSDRPKSGSPSKHENGQE
KQDSFSGIILKKTAIEKDKVVANAPHKVNEKPMTADAGSTNLSPSGKVSKLDELRSKFTKGRFPQGSSQKQIQQQKNTPKTSSKSIVPTVSDKFKPMDSTSGASSDDSADDDDDDSMDSSTEDEGISMKKPRRGIVQPPKGSVSAPVKHVPQSLSSELEGVPQSTQIPNEATNTAPLTKLLDHISPPSTAKTITSRGVTESKGTSNRSLSSLSDLASRGVPDVREKGVN
YKKTAPNVVNNSSSDESEQETNDGSDSSDSDNSDSDSSDSSDDGNNFISAKSASKALRKNKASSGFASLIRDSQKK
>Ago0000007
MGSLQAREMEELYVYSGENMPRVRLCLLRRYSCVEEVEAYVGRVQDRYTLRLGTVETITGNLKIGCDTHADCEACPFYILEYNEVTTEYSLWKAADADWRLDSIVATMYTGAGGPARERGGLPRELQGLREGLDMEYVWDCLRRLNLPLEQIDWAEFRELLESMLAAKRVADDDCVTLRGVVALAALQATVQTSKRAVRSQLRAYDRRVRAASTPASTRTSSPESLLPA
DSRESSVSSVHYTYGLPATQLDANFKTYFRSMAENFELFEEPALALRGRVAKPRKKAAGRHGHRPVQA
>Ago0000008
MQISAVALSGAALLACASAHQHDRKKPHLVVVYGDVYVSGTDTFTKYYSKMTDKLPASGSDGWSYSWSTATSWTSDSVSTEPSAEPTSTEPTNLPEVTSSPATPVQPTSAPEETFEEPTESTSAEPTSSPEPVKSAEPTSSPEPVKSVEPTSSPEPVKSVEPVKSAEPVKSPEPVKSPEPVKSPEPVKSPEPVKSAEPTSSPEPTGSPQRSRPAGGQNGASFEEEILRA
HNSRRQLHKDTKPLTWSEELAKFARDFANQYDCSGRLVHSDSPYGENLAVGYPTPEKAVKAWYDEISDYSYSHPSFSFSTGHFSQLVWKDTKHLGCAVKKCGGSVGDYLICSYDPAGNFLRRFGENVAPIA
>Ago0000009
MRACLLIPLLLVAATARTVTVSVTATRTVTVISAAAVTTLRTTVFTTAYRTQTTPLTETPLSRPTAVNNTFASAVLDLHNDYRRRHHAVPLRWNSTLYTHAQHYANRILCNGSLVHSGLPHGENLALGYSPAAAVTAWYDEIAEYDFSTPGFSHATGHFTQLVWRSTTSVGCAYVMCGPCYGLYIICQYDPPGNVADQYVANVLP
>Ago0000010
MESQAAVEEPQHSVVIETPPTTAESSETPVADTATGAEPEGAGGAAVVAPKRMPTRADFPPLSSVIFETQKVQWGPNMKKPESQSASPSPSPGPVGSGAKPMRSKTMQEAFSLDLQTQVTISKAEFSKFVVSVKQSHSVSIESTLSKLSRTFLITGSPTNVYNAKRELVKKLTRPVTVVIQVPSKTVSSIIGPGGRMIREITNAAGGIKIDIAKTAEADAYDADLDDQL
INISLHGDVASVNFAKDKILSIVKEETKNATISVAVENKQLIPFISLADVEISEDVTVKAFPNGSEKIVLMGPRDEAKEAKVNVQNYLNTLASKVSEKKISIPRKFQPLIDAEDVREKYKVSVIFPTALGDDTVSFYGLSANLDDAIAYARQSSKQYIVESLEVSKAHGKNVAHAKNLMFYFAKYDILKDIKESFKEVKLVLPTPEELPGLDNVSINIISKADIAEQTK
TVRKQIINIVNRLTPSHVLAVDDLDYELFHKDIKQALSKAEIPFVQLGDHYEGDNTVLLFAKVDEEDFQPSPEEVKEHLEKVAAVLDEVRTKQSKLFTKIVNFDAEFQVLHFSDDSVTWNLVLENITSAGGHAQIKLHTPSEDEITIRGDEKAVKAAVKAFESIAENPSKKSKLTVSVPANTVSRLIGPKGTNLAQIRQKFDVQIDVPSESNDTNTEITLTGLEYNLQH
AKTHIASEAKKWADITTKELIVPTKYHGSLIGSQGTYRIRLENKYSVRIQFPKEGEVVTIKGPSRGVNKAHAELKALLDFEIENGHKSVINVPVEHVPRVIGKNGDVINGIRAELGVELKLLQNTKTAKEQNLDTVQLEITGSRQAIKEASKAVDAIIAEASDFTTKQLEIDAKYHKLIVGPGGSTLKDFISKAGGDDIRNKTVDVPNAESTNKVITISGPKTFVEKMS
KALNQIVQDIKASVAKELNIPADRQGALIGPGGSVRRQLESQFNVRIEVPDKGKEGKVTIHGRPEAVEKCEKEIFSTIIRDSYDQEIMVPAVYHAFVSERGQLINKLRMTYFINVKHGNSSKKANKLSRSEQPIPIERVRGSEGEGTKLTIEEVSAPEASANDNIPWRLTYEHVDLSDILGEEGKHAMTKEQALEAAADQIKERIELAPKANCIGYLWCENVKKFNKVV
GPGGSNIKQIRETTNTLINVPKKSDKVSDIIYVRGTKESVEKACKMICDALNK
"""

fasta_example_output = """>Ago0000001
MFPSSATLLSFYAATSYVAALYVFPGRRIYGHASRNDPKAIWYGMKAVGCGVLANLLIIPWLQSRLASDGFSFVDCFFRL
GLVPGAYAHFRGLHWDTLAYATDILRALSILGSLYAADLLDSAAYYLLVPDTSPVVDLVDRLSCTTGLRNYVFGPITEEL
VYTSMVLQNYRLLQPTISRAMLLLATPMFFGVAHVHHARQLLATGHRPAQVALTSSFQILYTTLFGTFTNYIYYHTAGNL
WACILLHAVCNYLSFPSLSSDVFADYCAKVPPALRALWKMRLLHAWGYTYRLCLLCGLLAFLDGIRTFSSSAGDLFLDA

>Ago0000002
MKADYTARASKTRGGQKVGGVRRESAGQGMSMSEDEFQAAQVRQRRSKSCAGPLEGVQERVLRAHTRQAKEKRKSEVLIA
AQSLDYELQNVKNLKRISIGSMDLLMDPEMEFRMNGAAVSPSASPRNSLTEESLEWFQLSAEQEQGGELPLHAESIDAHS
SAGGSSDTARDELLTSASSVVPSEELSCSHPLSPPAAAKKSTLMSSRSKLGRQHTDTTHTKQATGEPFSSNLLWVRADQH
PNVKPENYLELVHDTLNNLRIGARHGRGDSPVQQSNTPVANGESSTASARSLVRKQSRLRKSFTEVEAIEESQFTDSIGS
EIPVGKRMRVSSLKEITEELTRISNNAGLTDSDAITLARTLGIGSQSADEHATFSECVPSAEAEENEYASSILAKNGLAI
PARSSLRRSKFTTYRIRSAGSDSSPPENKSTSSSSLAAAYERNSRSPKMQKAAFKGSHPRLSGENTSLAPQSPNSINDIY
DHYNTSDTDDGSPELQGSPVTHTSDASFTSSPNIASGEHCARNAHKISPISVSQLSATSSSSTSPGSSKSAESTKSWTFD
QKSPLRSITLNSSTKGHKISAEKKKGWSWFGNSRRPSSESVLLPGTDSEGCPMDESQPIQSEEYPLKHPISPVSDETPRR
GNHSRNRHQTSSPEELEIGWTETSLTTLDGDTEQDGKPQHPKTKEKLEKKFMKIFKKRSGTGLSQDTPNQGEESSLKATS
RIRRSRESKRQTDEPSASLTTSVSLNNHNNGRKSVSISRPTWQRTESSYNGDKGKGSITSLQPAVSVTSSRDSHEQTRQD
PQPQSLVTKMDGSSSNGVTVSSQRAGFSKKGRSKTSHSRSKLSTSHDIAPVDNRPSDVDQKSTEANNTLSSRAEDSHPSL
DPGKSALGPGEIAHSLPPRKLRFDDVLRPEKPNSPMKFTPSAFGFPLPPLTVSTVIMFDHRLPIYVERAIYRLSHLKLSD
PKRELRQQVLLSNFMYSYLNLVNHSLYLQQIEEDKNQGIQFESTDNMASSSGVVPEKW

>Ago0000003"""+"\t"+"""Comment1"""+"\t"+"""Comment2
MATDNFYRMMILLEEPHKQHSTDGRKIENASHDFVDELLLPIQVDDLEVLNTWFDKFDEEICIPNEGHIKYEISSDGLIV
LILDKEISEVIGLVRDFVAANQLDEDSSSDAK

>Ago0000004
MVSKTLPLYSKATLQKHTDRTSCWVSVGNRKIYDVSQFLDEHPGGDQYILDYAGKDITAVLKDKLIHEHTEAAYEILDES
YLVGYLATEEEEIKLLTNEKHVMEVTPENLDTTTFVKELPAEEVLSVATDFGTDYTKHHFLDLNKPLLMQVLRGNFTRDF
YIDQIHRPRHYGKGSAPLFGNFLEPLSKTVWWVVPMVWYPVVLYYLTRALQNMPAHLALTCFAAGVFVWTLIEYSLHRFL
FHFDDNMPESNIAFTVHFLLHGVHHYLPMDKYRLVMPPALFVVLCAPFYRLVFSIFPEYCACGCFAGGLFGYVCYDVTHY
FLHHHKLPPFMRKLKKYHLEHHYKNYELGFGVTSWYWDKVFGTYLASNSPVSRPKCE

>Ago0000005
MYIKKVIIRGFKTYKNKTEIDNFSPHHNVVVGFNGSGKSNFFAAIRFVLSDDYTNLKREERRSLIYQGTSSVMSGYVEIV
FHGAENRTLLGAQDGGVIHIRRTVGLKKDEYMINNKNASRSDVQRLLESAGFSTSNPYNIVPQGRIVSLTNAQNRERLQL
LEEVIGAKSFERKLKESLQKMETTEKNREKIRIELEEVEAKLNELDEERKELEKYNSLDRKRKMCQFALYDRELNEVTSM
VEKLDGEYTNTLVLSEQYIQELEKRESLIETLTKSLNQLGSELKMKESTDLQQAKDSELELAKHLADLNVKYEELISQNN
ALKEQSASNSESLLAIRSQIAVKEQQLARLSPRFEQLTIEEAAMKAEFKALQQRQRDLLAKRGKYSQFRNKAERDAWIDQ
ELSILKEELQCSSIALTSISEERDSLRIKLTTLDDQIMELNDSAHGPGINAELEDVQQELTVLKKAHLFKIDERKQLWRS
EQKIQSVLESLVDDVKRAEGTLSETMDRSLATGLKNVSEIAQRLNLPEGSVFGPLGELIKISEKYKACAEVVGGTSLFHV
VVDTENTAALLMQELYNSKGGRVTFIPLNRVHVDSNIVYPSNDEHHCTPLIKKIKYDPKFERAIKHVFGKTIVVKDLNQG
TKLAKQFRLNAITLDGDKADSRGVLTGGFHDHHKQKRLDSMRDLKSLKKEQQGNKSQLEEVKEKLHSIDQEIDELNDKIK
KSMSRREMILTQVEAVNIKLEKAKRERFLLEETMVQLISKEEKAKINQKLLQDKLDMYTEDLSRDFDTELTLTEREELDE
IAKKLPDLENLLNTTTDALSSVVVKIDSLKAELDSKLKPQAKELEDQPNEIMSTTAIQNLQEHIDAVEDERKTLLERKST
VDNEVQKISEIIDTLKSRQEEEEKSLEKANSQQRALLKKLDNYQKEAEKSMLRKMTLSTRRDELQQKIRDIGLLPDDSAD
KYHNMSSSELLKELSSINDKISKMTNVNKRALENFKKFDDKQKDVMKRAKELDESKESIEKLIDKLKKQKVEAVENTFKK
VSENFTQLFEKMVPRGTGKLVIHRRENEPSKPSKRQQKKRKRQETEDVHFNDDQDENSSQDSIYSGVSIEVSFNSKKDEQ
VHVEQLSGGQKTVCAIALILAIQMVDPAPFYLFDEIDAALDKQYRTAVAATVKQLSSQAQFICTTFRGDMIAVADRFYRV
NFENKISTVVEVTKAEALNFVTGREKNENVI

>Ago0000006
MYKLQVVLVPPSIETELPVVGLPAGSLDNSGFAQGHAQPISTSSIAGGRGEFSSGIGNTSANAVYLSGLVGAHIRRARGR
KFLHFTKPTNSLYELADEIVEKCAKMYPGLAEEPEIVTLQDVNECDLDPDFVVKDVFNMDNTVRVLLRNDLDEAAGDRTI
YMKKRKLNTGAAGVPAAGGSVQAAVLNVAKKRASIKTSALRVSTPLANQIYPPPTKKQVNSDFEDDDVADKSILPPPPPQ
SPPIRISSGMDQKRINMNDNAVSKSETVDPNKSRQQRLPSGTPMRPVSMVETPNRVSLTGPTVLSESTVSQKSTATPIIT
NIRITSGMLRIPEPRLSEVEKELKEGPASPAVDLPARPSRIPMKKPYNPSMQQDEDLSSSSSSTEDNVPAVPYENELERS
GPTMATRQSSSTIADDQGSPTKKSRFEKNNIGLVELPSPRKSSLEKKVSKLNKGLASDKGHEDSTGGITRKDHFSDEESE
ASQNGSVVVNRPETQREKSFQKSELLKIFNSKRFDLPPRFKNSASEDEPTSSNQSRKKKPYVTVLNKDIDNSSPDPRNII
PRRTQRHAAQKAAQSISSGTSRSNVFSGEEKNKYSEENQGDNIASDDNEGVYVHESNALKKLNVHPLKESVVQDTLTGAQ
DINNPVSSRTVHPSISEHAVTAAASGTLSSNPEYRVAPIVTPTLMSSAVPGKPEAILSKQGDPPASTALKPQLAPARKVE
STQKKQAGKKSDTNQPSRTNSKKTGLPADTAVPSSQKTVRARAAQIDAQIKQTATSSQSPVAESQSSVLNSEEAVADKSE
VDNNSSTQDKSLQQKTSKRKTPVKRTANADKKAVTLKRSSNSKGKEKDTEKGKERIKEKEKEKEKEKEKEKEKVYQTPEF
VESSDDERADAKTPLEKHSSDRPKSGSPSKHENGQEKQDSFSGIILKKTAIEKDKVVANAPHKVNEKPMTADAGSTNLSP
SGKVSKLDELRSKFTKGRFPQGSSQKQIQQQKNTPKTSSKSIVPTVSDKFKPMDSTSGASSDDSADDDDDDSMDSSTEDE
GISMKKPRRGIVQPPKGSVSAPVKHVPQSLSSELEGVPQSTQIPNEATNTAPLTKLLDHISPPSTAKTITSRGVTESKGT
SNRSLSSLSDLASRGVPDVREKGVNYKKTAPNVVNNSSSDESEQETNDGSDSSDSDNSDSDSSDSSDDGNNFISAKSASK
ALRKNKASSGFASLIRDSQKK

>Ago0000007
MGSLQAREMEELYVYSGENMPRVRLCLLRRYSCVEEVEAYVGRVQDRYTLRLGTVETITGNLKIGCDTHADCEACPFYIL
EYNEVTTEYSLWKAADADWRLDSIVATMYTGAGGPARERGGLPRELQGLREGLDMEYVWDCLRRLNLPLEQIDWAEFREL
LESMLAAKRVADDDCVTLRGVVALAALQATVQTSKRAVRSQLRAYDRRVRAASTPASTRTSSPESLLPADSRESSVSSVH
YTYGLPATQLDANFKTYFRSMAENFELFEEPALALRGRVAKPRKKAAGRHGHRPVQA

>Ago0000008
MQISAVALSGAALLACASAHQHDRKKPHLVVVYGDVYVSGTDTFTKYYSKMTDKLPASGSDGWSYSWSTATSWTSDSVST
EPSAEPTSTEPTNLPEVTSSPATPVQPTSAPEETFEEPTESTSAEPTSSPEPVKSAEPTSSPEPVKSVEPTSSPEPVKSV
EPVKSAEPVKSPEPVKSPEPVKSPEPVKSPEPVKSAEPTSSPEPTGSPQRSRPAGGQNGASFEEEILRAHNSRRQLHKDT
KPLTWSEELAKFARDFANQYDCSGRLVHSDSPYGENLAVGYPTPEKAVKAWYDEISDYSYSHPSFSFSTGHFSQLVWKDT
KHLGCAVKKCGGSVGDYLICSYDPAGNFLRRFGENVAPIA

>Ago0000009
MRACLLIPLLLVAATARTVTVSVTATRTVTVISAAAVTTLRTTVFTTAYRTQTTPLTETPLSRPTAVNNTFASAVLDLHN
DYRRRHHAVPLRWNSTLYTHAQHYANRILCNGSLVHSGLPHGENLALGYSPAAAVTAWYDEIAEYDFSTPGFSHATGHFT
QLVWRSTTSVGCAYVMCGPCYGLYIICQYDPPGNVADQYVANVLP

>Ago0000010
MESQAAVEEPQHSVVIETPPTTAESSETPVADTATGAEPEGAGGAAVVAPKRMPTRADFPPLSSVIFETQKVQWGPNMKK
PESQSASPSPSPGPVGSGAKPMRSKTMQEAFSLDLQTQVTISKAEFSKFVVSVKQSHSVSIESTLSKLSRTFLITGSPTN
VYNAKRELVKKLTRPVTVVIQVPSKTVSSIIGPGGRMIREITNAAGGIKIDIAKTAEADAYDADLDDQLINISLHGDVAS
VNFAKDKILSIVKEETKNATISVAVENKQLIPFISLADVEISEDVTVKAFPNGSEKIVLMGPRDEAKEAKVNVQNYLNTL
ASKVSEKKISIPRKFQPLIDAEDVREKYKVSVIFPTALGDDTVSFYGLSANLDDAIAYARQSSKQYIVESLEVSKAHGKN
VAHAKNLMFYFAKYDILKDIKESFKEVKLVLPTPEELPGLDNVSINIISKADIAEQTKTVRKQIINIVNRLTPSHVLAVD
DLDYELFHKDIKQALSKAEIPFVQLGDHYEGDNTVLLFAKVDEEDFQPSPEEVKEHLEKVAAVLDEVRTKQSKLFTKIVN
FDAEFQVLHFSDDSVTWNLVLENITSAGGHAQIKLHTPSEDEITIRGDEKAVKAAVKAFESIAENPSKKSKLTVSVPANT
VSRLIGPKGTNLAQIRQKFDVQIDVPSESNDTNTEITLTGLEYNLQHAKTHIASEAKKWADITTKELIVPTKYHGSLIGS
QGTYRIRLENKYSVRIQFPKEGEVVTIKGPSRGVNKAHAELKALLDFEIENGHKSVINVPVEHVPRVIGKNGDVINGIRA
ELGVELKLLQNTKTAKEQNLDTVQLEITGSRQAIKEASKAVDAIIAEASDFTTKQLEIDAKYHKLIVGPGGSTLKDFISK
AGGDDIRNKTVDVPNAESTNKVITISGPKTFVEKMSKALNQIVQDIKASVAKELNIPADRQGALIGPGGSVRRQLESQFN
VRIEVPDKGKEGKVTIHGRPEAVEKCEKEIFSTIIRDSYDQEIMVPAVYHAFVSERGQLINKLRMTYFINVKHGNSSKKA
NKLSRSEQPIPIERVRGSEGEGTKLTIEEVSAPEASANDNIPWRLTYEHVDLSDILGEEGKHAMTKEQALEAAADQIKER
IELAPKANCIGYLWCENVKKFNKVVGPGGSNIKQIRETTNTLINVPKKSDKVSDIIYVRGTKESVEKACKMICDALNK
"""

CYS1_DICDI="-----MKVILLFVLAVFTVFVSS---------------RGIPPEEQ------------SQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG-AVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII--"
phylip_interlived = """ 3 384
CYS1_DICDI   -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---- --------SQ
ALEU_HORVU   MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG ALGRTRHALR
CATH_HUMAN   ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK---- --------FH

             FLEFQDKFNK KY-SHEEYLE RFEIFKSNLG KIEELNLIAI NHKADTKFGV NKFADLSSDE
             FARFAVRYGK SYESAAEVRR RFRIFSESLE EVRSTN---- RKGLPYRLGI NRFSDMSWEE
             FKSWMSKHRK TY-STEEYHH RLQTFASNWR KINAHN---- NGNHTFKMAL NQFSDMSFAE

             FKNYYLNNKE AIFTDDLPVA DYLDDEFINS IPTAFDWRTR G-AVTPVKNQ GQCGSCWSFS
             FQATRL-GAA QTCSATLAGN HLMRDA--AA LPETKDWRED G-IVSPVKNQ AHCGSCWTFS
             IKHKYLWSEP QNCSAT--KS NYLRGT--GP YPPSVDWRKK GNFVSPVKNQ GACGSCWTFS

             TTGNVEGQHF ISQNKLVSLS EQNLVDCDHE CMEYEGEEAC DEGCNGGLQP NAYNYIIKNG
             TTGALEAAYT QATGKNISLS EQQLVDCAGG FNNF------ --GCNGGLPS QAFEYIKYNG
             TTGALESAIA IATGKMLSLA EQQLVDCAQD FNNY------ --GCQGGLPS QAFEYILYNK

             GIQTESSYPY TAETGTQCNF NSANIGAKIS NFTMIP-KNE TVMAGYIVST GPLAIAADAV
             GIDTEESYPY KGVNGV-CHY KAENAAVQVL DSVNITLNAE DELKNAVGLV RPVSVAFQVI
             GIMGEDTYPY QGKDGY-CKF QPGKAIGFVK DVANITIYDE EAMVEAVALY NPVSFAFEVT

             E-WQFYIGGV F-DIPCN--P NSLDHGILIV GYSAKNTIFR KNMPYWIVKN SWGADWGEQG
             DGFRQYKSGV YTSDHCGTTP DDVNHAVLAV GYGVENGV-- ---PYWLIKN SWGADWGDNG
             QDFMMYRTGI YSSTSCHKTP DKVNHAVLAV GYGEKNGI-- ---PYWIVKN SWGPQWGMNG

             YIYLRRGKNT CGVSNFVSTS II--
             YFKMEMGKNM CAIATCASYP VVAA
             YFLIERGKNM CGLAACASYP IPLV

"""

phylip_sequencial = """ 3 384
CYS1_DICDI   -----MKVILLFVLAVFTVFVSS---------------RGIPPEEQ-
-----------SQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTK
FGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG-AVTPV
KNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGG
LQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KNETVMAGYI
VSTGPLAIAADAVE-WQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWI
VKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII--
ALEU_HORVU   MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESA
VLGALGRTRHALRFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTN----RKGLPYR
LGINRFSDMSWEEFQATRL-GAAQTCSATLAGNHLMRDA--AALPETKDWREDG-IVSPV
KNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNF--------GCNGG
LPSQAFEYIKYNGGIDTEESYPYKGVNGV-CHYKAENAAVQVLDSVNITLNAEDELKNAV
GLVRPVSVAFQVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGV-----PYWL
IKNSWGADWGDNGYFKMEMGKNMCAIATCASYPVVAA
CATH_HUMAN   ------MWATLPLLCAGAWLLGV--------PVCGAAELSVNSLEK-
-----------FHFKSWMSKHRKTY-STEEYHHRLQTFASNWRKINAHN----NGNHTFK
MALNQFSDMSFAEIKHKYLWSEPQNCSAT--KSNYLRGT--GPYPPSVDWRKKGNFVSPV
KNQGACGSCWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNY--------GCQGG
LPSQAFEYILYNKGIMGEDTYPYQGKDGY-CKFQPGKAIGFVKDVANITIYDEEAMVEAV
ALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVNHAVLAVGYGEKNGI-----PYWI
VKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPLV
"""

class Test_Coretype_SeqGroup(unittest.TestCase):
    """ Tests core functionality of Alignmnets objects """
    def test_fasta_parser(self):
        """ test fasta read an write """
        # FASTA IO
        open("/tmp/ete_test_fasta.txt","w").write(fasta_example)
        # Test reading from file and from string
        SEQS = SeqGroup(fasta_example)
        SEQS2 = SeqGroup("/tmp/ete_test_fasta.txt")

        # Compare the result is the same
        self.assertEqual(SEQS.write(), SEQS2.write())

        # Test writing into file
        SEQS.write(format="fasta", outfile="/tmp/ete_fastaIO")
        self.assertEqual(SEQS.write(), fasta_example_output)

        # Test SeqGroup obj integrity
        self.assertEqual(SEQS.get_seq("Ago0000003"), Ago0000003)
        self.assertEqual(SEQS2.get_seq("Ago0000003"), Ago0000003)
        self.assertEqual(len(SEQS), len(SEQS.id2seq))
        self.assertEqual("Ago0000003" in SEQS, True)
        self.assertEqual("Ago" in SEQS, False)
        self.assertEqual(SEQS.get_entries(), [e for e in SEQS])

        # Check that the default  write format is FASTA
        self.assertEqual(SEQS.__str__(), SEQS.write(format="fasta"))


    def test_phylip_parser(self):
        """ Tests phylip read and write """
        # PHYLIP INTERLEAVED
        open("/tmp/ete_test_iphylip.txt","w").write(phylip_interlived)
        SEQS = SeqGroup("/tmp/ete_test_iphylip.txt", format="iphylip")
        SEQS2 = SeqGroup(phylip_interlived, format="iphylip")
        self.assertEqual(SEQS.write(), SEQS2.write())
        SEQS.write(format="iphylip",  outfile="/tmp/ete_write_file")
        self.assertEqual(SEQS.write(format="iphylip"), phylip_interlived)

        # Test SeqGroup obj integrity
        self.assertEqual(SEQS.get_seq("CYS1_DICDI"), CYS1_DICDI)
        self.assertEqual(SEQS2.get_seq("CYS1_DICDI"), CYS1_DICDI)
        self.assertEqual(len(SEQS), len(SEQS.id2seq))
        self.assertEqual("CYS1_DICDI" in SEQS, True)
        self.assertEqual(SEQS.get_entries(), [e for e in SEQS])

        # PHYLIP SEQUENCIAL FORMAT
        open("/tmp/ete_test_phylip.txt","w").write(phylip_sequencial)
        SEQS = SeqGroup("/tmp/ete_test_phylip.txt", format="phylip")
        SEQS2 = SeqGroup(phylip_sequencial, format="phylip")
        self.assertEqual(SEQS.write(), SEQS2.write())
        SEQS.write(format="phylip",  outfile="/tmp/ete_write_file")
        self.assertEqual(SEQS.write(format="phylip"), phylip_sequencial)

        # Test SeqGroup obj integrity
        self.assertEqual(SEQS.get_seq("CYS1_DICDI"), CYS1_DICDI)
        self.assertEqual(SEQS2.get_seq("CYS1_DICDI"), CYS1_DICDI)
        self.assertEqual(len(SEQS), len(SEQS.id2seq))
        self.assertEqual("CYS1_DICDI" in SEQS, True)
        self.assertEqual("CYS1" in SEQS, False)
        self.assertEqual(SEQS.get_entries(), [e for e in SEQS])


class Test_Coretype_Tree(unittest.TestCase):
    """ Tests tree basics. """
    def test_tree_read_and_write(self):
        """ Tests newick support """
        # Read and write newick tree from file (and support for NHX
        # format): newick parser
        open("/tmp/etetemptree.nw","w").write(nw_full)
        t = Tree("/tmp/etetemptree.nw")
        self.assertEqual(nw_full, t.write(features=["flag","mood"]))
        self.assertEqual(nw_topo,  t.write(format=9))
        self.assertEqual( nw_dist, t.write(format=5))

        # Read and write newick tree from *string* (and support for NHX
        # format)
        t = Tree(nw_full)
        self.assertEqual(nw_full, t.write(features=["flag","mood"]))
        self.assertEqual(nw_topo, t.write(format=9))
        self.assertEqual( nw_dist, t.write(format=5))

        # Read complex newick
        t = Tree(nw2_full)
        self.assertEqual(nw2_full,  t.write())

        # Read wierd topologies
        t = Tree(nw_simple5)
        self.assertEqual(nw_simple5,  t.write(format=9))
        t = Tree(nw_simple6)
        self.assertEqual(nw_simple6,  t.write(format=9))


    def test_newick_formats(self):
        """ tests different newick subformats """
        from ete2.parser.newick import print_supported_formats, NW_FORMAT
        print_supported_formats()

        # Let's stress a bit
        for i in xrange(10):
            t = Tree()
            t.populate(50)
            for f in NW_FORMAT:
                self.assertEqual(t.write(format=f), Tree(t.write(format=f),format=f).write(format=f))

        nw0 = "((A:0.813705,(E:0.545591,D:0.411772)1.000000:0.137245)1.000000:0.976306,C:0.074268);"
        nw1 = "((A:0.813705,(E:0.545591,D:0.411772)B:0.137245)A:0.976306,C:0.074268);"
        nw2 = "((A:0.813705,(E:0.545591,D:0.411772)1.000000:0.137245)1.000000:0.976306,C:0.074268);"
        nw3 = "((A:0.813705,(E:0.545591,D:0.411772)B:0.137245)A:0.976306,C:0.074268);"
        nw4 = "((A:0.813705,(E:0.545591,D:0.411772)),C:0.074268);"
        nw5 = "((A:0.813705,(E:0.545591,D:0.411772):0.137245):0.976306,C:0.074268);"
        nw6 = "((A:0.813705,(E:0.545591,D:0.411772)B)A,C:0.074268);"
        nw7 = "((A,(E,D)B)A,C);"
        nw8 = "((A,(E,D)),C);"
        nw9 = "((,(,)),);"


    def test_tree_manipulation(self):
        """ tests operations which modify tree topology """
        nw_tree = "((NoName:1.000000,Turtle:1.300000)1.000000:1.000000,(A:0.300000,B:2.400000)1.000000:0.430000);"

        # Manipulate Topologys
        # Adding and removing nodes (add_child, remove_child,
        # add_sister, remove_sister). The resulting neiwck tree should
        # match the nw_tree defined before.
        t = Tree()
        c1 = t.add_child()
        c2 = t.add_child(dist=0.43)
        n = TreeNode()
        _n = c1.add_child(n)
        c3 = _n.add_sister(name="Turtle", dist="1.3")
        c4 = c2.add_child(name="A", dist="0.3")

        c5 = c2.add_child(name="todelete")
        _c5 = c2.remove_child(c5)

        c6 = c2.add_child(name="todelete")
        _c6 = c4.remove_sister(c6)

        c7 = c2.add_child(name="B", dist=2.4)

        self.assertEqual(nw_tree, t.write())
        self.assertEqual(_c5, c5)
        self.assertEqual(_c6, c6)
        self.assertEqual(_n, n)

        # Delete,
        t = Tree("(((A, B), C)[&&NHX:name=I], (D, F)[&&NHX:name=J])[&&NHX:name=root];")
        D = t.search_nodes(name="D")[0]
        F = t.search_nodes(name="F")[0]
        J = t.search_nodes(name="J")[0]
        root = t.search_nodes(name="root")[0]
        J.delete()
        self.assertEqual(J.up, None)
        self.assertEqual(J in t, False)
        self.assertEqual(D.up, root)
        self.assertEqual(F.up, root)

        #detach
        t = Tree("(((A, B)[&&NHX:name=H], C)[&&NHX:name=I], (D, F)[&&NHX:name=J])[&&NHX:name=root];")
        D = t.search_nodes(name="D")[0]
        F = t.search_nodes(name="F")[0]
        J = t.search_nodes(name="J")[0]
        root = t.search_nodes(name="root")[0]
        J.detach()
        self.assertEqual(J.up, None)
        self.assertEqual(J in t, False)
        self.assertEqual(set([n.name for n in t.iter_descendants()]),set(["A","B","C","I","H"]))

        #prune
        t1 = Tree("(((A, B), C)[&&NHX:name=I], (D, F)[&&NHX:name=J])[&&NHX:name=root];")
        D1 = t1.search_nodes(name="D")[0]
        root = t.search_nodes(name="root")[0]
        t1.prune(["A","C", D1])
        self.assertEqual(set([n.name for n in t1.iter_descendants()]),  set(["A","C","D","I"]))
        t_fuzzy = Tree("(((A,B), C),(D,E));")
        orig_nw = t_fuzzy.write()
        ref_nodes = t_fuzzy.get_leaves()
        t_fuzzy.populate(1000)
        t_fuzzy.prune(ref_nodes)
        self.assertEqual(t_fuzzy.write(),orig_nw)
       

        # getting nodes, get_childs, get_sisters, get_tree_root,
        # get_common_ancestor, get_nodes_by_name
        # get_descendants_by_name, is_leaf, is_root
        t = Tree("(((A,B),C)[&&NHX:tag=common],D)[&&NHX:tag=root];")
        A = t.search_nodes(name="A")[0]
        B = t.search_nodes(name="B")[0]
        C = t.search_nodes(name="C")[0]

        self.assertEqual("A", A.name)

        common  = A.get_common_ancestor(C)
        self.assertEqual("common", common.tag)

        common  = A.get_common_ancestor(C, B)
        self.assertEqual("common", common.tag)

        self.assertEqual("root", A.get_tree_root().tag)
        self.assertEqual("root", B.get_tree_root().tag)
        self.assertEqual("root", C.get_tree_root().tag)
        self.assertEqual("root", common.get_tree_root().tag)

        self.assert_(common.get_tree_root().is_root())
        self.assert_(not A.is_root())
        self.assert_(A.is_leaf())
        self.assert_(not A.get_tree_root().is_leaf())

        # Tree magic python features
        t = Tree(nw_dflt)
        self.assertEqual(len(t), 20)
        self.assert_("Ddi0002240" in t)
        self.assert_(t.children[0] in t)
        for a in t:
            self.assert_(a.name)

        # Populate
        t = Tree(nw_full)
        prev_size= len(t)
        t.populate(25)
        self.assertEqual(len(t), prev_size+25)

        # Adding and removing features
        t = Tree("(((A,B),C)[&&NHX:tag=common],D)[&&NHX:tag=root];")
        A = t.search_nodes(name="A")[0]

        # Iterators, get_leaves, get_leaf_names
        t = Tree(nw2_full)
        self.assert_(t.get_leaf_names(), [name for name in  t.iter_leaf_names()])
        self.assert_(t.get_leaves(), [name for name in  t.iter_leaves()])
        self.assert_(t.get_descendants(), [n for n in  t.iter_descendants()])

        self.assertEqual(set([n for n in t.traverse("preorder")]), \
                             set([n for n in t.traverse("postorder")]))
        self.assert_(t in set([n for n in t.traverse("preorder")]))

        # Swap childs
        n = t.get_children()
        t.swap_childs()
        n.reverse()
        self.assertEqual(n, t.get_children())

        # Distances: get_distance, get_farthest_node,
        # get_farthest_descendant, get_midpoint_outgroup
        t = Tree("(((A:0.1, B:0.01):0.001, C:0.0001):1.0[&&NHX:name=I], (D:0.00001):0.000001[&&NHX:name=J]):2.0[&&NHX:name=root];")
        A = t.search_nodes(name="A")[0]
        B = t.search_nodes(name="B")[0]
        C = t.search_nodes(name="C")[0]
        D = t.search_nodes(name="D")[0]
        I = t.search_nodes(name="I")[0]
        J = t.search_nodes(name="J")[0]
        root = t.search_nodes(name="root")[0]

        self.assertEqual(A.get_common_ancestor(I).name, "I")
        self.assertEqual(A.get_common_ancestor(D).name, "root")
        self.assertEqual(A.get_distance(I), 0.101)
        self.assertEqual(A.get_distance(B), 0.11)
        self.assertEqual(A.get_distance(A), 0)
        self.assertEqual(I.get_distance(I), 0)
        self.assertEqual(A.get_distance(root), root.get_distance(A))

        # Get_farthest_node, get_farthest_leaf
        self.assertEqual(root.get_farthest_leaf(), (A,1.101) )
        self.assertEqual(root.get_farthest_node(), (A,1.101) )
        self.assertEqual(A.get_farthest_leaf(), (A, 0.0))
        self.assertEqual(A.get_farthest_node(), (D, 1.101011))
        self.assertEqual(I.get_farthest_node(), (D, 1.000011))

        # Test set_outgroup and get_midpoint_outgroup
        t = Tree(nw2_full)
        nodes = t.get_descendants()
        t.set_outgroup(t.get_midpoint_outgroup())
        o1, o2 = t.children[0], t.children[1]
        nw_original = t.write()
        YGR028W = t.get_leaves_by_name("YGR028W")[0]
        YGR138C = t.get_leaves_by_name("YGR138C")[0]
        d1 = YGR138C.get_distance(YGR028W)
        # Randomizing outgroup test: Can we recover origial state
        # after many manipulations?
        for i in xrange(10):
            for j in xrange(1000):
                n = random.sample(nodes, 1)[0]
                if n is None:
                    print "NONE"
                t.set_outgroup(n)
            t.set_outgroup(t.get_midpoint_outgroup())
            self.assertEqual([t.children[0], t.children[1]], [o1, o2])
            #self.assertEqual(t.write(), nw_original)
        d2 = YGR138C.get_distance(YGR028W)
        self.assertEqual(d1, d2)

        # Test unrooting
        t.unroot()

        # Ascii
        t.get_ascii()

class Test_phylo_module(unittest.TestCase):

    # ALL TESTS USE THIS EXAMPLE TREE
    #
    #                    /-Dme_001
    #          /--------|
    #         |          \-Dme_002
    #         |
    #         |                              /-Cfa_001
    #         |                    /--------|
    #         |                   |          \-Mms_001
    #         |                   |
    #---------|                   |                                        /-Hsa_001
    #         |                   |                              /--------|
    #         |          /--------|                    /--------|          \-Hsa_003
    #         |         |         |                   |         |
    #         |         |         |          /--------|          \-Ptr_001
    #         |         |         |         |         |
    #         |         |         |         |          \-Mmu_001
    #         |         |          \--------|
    #          \--------|                   |                    /-Hsa_004
    #                   |                   |          /--------|
    #                   |                    \--------|          \-Ptr_004
    #                   |                             |
    #                   |                              \-Mmu_004
    #                   |
    #                   |          /-Ptr_002
    #                    \--------|
    #                             |          /-Hsa_002
    #                              \--------|
    #                                        \-Mmu_002


    def test_link_alignmets(self):
        """ Phylotree can be linked to SeqGroup objects"""
        fasta = """
         >seqA
         MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
         >seqB
         MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
         >seqC
         MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
         >seqD
         MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
        """
        # Caution with iphylip string. blank spaces in the beginning are important
        iphylip = """
         4 76
      seqA   MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA
      seqB   MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA
      seqC   MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA
      seqD   MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ---

             LTNVSHQFMA LTNVSH
             LTNVSH---- ------
             LTNVSH---- ------
             -------FMA LTNVSH
        """

        # Loads a tree and link it to an alignment. As usual, 'alignment' can be
        # the path to a file or the data themselves in text string format

        alg1 = SeqGroup(fasta)
        alg2 = SeqGroup(iphylip, format="iphylip")

        t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta, alg_format="fasta")

        for l in t.get_leaves():
            self.assertEqual(l.sequence, alg1.get_seq(l.name))

        # The associated alignment can be changed at any time
        t.link_to_alignment(alignment=alg2, alg_format="iphylip")

        for l in t.get_leaves():
            self.assertEqual(l.sequence, alg2.get_seq(l.name))

    def test_get_sp_overlap_on_all_descendants(self):
        """ Tests ortholgy prediction using the sp overlap"""
        # Creates a gene phylogeny with several duplication events at
        # different levels.
        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')

        # Scans the tree using the species overlap algorithm and detect all
        # speciation and duplication events
        events = t.get_descendant_evol_events()

        # Check that all duplications are detected
        dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
        self.assertEqual(dup1.evoltype, "D")

        dup2 = t.get_common_ancestor("Dme_001", "Dme_002")
        self.assertEqual(dup2.evoltype, "D")

        dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002")
        self.assertEqual(dup3.evoltype, "D")

        dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003")
        self.assertEqual(dup4.evoltype, "D")


        # All other nodes should be speciation
        for node in t.traverse():
            if not node.is_leaf() and \
                   node not in set([dup1, dup2, dup3, dup4]):
                self.assertEqual(node.evoltype, "S")

        # Check events
        for e in events:
            self.assertEqual(e.node.evoltype, e.etype)

        # Check orthology/paralogy prediction
        orthologs = set()
        for e in events:
            if e.node == dup1:
                self.assertEqual(e.inparalogs, set(['Ptr_001', 'Hsa_001', 'Mmu_001', 'Hsa_003']))
                self.assertEqual(e.outparalogs, set(['Mmu_004', 'Ptr_004', 'Hsa_004']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.outparalogs, e.out_seqs)
                self.assertEqual(e.inparalogs, e.in_seqs)
            elif e.node == dup2:
                self.assertEqual(e.inparalogs, set(['Dme_001']))
                self.assertEqual(e.outparalogs, set(['Dme_002']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.outparalogs, e.out_seqs)
                self.assertEqual(e.inparalogs, e.in_seqs)
            elif e.node == dup3:
                self.assertEqual(e.inparalogs, set(['Hsa_003', 'Cfa_001', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001']))
                self.assertEqual(e.outparalogs, set(['Hsa_002', 'Ptr_002', 'Mmu_002']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.outparalogs, e.out_seqs)
                self.assertEqual(e.inparalogs, e.in_seqs)
            elif e.node == dup4:
                self.assertEqual(e.inparalogs, set(['Hsa_001']))
                self.assertEqual(e.outparalogs, set(['Hsa_003']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.outparalogs, e.out_seqs)
                self.assertEqual(e.inparalogs, e.in_seqs)
            else:

                key1 = list(e.inparalogs)
                key2 = list(e.orthologs)
                key1.sort()
                key2.sort()
                orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))

        orthologies = [
            [set(['Dme_001', 'Dme_002']), set(['Ptr_001', 'Cfa_001', 'Hsa_002', 'Hsa_003', 'Ptr_002', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001', 'Mmu_002'])],
            [set(['Mms_001', 'Cfa_001']), set(['Hsa_003', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001'])],
            [set(['Ptr_002']), set(['Hsa_002', 'Mmu_002'])],
            [set(['Cfa_001']), set(['Mms_001'])],
            [set(['Hsa_002']), set(['Mmu_002'])],
            [set(['Hsa_003', 'Hsa_001', 'Ptr_001']), set(['Mmu_001'])],
            [set(['Ptr_004', 'Hsa_004']), set(['Mmu_004'])],
            [set(['Hsa_003', 'Hsa_001']), set(['Ptr_001'])],
            [set(['Hsa_004']), set(['Ptr_004'])]
            ]
        expected_orthologs = set()
        for l1,l2 in orthologies:
            key1 = list(l1)
            key2 = list(l2)
            key1.sort()
            key2.sort()
            expected_orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))

        # Are all orthologies as expected
        self.assertEqual(expected_orthologs, orthologs)

    def test_get_sp_overlap_on_a_seed(self):
        """ Tests ortholgy prediction using sp overlap"""
        # Creates a gene phylogeny with several duplication events at
        # different levels.
        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')

        # Scans the tree using the species overlap algorithm
        seed = t.search_nodes(name="Hsa_001")[0]
        events = seed.get_my_evol_events()

        # Check that duplications are detected
        dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
        self.assertEqual(dup1.evoltype, "D")

        # This duplication is not in the seed path
        dup2 = t.get_common_ancestor("Dme_001", "Dme_002")
        self.assert_(not hasattr(dup2, "evoltype"))

        dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002")
        self.assertEqual(dup3.evoltype, "D")

        dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003")
        self.assertEqual(dup4.evoltype, "D")

        # All other nodes should be speciation
        node = seed
        while node:
            if not node.is_leaf() and \
                   node not in set([dup1, dup2, dup3, dup4]):
                self.assertEqual(node.evoltype, "S")
            node = node.up

        # Check events
        for e in events:
            self.assertEqual(e.node.evoltype, e.etype)

        # Check orthology/paralogy prediction
        orthologs = set()
        for e in events:
            if e.node == dup1:
                self.assertEqual(e.inparalogs, set(['Hsa_001', 'Hsa_003']))
                self.assertEqual(e.outparalogs, set(['Hsa_004']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.in_seqs, set(['Ptr_001', 'Hsa_001', 'Mmu_001', 'Hsa_003']))
                self.assertEqual(e.out_seqs, set(['Mmu_004', 'Ptr_004', 'Hsa_004']))
            elif e.node == dup3:
                self.assertEqual(e.inparalogs, set(['Hsa_003', 'Hsa_001',  'Hsa_004' ]))
                self.assertEqual(e.outparalogs, set(['Hsa_002']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.in_seqs, set(['Hsa_003', 'Cfa_001', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001']))
                self.assertEqual(e.out_seqs, set(['Hsa_002', 'Ptr_002', 'Mmu_002']))
            elif e.node == dup4:
                self.assertEqual(e.inparalogs, set(['Hsa_001']))
                self.assertEqual(e.outparalogs, set(['Hsa_003']))
                self.assertEqual(e.orthologs, set())
                self.assertEqual(e.in_seqs, set(['Hsa_001']))
                self.assertEqual(e.out_seqs, set(['Hsa_003']))
            else:

                key1 = list(e.inparalogs)
                key2 = list(e.orthologs)
                key1.sort()
                key2.sort()
                orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))


        orthologies = [
            [set(['Dme_001', 'Dme_002']), set([ 'Hsa_002', 'Hsa_003', 'Hsa_001',  'Hsa_004' ])],
            [set(['Mms_001', 'Cfa_001']), set(['Hsa_003',  'Hsa_001', 'Hsa_004'])],
            [set(['Hsa_003', 'Hsa_001']), set(['Mmu_001'])],
            [set(['Hsa_003', 'Hsa_001']), set(['Ptr_001'])],
            ]
        expected_orthologs = set()
        for l1,l2 in orthologies:
            key1 = list(l1)
            key2 = list(l2)
            key1.sort()
            key2.sort()
            expected_orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))

        # Are all orthologies as expected
        self.assertEqual(expected_orthologs, orthologs)

    def test_reconciliation(self):
        """ Tests ortholgy prediction based on the species reconciliation method"""
        gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
        species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"

        genetree = PhyloTree(gene_tree_nw)
        sptree = PhyloTree(species_tree_nw)

        recon_tree, events = genetree.reconcile(sptree)

        # Check that reconcilied tree nodes have the correct lables:
        # gene loss, duplication, etc.
        expected_recon = "((Dme_001:1.000000,Dme_002:1.000000)1.000000:1.000000[&&NHX:evoltype=D],(((Cfa_001:1.000000,Mms_001:1.000000)1.000000:1.000000[&&NHX:evoltype=S],((Hsa_001:1.000000,Ptr_001:1.000000)1.000000:1.000000[&&NHX:evoltype=S],Mmu_001:1.000000)1.000000:1.000000[&&NHX:evoltype=S])1.000000:1.000000[&&NHX:evoltype=S],((Mms:1.000000[&&NHX:evoltype=L],Cfa:1.000000[&&NHX:evoltype=L])1.000000:1.000000[&&NHX:evoltype=L],(((Hsa:1.000000[&&NHX:evoltype=L],Ptr_002:1.000000)1.000000:1.000000[&&NHX:evoltype=L],Mmu:1.000000[&&NHX:evoltype=L])1.000000:1.000000[&&NHX:evoltype=L],((Ptr:1.000000[&&NHX:evoltype=L],Hsa_002:1.000000)1.000000:1.000000[&&NHX:evoltype=L],Mmu_002:1.000000)1.000000:1.000000[&&NHX:evoltype=S])1.000000:1.000000[&&NHX:evoltype=D])1.000000:1.000000[&&NHX:evoltype=L])1.000000:1.000000[&&NHX:evoltype=D])[&&NHX:evoltype=S];"
        self.assertEqual(recon_tree.write(["evoltype"]), expected_recon)

    def test_miscelaneus(self):
        """ Test several things """
        # Creates a gene phylogeny with several duplication events at
        # different levels.
        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')

        # Create a dictionary with relative ages for the species present in
        # the phylogenetic tree.  Note that ages are only relative numbers to
        # define which species are older, and that different species can
        # belong to the same age.
        sp2age = {
          'Hsa': 1, # Homo sapiens (Hominids)
          'Ptr': 2, # P. troglodytes (primates)
          'Mmu': 2, # Macaca mulata (primates)
          'Mms': 3, # Mus musculus (mammals)
          'Cfa': 3, # Canis familiaris (mammals)
          'Dme': 4  # Drosophila melanogaster (metazoa)
        }


        # Check that dup ages are correct
        dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
        self.assertEqual(dup1.get_age(sp2age), 2)
        dup2 = t.get_common_ancestor("Dme_001", "Dme_002")
        self.assertEqual(dup2.get_age(sp2age), 4)
        dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002")
        self.assertEqual(dup3.get_age(sp2age), 3)
        dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003")
        self.assertEqual(dup4.get_age(sp2age), 1)

        # Check is_monophyletic tests
        self.assert_(dup1.is_monophyletic(["Hsa", "Ptr", "Mmu"]))
        self.assert_(not dup1.is_monophyletic(["Hsa", "Ptr"]))
        self.assert_(not dup1.is_monophyletic(["Hsa", "Ptr", "Mms"]))

        # Check rooting options
        expected_root = t.search_nodes(name="Dme_002")[0]
        expected_root.dist += 2.3
        self.assertEqual(t.get_farthest_oldest_leaf(sp2age), expected_root)


        # Check get species functions
        self.assertEqual(t.get_species(), set(sp2age.keys()))
        self.assertEqual(set([sp for sp in t.iter_species()]), set(sp2age.keys()))


# Do not modify this dataset
expression = '#Names\tcol1\tcol2\tcol3\tcol4\tcol5\tcol6\tcol7\nA\t-1.23\t-0.81\t1.79\t0.78\t-0.42\t-0.69\t0.58\nB\t-1.76\t-0.94\t1.16\t0.36\t0.41\t-0.35\t1.12\nC\t-2.19\t0.13\t0.65\t-0.51\t0.52\t1.04\t0.36\nD\t-1.22\t-0.98\t0.79\t-0.76\t-0.29\t1.54\t0.93\nE\t-1.47\t-0.83\t0.85\t0.07\t-0.81\t1.53\t0.65\nF\t-1.04\t-1.11\t0.87\t-0.14\t-0.80\t1.74\t0.48\nG\t-1.57\t-1.17\t1.29\t0.23\t-0.20\t1.17\t0.26\nH\t-1.53\t-1.25\t0.59\t-0.30\t0.32\t1.41\t0.77\n'

class Test_Coretype_ArrayTable(unittest.TestCase):
    """ Tests reading clustering or phylogenetic profile data"""
    def test_arraytable_parser(self):
        """ Tests reading numneric tables"""
        A = ArrayTable(expression)
        self.assertEqual(A.get_row_vector("A").tolist(), \
                             [-1.23, -0.81, 1.79, 0.78,-0.42,-0.69, 0.58])
        self.assertEqual(A.get_several_row_vectors(["A","C"]).tolist(), \
                             [[-1.23, -0.81, 1.79, 0.78, -0.42, -0.69, 0.58],
                         [-2.19, 0.13, 0.65, -0.51, 0.52, 1.04, 0.36]])

        self.assertEqual(A.get_several_column_vectors(["col2", "col7"]).tolist(), \
                             [[-0.81000000000000005, -0.93999999999999995,\
                                0.13, -0.97999999999999998, -0.82999999999999996,\
                                    -1.1100000000000001, -1.1699999999999999,\
                                    -1.25],
                              [0.57999999999999996, 1.1200000000000001, \
                                   0.35999999999999999, 0.93000000000000005, \
                                   0.65000000000000002, 0.47999999999999998, \
                                   0.26000000000000001, 0.77000000000000002]])


        self.assertEqual(A.get_column_vector("col4").tolist(), \
                             [0.78000000000000003, 0.35999999999999999, \
                                  -0.51000000000000001, -0.76000000000000001, \
                                  0.070000000000000007, -0.14000000000000001, \
                                  0.23000000000000001, -0.29999999999999999])

        A.remove_column("col4")
        self.assert_(A.get_column_vector("col4") is None )

        Abis = A.merge_columns({"merged1": \
                                    ["col1", "col2"],\
                                    "merged2": \
                                    ["col5", "col6"]}, \
                                   "mean")


        #self.assert_((Abis.get_column_vector("merged1")==numpy.array([-1.02, -1.35, -1.03, -1.1, -1.15, -1.075, -1.37, -1.39, ])).all()==True )

        # Continue this......


class Test_ClusterTree(unittest.TestCase):
    """ Tests specific methods for trees linked to ArrayTables"""
    def test_clustertree(self):
        """ Tests tree-ArrayTable association """

        t = ClusterTree("(((A,B),(C,(D,E))),(F,(G,H)));", text_array=expression)
        # Now we can ask the expression profile of a single gene
        node = t.get_common_ancestor("C", "D", "E")
        self.assertEqual((t&"A").profile.tolist(), \
                             [-1.23, -0.81, 1.79, 0.78,-0.42,-0.69, 0.58])
        print node.profile
        print node.deviation
        print node.silhouette
        print node.intracluster_dist
        print node.intercluster_dist

        from ete2.clustering import clustvalidation
        c1 = t.get_common_ancestor("A", "B")
        c2 = t.get_common_ancestor("C", "D", "E")
        c3 = t.get_common_ancestor("F", "G", "H")
        print t.get_dunn([c1, c2, c3])

class Test_Treeview(unittest.TestCase):
    """ Tests visualization stuff """
    def test_rendering(self):
        """ Nothing yet, sorry """
        pass



class Test_R_bindings(unittest.TestCase):
    """ This is experimental """
    def test_ape(self):
        """ Link to R-ape package """
        return # Don't test anything from now
        try:
            import rpy2.robjects as robjects
        except ImportError:
            print "\nNo rpy2 support. Skipping.\n"
            return

        # R
        t1 = Tree(nw_simple1)
        t2 = Tree(nw_simple2)


        R = robjects.r
        R.library("ape")
        CONS =  R["consensus"]([asRphylo(t1), \
                                    asRphylo(t1), \
                                    asRphylo(t1), \
                                    asRphylo(t1), \
                                    asRphylo(t2)])
        t = asETE(CONS)

if __name__ == '__main__':
    unittest.main()
