makeConsensusTree.pyΒΆ

#! /usr/bin/env python

desc = """

Write a majority rule consensus tree from 1 or more Nexus or Newick formatted
tree files. Each tree file can have a specified burnin and/or step count. The
data file (in Phylip or Nexus format) from which the trees are derived must be
supplied.

e.g.

$ makeConsensusTree.py -d dataFile.nex -t treeFile.run1.t treeFile.run2.t -b 10000 8000 -s 10 4

 - where treeFile.run1.t has burnin 10000 and step 10, and
   where treeFile.run2.t has burnin 8000 and step 4

Cymon J. Cox version 1.0 Tues 25 Oct 2011

"""
import os
import sys
import argparse
import textwrap
#Squash any local configuration that might cause circular imports:
os.environ['P4_STARTUP'] = ""
from p4 import *
from p4.treefilelite import TreeFileLite
try:
    from gram import TreeGram
except ImportError:
    print "Unable to import TreeGram - error."
    sys.exit()

def main(datafile,
        treefiles,
        burnin_numbers,
        step_numbers,
        outfile_name = "consensus.tree",
        restore_names = False,
        names_dict_path = "./p4_renameForPhylip_dict.py",
        re_root = False,
        percentage = 0.5,
        support_as_percentage = False,
        pdf = False,
        reportlab = False,
        scalebar = True,
        quiet = False
        ):

    if not datafile:
        print "Error: No datafile specified."
        print "Aborting."
        sys.exit(1)
    else:
        if not os.path.exists(datafile):
            print "Error: cannot find datafile %s" % datafile
            print "Aborting."
            sys.exit(1)
    for treefile in treefiles:
        if not os.path.exists(treefile):
            print "Error: cannot find treefile %s" % treefile
            print "Aborting."
            sys.exit(1)
    if not burnin_numbers:
        burnin_numbers = [0]*len(treefile)
    else:
        if not len(burnin_numbers) == len(treefiles):
            print "Error: there are %i burnin numbers and %i treefiles." % \
                    (len(burnin_numbers),len(treefiles))
            print "Aborting."
            sys.exit(1)
    if not step_numbers:
        step_numbers = [1]*len(treefile)
    else:
        if not len(step_numbers) == len(treefiles):
            print "Error: there are %i step_numbers and %i treefiles." % \
                    (len(step_numbers),len(treefiles))
            print "Aborting."
            sys.exit(1)
    if os.path.exists(outfile_name):
        print "Error: Outfile name \"%s\" already exists." % outfile_name
        print "Refusing to over-write. Aborting."
        sys.exit(1)
    if percentage:
        if percentage < 0.0 or percentage > 1.0:
            print "Error: percentage must be between 0.0 and 1.0 not %f" % \
                percentage
            print "Aborting."
            sys.exit(1)

    #read alignment
    var.doCheckForAllGapColumns = False
    var.doCheckForDuplicateSequences = False
    read(datafile)
    a = var.alignments[0]

    #read trees
    all_trees = []

    for treefile, burnin, step in zip(treefiles, burnin_numbers, step_numbers):
        try:
            ttl = TreeFileLite(treefile, verbose=0)
        except IndexError as e:
            if "list index out of range" in e:
                print "Failed to read tree file: %s" % treefile
                print "Check that file has 'end;' formatted"
            sys.exit()

        alltreeNumbers = range(ttl.nSamples)
        treeNumbers = alltreeNumbers[burnin::step]
        if len(treeNumbers) == 0:
            print "\nError: No trees left after removal of burnin - " + \
                  "burnin is %i, total trees in \"%s\" is %i" % \
                  (burnin, treefile, len(alltreeNumbers))
            print "Aborting."
            sys.exit(1)
        if not quiet:
            print "\nTrees in file \"%s\": %s" % (treefile, ttl.nSamples)
            if burnin:
                print "Number of trees burned-in: %s" % burnin
            if step:
                print "Trees sampled every %s trees" % step
            print "Trees read: %s" % len(treeNumbers)

        all_trees.extend([ttl.getTree(n) for n in treeNumbers])
        var.trees = []

    tt = Trees(all_trees, a.taxNames)
    fb = True
    #Are trees bi-rooted eg phylobayes
    for t in tt.trees:
        if t.root.getNChildren() == 2:
            t.removeRoot()
            fb = False
    if not quiet:
        if not fb:
            print "Some trees had a bifurcating root which was removed."
    tp = TreePartitions(tt)
    t = tp.consensus(minimumProportion=percentage)
    if restore_names:
            if not os.path.exists(names_dict_path):
                print "Error: Cannont find taxon names dictionary %s" % \
                    names_dict_path
                print "Aborting."
                sys.exit(1)
            else:
                t.restoreNamesFromRenameForPhylip(names_dict_path)
    if support_as_percentage:
        for node in t.iterInternalsNoRoot():
            node.name = "%i" % (100*node.br.support)
    else:
        for node in t.iterInternalsNoRoot():
            node.name = "%.2f" % node.br.support
    if re_root:
        t.draw(showInternalNodeNames=1,showNodeNums=1)
        while 1:
            nodenum = raw_input("Node number to re-root with... ")
            try:
               nodenum = int(nodenum)
            except ValueError:
                print "Try a number this time..."
                continue
            break
        t.reRoot(nodenum)
        t.ladderize()
        t.draw(showInternalNodeNames=1,showNodeNums=1)
    t.writeNexus(outfile_name)
    if not quiet:
        print "Written consensus tree to file \"%s\"" % outfile_name
    if pdf:
        if not TreeGram:
            print "TreeGram is not installed, cannot write PDF."
        else:
            tg = TreeGram(t)
            #For some users (e.g Cymon) reportlab is True by default but not
            #others - why is that?
            if reportlab:
                tg.rl = True
            else:
                tg.rl = False
            if scalebar:
                tg.setScaleBar()
            tg.epdf()
            if not quiet:
                if reportlab:
                    print "Written PDF to \"tg.pdf\" and \"cr_tg.pdf\"."
                else:
                    print "Written PDF to Gram directory."
    if not quiet:
        print "Done."

if __name__ == "__main__":

    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=textwrap.dedent(desc),
            )
    parser.add_argument("-d", "--datafile",
                        dest="datafile",
                        help="Data matrix filename.",
                        )
    parser.add_argument("-t", "-treefiles",
                        dest="treefiles",
                        help="Input tree file(s) in Nexus or Newick format.",
                        nargs='*')
    parser.add_argument("-b", "-burnin_numbers",
                        dest="burnin_numbers",
                        help="Burnin numbers for each treefile. " +\
                             "Default: 0 (all trees).",
                        nargs="*",
                        type=int)
    parser.add_argument("-s", "-step_numbers",
                        dest="step_numbers",
                        help="An interval or step for sampling each tree file. " +\
                        "Default: 1 (all trees)",
                        nargs="*",
                        type=int)
    parser.add_argument("-o", "--outfile_name",
                        dest="outfile_name",
                        help="Name of the output consensus tree file. " +\
                        "Default: consensus.tree",
                        default="consensus.tree"
                        )
    parser.add_argument("-g", "--restore_names",
                        dest="restore_names",
                        help="Restore taxon names. " +\
                        "Default: False",
                        default=False,
                        action='store_true'
                        )
    parser.add_argument("-j", "--path_to_names_dict",
                        dest="names_dict_path",
                        help="Path to the names dictionary. " +\
                        "Default: ./p4_renameForPhylip_dict.py",
                        default="./p4_renameForPhylip_dict.py"
                        )
    parser.add_argument("-p", "--percentage",
                        dest="percentage",
                        help="Majority-rule percentage, between 0.1-1.0. " +\
                        "Default: 0.5",
                        default=0.5,
                        type=float)
    parser.add_argument("-k", "--support_format",
                        dest="support_as_percentage",
                        help="Support values as percentages. " +\
                        "Default: formatted as probabilities (0.0->1.0)",
                        default=False,
                        action='store_true')
    parser.add_argument("-r", "--re_root",
                        dest="re_root",
                        help="Re-root tree before saving. " +\
                        "Default: False",
                        default=False,
                        action='store_true')
    parser.add_argument("-f", "--pdf",
                        dest="pdf",
                        help="Write a pdf of the tree. " +\
                        "Default: False",
                        default=False,
                        action="store_true")
    parser.add_argument("-l", "--use_reportlab",
                        dest="reportlab",
                        help="Write a pdf using reportlab. " +\
                        "Default: False (Use PGF/TikZ)",
                        default=False,
                        action="store_true")
    parser.add_argument("-c", "--scalebar",
                        dest="scalebar",
                        help="Include scale bar on pdf " +\
                        "Default: True",
                        default=True,
                        action="store_false")
    parser.add_argument("-q", "--quiet",
                        dest="quiet",
                        help="Quiet. Default: False",
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    main(args.datafile, args.treefiles, args.burnin_numbers, args.step_numbers,
            args.outfile_name, args.restore_names, args.names_dict_path,
            args.re_root, args.percentage, args.support_as_percentage,
            args.pdf, args.reportlab, args.scalebar, args.quiet)