makeConsensusTree.pyΒΆ

#! /usr/bin/env python

desc = """

Write a majority rule consensus tree from 1 or more Nexus or Newick formatted
tree files. Each tree file can have a specified burnin and/or step count. The
data file (in Phylip or Nexus format) from which the trees are derived must be
supplied.

e.g.

$ makeConsensusTree.py dataFile.nex -t treeFile.run1.t treeFile.run2.t -b 10000 8000 -s 10 4

 - where treeFile.run1.t has burnin 10000 and step 10, and
   where treeFile.run2.t has burnin 8000 and step 4

Note: there is a 4GB memory and/or a 10,000 tree limit. If you wish to exceed
      these limits you need to alter the code.

Cymon J. Cox version 1.1 Fri 10 Nov 2017

"""
import os
import sys
import argparse
import textwrap
import resource
#Squash any local configuration that might cause circular imports:
os.environ['P4_STARTUP'] = ""
from p4 import *
from p4.treefilelite import TreeFileLite
try:
    from gram import TreeGram
except ImportError:
    print "Unable to import TreeGram - error."
    print "You can get it here: https://github.com/pgfoster/gram"
    sys.exit()

# Memory limited to 4GB if you need more you need to alter the code
rsrc = resource.RLIMIT_AS
#resource.setrlimit(rsrc, (4e+9, hard))
soft, hard = resource.getrlimit(rsrc)
resource.setrlimit(rsrc, (4e+9, hard)) # 4GB memory limit

def main(datafile,
        treefiles,
        burnin_numbers,
        step_numbers,
        ignore_advice = False,
        outfile_name = "consensus.tree",
        restore_names = False,
        names_dict_path = "./p4_renameForPhylip_dict.py",
        re_root = False,
        percentage = 0.5,
        support_as_percentage = False,
        pdf = False,
        reportlab = False,
        scalebar = True,
        quiet = False
        ):

    if not datafile:
        print "Error: No datafile specified."
        print "Aborting."
        sys.exit(1)
    else:
        if not os.path.exists(datafile):
            print "Error: cannot find datafile %s" % datafile
            print "Aborting."
            sys.exit(1)
    for treefile in treefiles:
        if not os.path.exists(treefile):
            print "Error: cannot find treefile %s" % treefile
            print "Aborting."
            sys.exit(1)
    if not burnin_numbers:
        burnin_numbers = [0]*len(treefile)
    else:
        if not len(burnin_numbers) == len(treefiles):
            print "Error: there are %i burnin numbers and %i treefiles." % \
                    (len(burnin_numbers),len(treefiles))
            print "Aborting."
            sys.exit(1)
    if not step_numbers:
        step_numbers = [1]*len(treefile)
    else:
        if not len(step_numbers) == len(treefiles):
            print "Error: there are %i step_numbers and %i treefiles." % \
                    (len(step_numbers),len(treefiles))
            print "Aborting."
            sys.exit(1)
    if os.path.exists(outfile_name):
        print "Error: Outfile name \"%s\" already exists." % outfile_name
        print "Refusing to over-write. Aborting."
        sys.exit(1)
    if percentage:
        if percentage < 0.0 or percentage > 1.0:
            print "Error: percentage must be between 0.0 and 1.0 not %f" % \
                percentage
            print "Aborting."
            sys.exit(1)

    #read alignment
    var.doCheckForAllGapColumns = False
    var.doCheckForDuplicateSequences = False
    read(datafile)
    a = var.alignments[0]

    #read trees
    all_trees = []
    total_number_of_trees_to_read = 0

    for treefile, burnin, step in zip(treefiles, burnin_numbers, step_numbers):

        try:
            ttl = TreeFileLite(treefile, verbose=0)
        except (IndexError, MemoryError) as e:
            if "list index out of range" in e:
                print "Failed to read tree file: %s" % treefile
                print "Check that file has 'end;' formatted"
            else:
                print "\n!!! A memory error was thrown - there is a 4GB limit !!!\n"
            sys.exit()

        alltreeNumbers = range(ttl.nSamples)
        treeNumbers = alltreeNumbers[burnin::step]

        if len(treeNumbers) == 0:
            print "\nError: No trees left after removal of burnin - " + \
                  "burnin is %i, total trees in \"%s\" is %i" % \
                  (burnin, treefile, len(alltreeNumbers))
            print "Aborting."
            sys.exit(1)

        #Check tree numbers
        total_number_of_trees_to_read += len(treeNumbers)
        if total_number_of_trees_to_read >= 10000:
            if ignore_advice:
                print "... the ignoreAdvice flag is set so reading beyond 10,000 trees..."
            else:
                print "Refusing to read > 10,000 trees - you don't (normally) need huge "
                print "numbers of trees - 1,000 -> 10,000 is usually sufficent"
                print "If you really, really, want to read more trees, you can use the -a "
                print "flag to bypass this message, but remember you then run the risk of "
                print "running out of memory, or more drastically crashing the machine"
                sys.exit()

        if not quiet:
            print "\nTrees in file \"%s\": %s" % (treefile, ttl.nSamples)
            if burnin:
                print "Number of trees burned-in: %s" % burnin
            if step:
                print "Trees sampled every %s trees" % step
            print "Trees to read: %s" % len(treeNumbers)


        try:
            all_trees.extend([ttl.getTree(n) for n in treeNumbers])
        except MemoryError as e:
            print "\n!!! A memory error was thrown - there is a 4GB limit !!!\n"
            sys.exit()
        var.trees = []

    tt = Trees(all_trees, a.taxNames)
    fb = True
    #Are trees bi-rooted eg phylobayes
    for t in tt.trees:
        if t.root.getNChildren() == 2:
            t.removeRoot()
            fb = False
    if not quiet:
        if not fb:
            print "Some trees had a bifurcating root which was removed."
    tp = TreePartitions(tt)
    t = tp.consensus(minimumProportion=percentage)
    if restore_names:
            if not os.path.exists(names_dict_path):
                print "Error: Cannont find taxon names dictionary %s" % \
                    names_dict_path
                print "Aborting."
                sys.exit(1)
            else:
                t.restoreNamesFromRenameForPhylip(names_dict_path)
    if support_as_percentage:
        for node in t.iterInternalsNoRoot():
            node.name = "%i" % (100*node.br.support)
    else:
        for node in t.iterInternalsNoRoot():
            node.name = "%.2f" % node.br.support
    if re_root:
        t.draw(showInternalNodeNames=1,showNodeNums=1)
        while 1:
            nodenum = raw_input("Node number to re-root with... ")
            try:
               nodenum = int(nodenum)
            except ValueError:
                print "Try a number this time..."
                continue
            break
        t.reRoot(nodenum)
        t.ladderize()
        t.draw(showInternalNodeNames=1,showNodeNums=1)
    t.writeNexus(outfile_name)
    if not quiet:
        print "Written consensus tree to file \"%s\"" % outfile_name
    if pdf:
        if not TreeGram:
            print "TreeGram is not installed, cannot write PDF."
        else:
            tg = TreeGram(t)
            #For some users (e.g Cymon) reportlab is True by default but not
            #others - why is that?
            if reportlab:
                tg.rl = True
            else:
                tg.rl = False
            if scalebar:
                tg.setScaleBar()
            tg.epdf()
            if not quiet:
                if reportlab:
                    print "Written PDF to \"tg.pdf\" and \"cr_tg.pdf\"."
                else:
                    print "Written PDF to Gram directory."
    if not quiet:
        print "Done."

if __name__ == "__main__":

    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=textwrap.dedent(desc),
            )
    parser.add_argument("datafile",
                        #dest="datafile",
                        help="Data matrix filename.",
                        )
    parser.add_argument("-t", "--treefiles",
                        dest="treefiles",
                        help="Input tree file(s) in Nexus or Newick format.",
                        nargs='*')
    parser.add_argument("-b", "-burnin_numbers",
                        dest="burnin_numbers",
                        help="Burnin numbers for each treefile. " +\
                             "Default: 0 (all trees).",
                        nargs="*",
                        type=int)
    parser.add_argument("-s", "-step_numbers",
                        dest="step_numbers",
                        help="An interval or step for sampling each tree file. " +\
                        "Default: 1 (all trees)",
                        nargs="*",
                        type=int)
    parser.add_argument("-a", "-ignore_advice",
                        dest="ignore_advice",
                        help="Ignore tree file read limit advice. " +\
                            "Default: False",
                        default=False,
                        action='store_true')
    parser.add_argument("-o", "--outfile_name",
                        dest="outfile_name",
                        help="Name of the output consensus tree file. " +\
                        "Default: consensus.tree",
                        default="consensus.tree"
                        )
    parser.add_argument("-g", "--restore_names",
                        dest="restore_names",
                        help="Restore taxon names. " +\
                        "Default: False",
                        default=False,
                        action='store_true'
                        )
    parser.add_argument("-j", "--path_to_names_dict",
                        dest="names_dict_path",
                        help="Path to the names dictionary. " +\
                        "Default: ./p4_renameForPhylip_dict.py",
                        default="./p4_renameForPhylip_dict.py"
                        )
    parser.add_argument("-p", "--percentage",
                        dest="percentage",
                        help="Majority-rule percentage, between 0.1-1.0. " +\
                        "Default: 0.5",
                        default=0.5,
                        type=float)
    parser.add_argument("-k", "--support_format",
                        dest="support_as_percentage",
                        help="Support values as percentages. " +\
                        "Default: formatted as probabilities (0.0->1.0)",
                        default=False,
                        action='store_true')
    parser.add_argument("-r", "--re_root",
                        dest="re_root",
                        help="Re-root tree before saving. " +\
                        "Default: False",
                        default=False,
                        action='store_true')
    parser.add_argument("-f", "--pdf",
                        dest="pdf",
                        help="Write a pdf of the tree. " +\
                        "Default: False",
                        default=False,
                        action="store_true")
    parser.add_argument("-l", "--use_reportlab",
                        dest="reportlab",
                        help="Write a pdf using reportlab. " +\
                        "Default: False (Use PGF/TikZ)",
                        default=False,
                        action="store_true")
    parser.add_argument("-c", "--scalebar",
                        dest="scalebar",
                        help="Include scale bar on pdf " +\
                        "Default: True",
                        default=True,
                        action="store_false")
    parser.add_argument("-q", "--quiet",
                        dest="quiet",
                        help="Quiet. Default: False",
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    main(args.datafile, args.treefiles, args.burnin_numbers, args.step_numbers,
            args.ignore_advice, args.outfile_name, args.restore_names, args.names_dict_path,
            args.re_root, args.percentage, args.support_as_percentage,
            args.pdf, args.reportlab, args.scalebar, args.quiet)