doi abstract bibtex

Supertree methods merge a set of overlapping phylogenetic trees into a supertree containing all taxa of the input trees. The challenge in supertree reconstruction is the way of dealing with conflicting information in the input trees. Many different algorithms for different objective functions have been suggested to resolve these conflicts. In particular, there exist methods based on encoding the source trees in a matrix, where the supertree is constructed applying a local search heuristic to optimize the respective objective function. We present a novel heuristic supertree algorithm called Bad Clade Deletion (BCD) supertrees. It uses minimum cuts to delete a locally minimal number of columns from such a matrix representation so that it is compatible. This is the complement problem to Matrix Representation with Compatibility (Maximum Split Fit). Our algorithm has guaranteed polynomial worst-case running time and performs swiftly in practice. Different from local search heuristics, it guarantees to return the directed perfect phylogeny for the input matrix, corresponding to the parent tree of the input trees, if one exists. Comparing supertrees to model trees for simulated data, BCD shows a better accuracy (F1-score) than the state-of-the-art algorithms SuperFine (up to 3%) and Matrix Representation with Parsimony (up to 7%); at the same time, BCD is up to 7 times faster than SuperFine, and up to 600 times faster than Matrix Representation with Parsimony. Finally, using the BCD supertree as a starting tree for a combined Maximum Likelihood analysis using RAxML, we reach significantly improved accuracy (1% higher F1-score) and running time (1.7-fold speedup).

% Journal article on Bad Clade Deletion (BCD) supertrees, Mol Biol Evol 34 (2017).
% The owner, timestamp and keywords fields are reference-manager metadata; standard styles ignore them.
@article{fleischauer17bad,
  author    = {Fleischauer, Markus and B{\"o}cker, Sebastian},
  title     = {{Bad} {Clade} {Deletion} Supertrees: A Fast and Accurate Supertree Algorithm},
  journal   = {Mol Biol Evol},
  year      = {2017},
  volume    = {34},
  pages     = {2408--2421},
  abstract  = {Supertree methods merge a set of overlapping phylogenetic trees into a supertree containing all taxa of the input trees. The challenge in supertree reconstruction is the way of dealing with conflicting information in the input trees. Many different algorithms for different objective functions have been suggested to resolve these conflicts. In particular, there exist methods based on encoding the source trees in a matrix, where the supertree is constructed applying a local search heuristic to optimize the respective objective function. We present a novel heuristic supertree algorithm called Bad Clade Deletion (BCD) supertrees. It uses minimum cuts to delete a locally minimal number of columns from such a matrix representation so that it is compatible. This is the complement problem to Matrix Representation with Compatibility (Maximum Split Fit). Our algorithm has guaranteed polynomial worst-case running time and performs swiftly in practice. Different from local search heuristics, it guarantees to return the directed perfect phylogeny for the input matrix, corresponding to the parent tree of the input trees, if one exists. Comparing supertrees to model trees for simulated data, BCD shows a better accuracy (F1-score) than the state-of-the-art algorithms SuperFine (up to 3\%) and Matrix Representation with Parsimony (up to 7\%); at the same time, BCD is up to 7 times faster than SuperFine, and up to 600 times faster than Matrix Representation with Parsimony. Finally, using the BCD supertree as a starting tree for a combined Maximum Likelihood analysis using RAxML, we reach significantly improved accuracy (1\% higher F1-score) and running time (1.7-fold speedup).},
  doi       = {10.1093/molbev/msx191},
  keywords  = {jena; supertrees; bcd supertrees;},
  owner     = {Sebastian},
  pmid      = {28873954},
  timestamp = {2017.06.25},
}

Downloads: 0