diff --git a/examples/newick/README.md b/examples/newick/README.md
new file mode 100644
index 0000000..90ab530
--- /dev/null
+++ b/examples/newick/README.md
@@ -0,0 +1,18 @@
+# newick Examples
+
+Each sub-directory contains a self-contained example. The order in
+which the examples are to appear is specified in `order.json` (an
+array of directory names in the expected order).
+
+In each example directory you'll find:
+
+* `config.toml` - must conform to the specification outlined here:
+  https://docs.pyscript.net/latest/user-guide/configuration/ This is
+  parsed and ultimately turned into a JSON representation as part of
+  the package's API object.
+* `setup.py` - Python code for contextual and environmental setup,
+  NOT SEEN BY THE END USER, but is run before the `code.py` code is
+  evaluated. Allows us to create useful (IPython) shims, avoid
+  repeating boilerplate and whatnot.
+* `code.py` - the actual code added to the editor which forms the
+  practical example of using the package.
diff --git a/examples/newick/building_and_pruning/code.py b/examples/newick/building_and_pruning/code.py
new file mode 100644
index 0000000..f3af94a
--- /dev/null
+++ b/examples/newick/building_and_pruning/code.py
@@ -0,0 +1,50 @@
+# ---------------------------------------------------------------------
+# Build a tree node-by-node, then trim it down to a subset of leaves.
+# ---------------------------------------------------------------------
+
+heading("Building a tree from scratch")
+note(
+    "We'll model a small file-system-like hierarchy using "
+    "Node.create and add_descendant."
+)
+
+# Build the root and its immediate children.
+root = newick.Node.create(name="root")
+docs = newick.Node.create(name="docs")
+src = newick.Node.create(name="src")
+tests = newick.Node.create(name="tests")
+root.add_descendant(docs)
+root.add_descendant(src)
+root.add_descendant(tests)
+
+# Add some leaves under each branch.
+for leaf in ("intro.md", "guide.md"):
+    docs.add_descendant(newick.Node.create(name=leaf))
+for leaf in ("main.py", "utils.py", "io.py"):
+    src.add_descendant(newick.Node.create(name=leaf))
+for leaf in ("test_main.py", "test_io.py"):
+    tests.add_descendant(newick.Node.create(name=leaf))
+
+display(HTML(f"<pre>{root.ascii_art()}</pre>"), append=True)
+
+# Serialize to Newick text with `dumps`. Round-tripping with `loads`
+# gives back an equivalent tree.
+heading("Serializing to Newick")
+serialized = newick.dumps(root)
+note("The same tree as a Newick string:")
+display(HTML(f"<pre>{serialized}</pre>"), append=True)
+
+# Pruning: keep only the Python files. With `inverse=True`, the named
+# nodes are the ones we *keep*; everything else gets pruned.
+heading("Pruning to a subset of leaves")
+note(
+    "We prune to keep only the .py files, then collapse "
+    "internal nodes that no longer branch."
+)
+
+python_files = [n.name for n in root.get_leaves() if n.name.endswith(".py")]
+root.prune_by_names(python_files, inverse=True)
+root.remove_redundant_nodes(keep_leaf_name=True)
+
+display(HTML(f"<pre>{root.ascii_art()}</pre>"), append=True)
+note(f"Remaining leaves: {[n.name for n in root.get_leaves()]}")
diff --git a/examples/newick/building_and_pruning/config.toml b/examples/newick/building_and_pruning/config.toml
new file mode 100644
index 0000000..a41ad4b
--- /dev/null
+++ b/examples/newick/building_and_pruning/config.toml
@@ -0,0 +1 @@
+packages = ["newick"]
diff --git a/examples/newick/building_and_pruning/setup.py b/examples/newick/building_and_pruning/setup.py
new file mode 100644
index 0000000..423f556
--- /dev/null
+++ b/examples/newick/building_and_pruning/setup.py
@@ -0,0 +1,20 @@
+"""Lighter setup for later cells: same names, no IPython shim."""
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    return _display(*args, **kwargs, target=__pyscript_display_target__)
+
+
+def heading(text, level=2):
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    display(HTML(f"<p>{text}</p>"), append=True)
+
+
+import newick
diff --git a/examples/newick/comments_and_annotations/code.py b/examples/newick/comments_and_annotations/code.py
new file mode 100644
index 0000000..6ac841a
--- /dev/null
+++ b/examples/newick/comments_and_annotations/code.py
@@ -0,0 +1,50 @@
+# ---------------------------------------------------------------------
+# Many phylogenetics tools (BEAST, MrBayes, TreeAnnotator, ...) embed
+# extra per-node data in Newick comments enclosed in square brackets.
+# The `newick` package can parse these as structured properties when
+# they follow the NHX or `&key=value,...` conventions.
+# ---------------------------------------------------------------------
+
+heading("Reading NHX-style annotations")
+note(
+    "Here each tip carries a species code and a support value, "
+    "stored in the [&&NHX:...] comment after the node name."
+)
+
+annotated = (
+    "((mouse[&&NHX:species=Mus_musculus:support=98],"
+    "rat[&&NHX:species=Rattus_norvegicus:support=95])Rodentia"
+    "[&&NHX:support=99],"
+    "human[&&NHX:species=Homo_sapiens:support=100])Mammalia;"
+)
+
+tree = newick.loads(annotated)[0]
+
+# `walk` yields every node in the tree. `Node.properties` is a dict
+# parsed from the NHX-style comment; `Node.comment` is the raw text.
+heading("Per-node properties")
+for node in tree.walk():
+    if node.properties:
+        props = ", ".join(f"{k}={v}" for k, v in node.properties.items())
+        display(HTML(f"<li>{node.name} → {props}</li>"), append=True)
+
+# Comments can also be ignored entirely at parse time, which is handy
+# when you just want the bare topology.
+heading("Stripping comments")
+bare = newick.loads(annotated, strip_comments=True)[0]
+note("The same tree, parsed with strip_comments=True:")
+display(HTML(f"<pre>{bare.newick}</pre>"), append=True)
+
+# Quoted labels let you put otherwise-reserved characters in node names.
+heading("Quoted labels")
+note(
+    "Single-quoted labels can contain commas, parentheses, and colons. "
+    "Doubled single quotes '' represent a literal apostrophe."
+)
+quoted = newick.loads("('Genus species (strain 1)','O''Brien lab isolate')Sample;")[0]
+display(HTML(f"<pre>{quoted.ascii_art()}</pre>"), append=True)
+for leaf in quoted.get_leaves():
+    display(HTML(
+        f"<li>name={leaf.name}, "
+        f"unquoted={leaf.unquoted_name}</li>"
+    ), append=True)
diff --git a/examples/newick/comments_and_annotations/config.toml b/examples/newick/comments_and_annotations/config.toml
new file mode 100644
index 0000000..a41ad4b
--- /dev/null
+++ b/examples/newick/comments_and_annotations/config.toml
@@ -0,0 +1 @@
+packages = ["newick"]
diff --git a/examples/newick/comments_and_annotations/setup.py b/examples/newick/comments_and_annotations/setup.py
new file mode 100644
index 0000000..423f556
--- /dev/null
+++ b/examples/newick/comments_and_annotations/setup.py
@@ -0,0 +1,20 @@
+"""Lighter setup for later cells: same names, no IPython shim."""
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    return _display(*args, **kwargs, target=__pyscript_display_target__)
+
+
+def heading(text, level=2):
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    display(HTML(f"<p>{text}</p>"), append=True)
+
+
+import newick
diff --git a/examples/newick/order.json b/examples/newick/order.json
new file mode 100644
index 0000000..4194bed
--- /dev/null
+++ b/examples/newick/order.json
@@ -0,0 +1,5 @@
+[
+  "reading_newick_trees",
+  "building_and_pruning",
+  "comments_and_annotations"
+]
diff --git a/examples/newick/reading_newick_trees/code.py b/examples/newick/reading_newick_trees/code.py
new file mode 100644
index 0000000..08c29da
--- /dev/null
+++ b/examples/newick/reading_newick_trees/code.py
@@ -0,0 +1,39 @@
+"""
+A first look at the `newick` package.
+
+The Newick format is a compact, parenthesis-based notation for trees,
+widely used in bioinformatics to describe phylogenies. The `newick`
+package parses Newick strings into a tree of `Node` objects you can
+inspect and walk.
+
+See https://en.wikipedia.org/wiki/Newick_format for a primer.
+"""
+from IPython.core.display import display, HTML
+
+import newick
+
+
+# A tiny phylogeny relating four primates. The numbers after the colons
+# are branch lengths (e.g. millions of years of divergence).
+primates = "((Human:6.0,Chimp:6.0)HomoPan:2.0,(Gorilla:8.0,Orangutan:14.0)Great:1.0)Primates;"
+
+# `loads` parses a Newick string and returns a list of trees (Newick
+# files can hold more than one), so we take the first.
+tree = newick.loads(primates)[0]
+
+heading("A primate phylogeny")
+note(f"Root node name: {tree.name}")
+note("Direct descendants of the root, with their branch lengths:")
+for child in tree.descendants:
+    display(HTML(f"<li>{child.name}: {child.length}</li>"), append=True)
+
+# `ascii_art` draws the topology as text, which is great for a quick
+# sanity check.
+heading("Topology")
+display(HTML(f"<pre>{tree.ascii_art()}</pre>"), append=True)
+
+# `get_leaves` returns just the tip nodes (the species, in our case).
+heading("Leaves")
+note("The tip nodes correspond to the species at the leaves of the tree:")
+leaf_names = [leaf.name for leaf in tree.get_leaves()]
+display(HTML(f"<p>{leaf_names}</p>"), append=True)
diff --git a/examples/newick/reading_newick_trees/config.toml b/examples/newick/reading_newick_trees/config.toml
new file mode 100644
index 0000000..a41ad4b
--- /dev/null
+++ b/examples/newick/reading_newick_trees/config.toml
@@ -0,0 +1 @@
+packages = ["newick"]
diff --git a/examples/newick/reading_newick_trees/setup.py b/examples/newick/reading_newick_trees/setup.py
new file mode 100644
index 0000000..5986b1d
--- /dev/null
+++ b/examples/newick/reading_newick_trees/setup.py
@@ -0,0 +1,36 @@
+"""Shim setup for the first example. Includes the full IPython shim."""
+import sys
+import types
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    return _display(
+        *args, **kwargs, target=__pyscript_display_target__,
+    )
+
+
+ipython = types.ModuleType("IPython")
+core = types.ModuleType("IPython.core")
+core_display = types.ModuleType("IPython.core.display")
+core_display.display = display
+core_display.HTML = HTML
+ipython.core = core
+core.display = core_display
+ipython.get_ipython = lambda: None
+ipython.display = core_display
+sys.modules["IPython"] = ipython
+sys.modules["IPython.core"] = core
+sys.modules["IPython.core.display"] = core_display
+sys.modules["IPython.display"] = core_display
+
+
+def heading(text, level=2):
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    display(HTML(f"<p>{text}</p>"), append=True)