diff --git a/examples/newick/README.md b/examples/newick/README.md
new file mode 100644
index 0000000..90ab530
--- /dev/null
+++ b/examples/newick/README.md
@@ -0,0 +1,18 @@
+# newick Examples
+
+Each sub-directory contains a self-contained example. The order in
+which the examples are to appear is specified in `order.json` (an
+array of directory names in the expected order).
+
+In each example directory you'll find:
+
+* `config.toml` - must conform to the specification outlined here:
+ <https://docs.pyscript.net/latest/user-guide/configuration/>. This is
+ parsed and ultimately turned into a JSON representation as part of
+ the package's API object.
+* `setup.py` - Python code for contextual and environmental setup. It
+ is NOT SEEN BY THE END USER, but it runs before the `code.py` code is
+ evaluated. It lets us create useful (IPython) shims, avoid repeating
+ boilerplate, and so on.
+* `code.py` - the actual code added to the editor which forms the
+ practical example of using the package.
diff --git a/examples/newick/building_and_pruning/code.py b/examples/newick/building_and_pruning/code.py
new file mode 100644
index 0000000..f3af94a
--- /dev/null
+++ b/examples/newick/building_and_pruning/code.py
@@ -0,0 +1,50 @@
+# ---------------------------------------------------------------------
+# Build a tree node-by-node, then trim it down to a subset of leaves.
+# ---------------------------------------------------------------------
+
+heading("Building a tree from scratch")
+note(
+ "We'll model a small file-system-like hierarchy using "
+ "Node.create and add_descendant."
+)
+
+# Build the root and its immediate children.
+root = newick.Node.create(name="root")
+docs = newick.Node.create(name="docs")
+src = newick.Node.create(name="src")
+tests = newick.Node.create(name="tests")
+root.add_descendant(docs)
+root.add_descendant(src)
+root.add_descendant(tests)
+
+# Add some leaves under each branch.
+for leaf in ("intro.md", "guide.md"):
+ docs.add_descendant(newick.Node.create(name=leaf))
+for leaf in ("main.py", "utils.py", "io.py"):
+ src.add_descendant(newick.Node.create(name=leaf))
+for leaf in ("test_main.py", "test_io.py"):
+ tests.add_descendant(newick.Node.create(name=leaf))
+
+display(HTML(f"<pre>{root.ascii_art()}</pre>"), append=True)  # <pre> keeps the ASCII art aligned
+
+# Serialize to Newick text with `dumps`. Round-tripping with `loads`
+# gives back an equivalent tree.
+heading("Serializing to Newick")
+serialized = newick.dumps(root)
+note("The same tree as a Newick string:")
+display(HTML(f"{serialized}"), append=True)
+
+# Pruning: keep only the Python files. With `inverse=True`, the named
+# nodes are the ones we *keep*; everything else gets pruned.
+heading("Pruning to a subset of leaves")
+note(
+ "We prune to keep only the .py files, then collapse "
+ "internal nodes that no longer branch."
+)
+
+python_files = [n.name for n in root.get_leaves() if n.name.endswith(".py")]
+root.prune_by_names(python_files, inverse=True)
+root.remove_redundant_nodes(keep_leaf_name=True)
+
+display(HTML(f"<pre>{root.ascii_art()}</pre>"), append=True)  # <pre> keeps the ASCII art aligned
+note(f"Remaining leaves: {[n.name for n in root.get_leaves()]}")
diff --git a/examples/newick/building_and_pruning/config.toml b/examples/newick/building_and_pruning/config.toml
new file mode 100644
index 0000000..a41ad4b
--- /dev/null
+++ b/examples/newick/building_and_pruning/config.toml
@@ -0,0 +1 @@
+packages = ["newick"]
diff --git a/examples/newick/building_and_pruning/setup.py b/examples/newick/building_and_pruning/setup.py
new file mode 100644
index 0000000..423f556
--- /dev/null
+++ b/examples/newick/building_and_pruning/setup.py
@@ -0,0 +1,20 @@
+"""Lighter setup for later cells: same names, no IPython shim."""
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):  # thin wrapper around pyscript's display (imported as _display)
+ return _display(*args, **kwargs, target=__pyscript_display_target__)  # forwards all args, always adding this target
+
+
+def heading(text, level=2):
+ display(HTML(f"{text}
"), append=True) + + +import newick diff --git a/examples/newick/comments_and_annotations/code.py b/examples/newick/comments_and_annotations/code.py new file mode 100644 index 0000000..6ac841a --- /dev/null +++ b/examples/newick/comments_and_annotations/code.py @@ -0,0 +1,50 @@ +# --------------------------------------------------------------------- +# Many phylogenetics tools (BEAST, MrBayes, TreeAnnotator, ...) embed +# extra per-node data in Newick comments enclosed in square brackets. +# The `newick` package can parse these as structured properties when +# they follow the NHX or `&key=value,...` conventions. +# --------------------------------------------------------------------- + +heading("Reading NHX-style annotations") +note( + "Here each tip carries a species code and a support value, " + "stored in the[&&NHX:...] comment after the node name."
+)
+
+annotated = (
+ "((mouse[&&NHX:species=Mus_musculus:support=98],"
+ "rat[&&NHX:species=Rattus_norvegicus:support=95])Rodentia"
+ "[&&NHX:support=99],"
+ "human[&&NHX:species=Homo_sapiens:support=100])Mammalia;"
+)
+
+tree = newick.loads(annotated)[0]
+
+# `walk` yields every node in the tree. `Node.properties` is a dict
+# parsed from the NHX-style comment; `Node.comment` is the raw text.
+heading("Per-node properties")
+for node in tree.walk():
+ if node.properties:
+ props = ", ".join(f"{k}={v}" for k, v in node.properties.items())
+ display(HTML(f"{node.name} → {props}strip_comments=True:")
+display(HTML(f"{bare.newick}"), append=True)
+
+# Quoted labels let you put otherwise-reserved characters in node names.
+heading("Quoted labels")
+note(
+ "Single-quoted labels can contain commas, parentheses, and colons. "
+ "Doubled single quotes '' represent a literal apostrophe."
+)
+quoted = newick.loads("('Genus species (strain 1)','O''Brien lab isolate')Sample;")[0]
+display(HTML(f"{quoted.ascii_art()}"), append=True)
+for leaf in quoted.get_leaves():
+ display(HTML(
+ f"{leaf.name}, "
+ f"unquoted={leaf.unquoted_name}{text}
"), append=True) + + +import newick diff --git a/examples/newick/order.json b/examples/newick/order.json new file mode 100644 index 0000000..4194bed --- /dev/null +++ b/examples/newick/order.json @@ -0,0 +1,5 @@ +[ + "reading_newick_trees", + "building_and_pruning", + "comments_and_annotations" +] diff --git a/examples/newick/reading_newick_trees/code.py b/examples/newick/reading_newick_trees/code.py new file mode 100644 index 0000000..08c29da --- /dev/null +++ b/examples/newick/reading_newick_trees/code.py @@ -0,0 +1,39 @@ +""" +A first look at the `newick` package. + +The Newick format is a compact, parenthesis-based notation for trees, +widely used in bioinformatics to describe phylogenies. The `newick` +package parses Newick strings into a tree of `Node` objects you can +inspect and walk. + +See https://en.wikipedia.org/wiki/Newick_format for a primer. +""" +from IPython.core.display import display, HTML + +import newick + + +# A tiny phylogeny relating four primates. The numbers after the colons +# are branch lengths (e.g. millions of years of divergence). +primates = "((Human:6.0,Chimp:6.0)HomoPan:2.0,(Gorilla:8.0,Orangutan:14.0)Great:1.0)Primates;" + +# `loads` parses a Newick string and returns a list of trees (Newick +# files can hold more than one), so we take the first. +tree = newick.loads(primates)[0] + +heading("A primate phylogeny") +note(f"Root node name:{tree.name}")
+note("Direct descendants of the root, with their branch lengths:")
+for child in tree.descendants:
+ display(HTML(f"{child.name}: {child.length}{tree.ascii_art()}"), append=True)
+
+# `get_leaves` returns just the tip nodes (the species, in our case).
+heading("Leaves")
+note("The tip nodes correspond to the species at the leaves of the tree:")
+leaf_names = [leaf.name for leaf in tree.get_leaves()]
+display(HTML(f"{leaf_names}
{text}
"), append=True)