Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions examples/newick/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# newick Examples

Each sub-directory contains a self-contained example. The order in
which the examples are to appear is specified in `order.json` (an
array of directory names in the expected order).

In each example directory you'll find:

* `config.toml` - must conform to the PyScript configuration
specification
(https://docs.pyscript.net/latest/user-guide/configuration/). It is
parsed and ultimately turned into a JSON representation as part of
the package's API object.
* `setup.py` - Python code for contextual and environmental setup. It
is NOT SEEN BY THE END USER, but it is run before the `code.py` code
is evaluated. This allows us to create useful (IPython) shims, avoid
repeating boilerplate, and so on.
* `code.py` - the actual code added to the editor which forms the
practical example of using the package.
50 changes: 50 additions & 0 deletions examples/newick/building_and_pruning/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# ---------------------------------------------------------------------
# Assemble a tree one node at a time, then cut it back to chosen leaves.
# ---------------------------------------------------------------------

heading("Building a tree from scratch")
note(
    "We'll model a small file-system-like hierarchy using "
    "<code>Node.create</code> and <code>add_descendant</code>."
)

# Directory name -> the files inside it. Dicts keep insertion order, so
# the branches are attached to the root in the order written here.
layout = {
    "docs": ("intro.md", "guide.md"),
    "src": ("main.py", "utils.py", "io.py"),
    "tests": ("test_main.py", "test_io.py"),
}

root = newick.Node.create(name="root")
for directory, filenames in layout.items():
    # Each directory becomes an internal node directly below the root...
    branch = newick.Node.create(name=directory)
    root.add_descendant(branch)
    # ...and each file becomes a leaf below its directory.
    for filename in filenames:
        branch.add_descendant(newick.Node.create(name=filename))

full_tree_art = root.ascii_art()
display(HTML(f"<pre>{full_tree_art}</pre>"), append=True)

# `dumps` turns the tree into Newick text; feeding that text back to
# `loads` yields an equivalent tree.
heading("Serializing to Newick")
serialized = newick.dumps(root)
note("The same tree as a Newick string:")
display(HTML(f"<pre>{serialized}</pre>"), append=True)

# Pruning with `inverse=True` treats the given names as the nodes to
# *keep*: everything else is removed.
heading("Pruning to a subset of leaves")
note(
    "We prune to keep only the <code>.py</code> files, then collapse "
    "internal nodes that no longer branch."
)

python_files = []
for leaf_node in root.get_leaves():
    if leaf_node.name.endswith(".py"):
        python_files.append(leaf_node.name)

root.prune_by_names(python_files, inverse=True)
root.remove_redundant_nodes(keep_leaf_name=True)

pruned_tree_art = root.ascii_art()
display(HTML(f"<pre>{pruned_tree_art}</pre>"), append=True)

remaining = [leaf_node.name for leaf_node in root.get_leaves()]
note(f"Remaining leaves: <code>{remaining}</code>")
1 change: 1 addition & 0 deletions examples/newick/building_and_pruning/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["newick"]
20 changes: 20 additions & 0 deletions examples/newick/building_and_pruning/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Lighter setup for later cells: same names, no IPython shim."""
import js
from pyscript import window, HTML, display as _display

# Make `js.alert` refer to the `alert` exposed on PyScript's `window`
# object, so code calling `js.alert(...)` goes through it.
# NOTE(review): presumably `js` has no usable `alert` of its own in this
# runtime (e.g. when running in a worker) -- confirm.
js.alert = window.alert


def display(*args, **kwargs):
    """Call PyScript's ``display`` with output directed at this example.

    All positional and keyword arguments are forwarded unchanged. When the
    caller does not supply ``target``, it defaults to
    ``__pyscript_display_target__``; an explicit ``target`` is honoured
    rather than raising ``TypeError`` for a duplicated keyword argument.

    NOTE(review): ``__pyscript_display_target__`` is not defined in this
    file; presumably the host injects it before this setup code runs --
    confirm.

    Returns:
        Whatever the underlying PyScript ``display`` call returns.
    """
    # `setdefault` instead of a literal `target=` keyword: combining a fixed
    # keyword with `**kwargs` fails as soon as a caller passes `target`.
    kwargs.setdefault("target", __pyscript_display_target__)
    return _display(*args, **kwargs)


def heading(text, level=2):
    """Show ``text`` as an ``<h{level}>`` heading (``<h2>`` by default).

    ``text`` is placed into the markup verbatim, so it may itself contain
    HTML. The element is passed to ``display`` with ``append=True``.
    """
    tag = f"h{level}"
    markup = f"<{tag}>{text}</{tag}>"
    display(HTML(markup), append=True)


def note(text):
    """Show ``text`` as a ``<p>`` paragraph.

    ``text`` is placed into the markup verbatim, so it may itself contain
    HTML. The element is passed to ``display`` with ``append=True``.
    """
    paragraph = HTML(f"<p>{text}</p>")
    display(paragraph, append=True)


import newick
50 changes: 50 additions & 0 deletions examples/newick/comments_and_annotations/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# ---------------------------------------------------------------------
# Many phylogenetics tools (BEAST, MrBayes, TreeAnnotator, ...) embed
# extra per-node data in Newick comments enclosed in square brackets.
# The `newick` package can parse these as structured properties when
# they follow the NHX or `&key=value,...` conventions.
# ---------------------------------------------------------------------

heading("Reading NHX-style annotations")
note(
    "Here each tip carries a species code and a support value, "
    "stored in the <code>[&&NHX:...]</code> comment after the node name."
)

annotated = (
    "((mouse[&&NHX:species=Mus_musculus:support=98],"
    "rat[&&NHX:species=Rattus_norvegicus:support=95])Rodentia"
    "[&&NHX:support=99],"
    "human[&&NHX:species=Homo_sapiens:support=100])Mammalia;"
)

tree = newick.loads(annotated)[0]

# `walk` yields every node in the tree. `Node.properties` is a dict
# parsed from the NHX-style comment; `Node.comment` is the raw text.
heading("Per-node properties")
# Gather the list items first and show them inside a single <ul>: an
# <li> is only valid markup as a child of a list element.
property_items = []
for node in tree.walk():
    props = node.properties
    if props:
        summary = ", ".join(f"{k}={v}" for k, v in props.items())
        property_items.append(
            f"<li><code>{node.name}</code> &rarr; {summary}</li>"
        )
property_list = "".join(property_items)
display(HTML(f"<ul>{property_list}</ul>"), append=True)

# Comments can also be ignored entirely at parse time, which is handy
# when you just want the bare topology.
heading("Stripping comments")
bare = newick.loads(annotated, strip_comments=True)[0]
note("The same tree, parsed with <code>strip_comments=True</code>:")
display(HTML(f"<pre>{bare.newick}</pre>"), append=True)

# Quoted labels let you put otherwise-reserved characters in node names.
heading("Quoted labels")
note(
    "Single-quoted labels can contain commas, parentheses, and colons. "
    "Doubled single quotes <code>''</code> represent a literal apostrophe."
)
quoted = newick.loads("('Genus species (strain 1)','O''Brien lab isolate')Sample;")[0]
display(HTML(f"<pre>{quoted.ascii_art()}</pre>"), append=True)
# As above, the per-leaf items go into one <ul> rather than being
# displayed as free-standing <li> elements.
label_list = "".join(
    f"<li>name=<code>{leaf.name}</code>, "
    f"unquoted=<code>{leaf.unquoted_name}</code></li>"
    for leaf in quoted.get_leaves()
)
display(HTML(f"<ul>{label_list}</ul>"), append=True)
1 change: 1 addition & 0 deletions examples/newick/comments_and_annotations/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["newick"]
20 changes: 20 additions & 0 deletions examples/newick/comments_and_annotations/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Lighter setup for later cells: same names, no IPython shim."""
import js
from pyscript import window, HTML, display as _display

# Make `js.alert` refer to the `alert` exposed on PyScript's `window`
# object, so code calling `js.alert(...)` goes through it.
# NOTE(review): presumably `js` has no usable `alert` of its own in this
# runtime (e.g. when running in a worker) -- confirm.
js.alert = window.alert


def display(*args, **kwargs):
    """Call PyScript's ``display`` with output directed at this example.

    All positional and keyword arguments are forwarded unchanged. When the
    caller does not supply ``target``, it defaults to
    ``__pyscript_display_target__``; an explicit ``target`` is honoured
    rather than raising ``TypeError`` for a duplicated keyword argument.

    NOTE(review): ``__pyscript_display_target__`` is not defined in this
    file; presumably the host injects it before this setup code runs --
    confirm.

    Returns:
        Whatever the underlying PyScript ``display`` call returns.
    """
    # `setdefault` instead of a literal `target=` keyword: combining a fixed
    # keyword with `**kwargs` fails as soon as a caller passes `target`.
    kwargs.setdefault("target", __pyscript_display_target__)
    return _display(*args, **kwargs)


def heading(text, level=2):
    """Show ``text`` as an ``<h{level}>`` heading (``<h2>`` by default).

    ``text`` is placed into the markup verbatim, so it may itself contain
    HTML. The element is passed to ``display`` with ``append=True``.
    """
    tag = f"h{level}"
    markup = f"<{tag}>{text}</{tag}>"
    display(HTML(markup), append=True)


def note(text):
    """Show ``text`` as a ``<p>`` paragraph.

    ``text`` is placed into the markup verbatim, so it may itself contain
    HTML. The element is passed to ``display`` with ``append=True``.
    """
    paragraph = HTML(f"<p>{text}</p>")
    display(paragraph, append=True)


import newick
5 changes: 5 additions & 0 deletions examples/newick/order.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"reading_newick_trees",
"building_and_pruning",
"comments_and_annotations"
]
39 changes: 39 additions & 0 deletions examples/newick/reading_newick_trees/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
A first look at the `newick` package.

The Newick format is a compact, parenthesis-based notation for trees,
widely used in bioinformatics to describe phylogenies. The `newick`
package parses Newick strings into a tree of `Node` objects you can
inspect and walk.

See https://en.wikipedia.org/wiki/Newick_format for a primer.
"""
# Import from `IPython.display`; importing `display` via
# `IPython.core.display` is deprecated in IPython.
from IPython.display import display, HTML

import newick


# A tiny phylogeny relating four primates. The numbers after the colons
# are branch lengths (e.g. millions of years of divergence).
primates = "((Human:6.0,Chimp:6.0)HomoPan:2.0,(Gorilla:8.0,Orangutan:14.0)Great:1.0)Primates;"

# `loads` parses a Newick string and returns a list of trees (Newick
# files can hold more than one), so we take the first.
tree = newick.loads(primates)[0]

heading("A primate phylogeny")
note(f"Root node name: <code>{tree.name}</code>")
note("Direct descendants of the root, with their branch lengths:")
# Build every list item first and show them inside a single <ul>: an
# <li> is only valid markup as a child of a list element.
child_items = "".join(
    f"<li><code>{child.name}</code>: {child.length}</li>"
    for child in tree.descendants
)
display(HTML(f"<ul>{child_items}</ul>"), append=True)

# `ascii_art` draws the topology as text, which is great for a quick
# sanity check.
heading("Topology")
display(HTML(f"<pre>{tree.ascii_art()}</pre>"), append=True)

# `get_leaves` returns just the tip nodes (the species, in our case).
heading("Leaves")
note("The tip nodes correspond to the species at the leaves of the tree:")
leaf_names = [leaf.name for leaf in tree.get_leaves()]
display(HTML(f"<p><code>{leaf_names}</code></p>"), append=True)
1 change: 1 addition & 0 deletions examples/newick/reading_newick_trees/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["newick"]
36 changes: 36 additions & 0 deletions examples/newick/reading_newick_trees/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Shim setup for the first example. Includes the full IPython shim."""
import sys
import types
import js
from pyscript import window, HTML, display as _display

# Make `js.alert` refer to the `alert` exposed on PyScript's `window`
# object, so code calling `js.alert(...)` goes through it.
# NOTE(review): presumably `js` has no usable `alert` of its own in this
# runtime (e.g. when running in a worker) -- confirm.
js.alert = window.alert


def display(*args, **kwargs):
    """Call PyScript's ``display`` with output directed at this example.

    All positional and keyword arguments are forwarded unchanged. When the
    caller does not supply ``target``, it defaults to
    ``__pyscript_display_target__``; an explicit ``target`` is honoured
    rather than raising ``TypeError`` for a duplicated keyword argument.

    NOTE(review): ``__pyscript_display_target__`` is not defined in this
    file; presumably the host injects it before this setup code runs --
    confirm.

    Returns:
        Whatever the underlying PyScript ``display`` call returns.
    """
    # `setdefault` instead of a literal `target=` keyword: combining a fixed
    # keyword with `**kwargs` fails as soon as a caller passes `target`.
    kwargs.setdefault("target", __pyscript_display_target__)
    return _display(*args, **kwargs)


# Stand-in for the real IPython package: just enough module objects that
# `from IPython.core.display import display, HTML` and
# `from IPython.display import display, HTML` resolve to the `display`
# wrapper defined above and PyScript's `HTML`.

# Leaf module that actually carries the two names.
core_display = types.ModuleType("IPython.core.display")
core_display.display, core_display.HTML = display, HTML

# Intermediate `IPython.core` package exposing the leaf as `.display`.
core = types.ModuleType("IPython.core")
core.display = core_display

# Top-level package; `IPython.display` aliases the same leaf module and
# `get_ipython()` always returns None.
ipython = types.ModuleType("IPython")
ipython.core = core
ipython.get_ipython = lambda: None
ipython.display = core_display

# Register every dotted path so the import system finds the fakes in
# `sys.modules` instead of searching for an installed IPython.
sys.modules.update(
    {
        "IPython": ipython,
        "IPython.core": core,
        "IPython.core.display": core_display,
        "IPython.display": core_display,
    }
)


def heading(text, level=2):
    """Show ``text`` as an ``<h{level}>`` heading (``<h2>`` by default).

    ``text`` is placed into the markup verbatim, so it may itself contain
    HTML. The element is passed to ``display`` with ``append=True``.
    """
    tag = f"h{level}"
    markup = f"<{tag}>{text}</{tag}>"
    display(HTML(markup), append=True)


def note(text):
    """Show ``text`` as a ``<p>`` paragraph.

    ``text`` is placed into the markup verbatim, so it may itself contain
    HTML. The element is passed to ``display`` with ``append=True``.
    """
    paragraph = HTML(f"<p>{text}</p>")
    display(paragraph, append=True)