Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions readalongs/align.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Main readalongs module for aligning text and audio."""

import copy
import io
import os
import shutil
import sys
Expand Down Expand Up @@ -249,7 +248,7 @@ def read_noisedict(asr_config: soundswallower.Config) -> Set[str]:

def load_noisedict(fdict):
try:
with open(fdict, "rt", encoding="utf-8") as dictfh:
with open(fdict, encoding="utf-8") as dictfh:
noisewords = set()
for line in dictfh:
if line.startswith("##") or line.startswith(";;"):
Expand Down Expand Up @@ -364,15 +363,15 @@ def align_sequence(
# Generate dictionary and FSG for the current sequence of words
dict_data = make_dict(word_sequence.words, xml_path, unit=unit)
if save_temps is not None:
dict_file = io.open(save_temps + ".dict" + i_suffix, "wb")
dict_file = open(save_temps + ".dict" + i_suffix, "wb")
else:
dict_file = PortableNamedTemporaryFile(prefix="readalongs_dict_", delete=True)
dict_file.write(dict_data.encode("utf-8"))
dict_file.close()

fsg_data = make_fsg(word_sequence.words, xml_path)
if save_temps is not None:
fsg_file = io.open(save_temps + ".fsg" + i_suffix, "wb")
fsg_file = open(save_temps + ".fsg" + i_suffix, "wb")
else:
fsg_file = PortableNamedTemporaryFile(prefix="readalongs_fsg_", delete=True)
fsg_file.write(fsg_data.encode("utf-8"))
Expand Down
7 changes: 3 additions & 4 deletions readalongs/align_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Functions for saving alignments in various file formats.
"""

import io
from datetime import timedelta
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

Expand Down Expand Up @@ -399,7 +398,7 @@ def create_input_ras(**kwargs):
try:
if kwargs.get("input_file_name", False):
filename = kwargs["input_file_name"]
with io.open(kwargs["input_file_name"], encoding="utf-8-sig") as f:
with open(kwargs["input_file_name"], encoding="utf-8-sig") as f:
text = f.readlines()
elif kwargs.get("input_file_handle", False):
filename = kwargs["input_file_handle"].name
Expand All @@ -418,10 +417,10 @@ def create_input_ras(**kwargs):
save_temps = kwargs.get("save_temps", None)
if kwargs.get("output_file", False):
filename = kwargs["output_file"]
outfile = io.open(filename, "wb")
outfile = open(filename, "wb")
elif save_temps is not None:
filename = save_temps + ".input.readalong"
outfile = io.open(filename, "wb")
outfile = open(filename, "wb")
else:
outfile = PortableNamedTemporaryFile(
prefix="readalongs_xml_", suffix=".readalong", delete=True
Expand Down
2 changes: 1 addition & 1 deletion readalongs/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def make_xml(

make_xml_args = {param.name: param.default for param in cli.make_xml.params}
try:
with open(plaintextfile, "r", encoding="utf-8-sig") as plaintextfile_handle:
with open(plaintextfile, encoding="utf-8-sig") as plaintextfile_handle:
make_xml_args.update(
plaintextfile=plaintextfile_handle,
xmlfile=xmlfile,
Expand Down
8 changes: 4 additions & 4 deletions readalongs/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def make_xml(**kwargs):
_, filename = create_input_ras(
input_file_handle=input_file, text_languages=languages
)
with io.open(filename, encoding="utf-8-sig") as f:
with open(filename, encoding="utf-8-sig") as f:
sys.stdout.write(f.read())
else:
if not str(out_file).endswith(".readalong"):
Expand All @@ -564,7 +564,7 @@ def make_xml(**kwargs):
except (RuntimeError, OSError) as e:
raise click.UsageError(str(e)) from e

LOGGER.info("Wrote {}".format(out_file))
LOGGER.info(f"Wrote {out_file}")


@cli.command( # type: ignore # quench spurious mypy error: "Command" has no attribute "command"
Expand Down Expand Up @@ -637,7 +637,7 @@ def tokenize(**kwargs):
write_xml(sys.stdout.buffer, xml)
else:
save_xml(output_path, xml)
LOGGER.info("Wrote {}".format(output_path))
LOGGER.info(f"Wrote {output_path}")


@cli.command( # type: ignore # quench spurious mypy error: "Command" has no attribute "command"
Expand Down Expand Up @@ -759,7 +759,7 @@ def g2p(**kwargs):
write_xml(sys.stdout.buffer, xml)
else:
save_xml(output_path, xml)
LOGGER.info("Wrote {}".format(output_path))
LOGGER.info(f"Wrote {output_path}")

if not valid:
LOGGER.error(
Expand Down
2 changes: 0 additions & 2 deletions readalongs/epub/create_epub.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

######################################################################
#
# create_epub.py
#
######################################################################

from __future__ import absolute_import, division, print_function, unicode_literals

import os
import shutil
Expand Down
58 changes: 4 additions & 54 deletions readalongs/text/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@
# TODO: Add Google standard format docstrings
############################################

import json
import os
import re
import zipfile
from collections import OrderedDict
from datetime import datetime
from io import TextIOWrapper
from typing import IO, Union

from lxml import etree
Expand Down Expand Up @@ -140,16 +137,10 @@ def parse_xml(xml_text: Union[str, bytes]) -> etree.ElementTree:
)


def load_xml_zip(zip_path, input_path) -> etree.ElementTree:
with zipfile.ZipFile(zip_path, "r") as fin_zip:
with fin_zip.open(input_path, "r") as fin:
return load_xml(fin)


def write_xml(output_filelike, xml):
"""Write XML to already opened file-like object"""
"""Write XML to already opened binary-mode file-like object"""
output_filelike.write(etree.tostring(xml, encoding="utf-8", xml_declaration=True))
output_filelike.write("\n".encode("utf-8"))
output_filelike.write(b"\n")


def save_xml(output_path, xml):
Expand All @@ -163,27 +154,11 @@ def xml_to_string(xml) -> str:
return etree.tostring(xml, encoding="utf-8", xml_declaration=True).decode()


def save_xml_zip(zip_path, output_path, xml):
ensure_dirs(zip_path)
with zipfile.ZipFile(zip_path, "a", compression=zipfile.ZIP_DEFLATED) as fout_zip:
fout_zip.writestr(
output_path,
etree.tostring(xml, encoding="utf-8", xml_declaration=True) + "\n",
)


def load_txt(input_path):
with open(input_path, "r", encoding="utf-8-sig") as fin:
with open(input_path, encoding="utf-8-sig") as fin:
return fin.read()


def load_txt_zip(zip_path, input_path):
with zipfile.ZipFile(zip_path, "r") as fin_zip:
with fin_zip.open(input_path, "r") as fin:
fin_utf8 = TextIOWrapper(fin, encoding="utf-8")
return fin_utf8.read()


def save_txt(output_path, txt):
ensure_dirs(output_path)
with open(output_path, "w", encoding="utf-8") as fout:
Expand All @@ -196,31 +171,6 @@ def save_txt_zip(zip_path, output_path, txt):
fout_zip.writestr(output_path, txt.encode("utf-8"))


def load_json(input_path):
with open(input_path, "r", encoding="utf-8-sig") as fin:
return json.load(fin, object_pairs_hook=OrderedDict)


def load_json_zip(zip_path, input_path):
with zipfile.ZipFile(zip_path, "r") as fin_zip:
with fin_zip.open(input_path, "r") as fin:
fin_utf8 = TextIOWrapper(fin, encoding="utf-8")
return json.loads(fin_utf8.read(), object_pairs_hook=OrderedDict)


def save_json(output_path, obj):
ensure_dirs(output_path)
with open(output_path, "w", encoding="utf-8") as fout:
fout.write(unicode(json.dumps(obj, ensure_ascii=False, indent=4)))


def save_json_zip(zip_path, output_path, obj):
ensure_dirs(zip_path)
txt = unicode(json.dumps(obj, ensure_ascii=False, indent=4))
with zipfile.ZipFile(zip_path, "a") as fout_zip:
fout_zip.writestr(output_path, txt.encode("utf-8"))


def copy_file_to_zip(zip_path, origin_path, destination_path):
ensure_dirs(zip_path)
with zipfile.ZipFile(zip_path, "a", compression=zipfile.ZIP_DEFLATED) as fout_zip:
Expand Down Expand Up @@ -383,7 +333,7 @@ def save_readme_txt(
):
# setup path for default WordPress upload directory
today = datetime.now()
wp_upload_folder = "/wp-content/uploads/{:%Y/%m}/".format(today)
wp_upload_folder = f"/wp-content/uploads/{today:%Y/%m}/"
with open(output_path, "w", encoding="utf-8") as fout:
fout.write(
TEMPLATE_README_TXT.format(
Expand Down
3 changes: 0 additions & 3 deletions readalongs/waveform2svg/audio_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

###################################################
#
Expand All @@ -14,10 +13,8 @@
#
###################################################

from __future__ import absolute_import, division, print_function, unicode_literals

import os
from io import open

import librosa # type: ignore
import numpy as np
Expand Down
2 changes: 0 additions & 2 deletions readalongs/waveform2svg/make_all_svgs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

########################################################
#
Expand All @@ -9,7 +8,6 @@
#
########################################################

from __future__ import absolute_import, division, print_function, unicode_literals

import argparse

Expand Down
2 changes: 0 additions & 2 deletions readalongs/waveform2svg/pitch2svg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

###################################################
#
Expand All @@ -14,7 +13,6 @@
#
###################################################

from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
from math import floor
Expand Down
2 changes: 0 additions & 2 deletions readalongs/waveform2svg/waveform2svg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

###################################################
#
Expand All @@ -11,7 +10,6 @@
#
###################################################

from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
from math import ceil, floor
Expand Down
2 changes: 1 addition & 1 deletion tests/basic_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def setUp(self):
else:
# Alternative tempdir code keeps it after running, for manual inspection:
tempdir_name = tempfile.mkdtemp(prefix=tempdir_prefix, dir=".")
print("tmpdir={}".format(tempdir_name))
print(f"tmpdir={tempdir_name}")
self.tempdir = Path(tempdir_name)

def tearDown(self):
Expand Down
5 changes: 2 additions & 3 deletions tests/test_align_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def test_invoke_align(self) -> None:
)
with open(
output / "tempfiles/output.tokenized.readalong",
"r",
encoding="utf-8",
) as f:
self.assertNotIn("\ufeff", f.read())
Expand Down Expand Up @@ -292,7 +291,7 @@ def test_align_english(self):
tokenized_file = join(
self.tempdir, "eng-output", "tempfiles", "eng-output.g2p.readalong"
)
with open(tokenized_file, "r", encoding="utf8") as f:
with open(tokenized_file, encoding="utf8") as f:
tok_output = f.read()

self.assertIn(g2p_ref, tok_output)
Expand Down Expand Up @@ -616,7 +615,7 @@ def slurp_bin(filename):
return f.read()

def slurp_text(filename, encoding):
with open(filename, "r", encoding=encoding) as f:
with open(filename, encoding=encoding) as f:
return f.read()

base_file = write_file(self.tempdir / "add-bom-input.txt", "Random Text été")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_anchors_align_modes(self):
</body></doc>
"""
xml_file = os.path.join(self.tempdir, "text-with-anchors.readalong")
with open(xml_file, "wt", encoding="utf8") as f:
with open(xml_file, "w", encoding="utf8") as f:
print(xml_with_anchors, file=f)
with self.assertLogs(LOGGER, level="INFO") as cm:
with silence_c_stderr(), redirect_stderr(StringIO()):
Expand Down
3 changes: 1 addition & 2 deletions tests/test_dtd.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class TestDTD(TestCase):
"""Test the XML DTD"""

def setUp(self):
with open(DTDPATH, "rt") as infh:
with open(DTDPATH) as infh:
self.dtd = etree.DTD(infh)

def test_valid_inputs(self):
Expand Down Expand Up @@ -96,7 +96,6 @@ def test_backwards_compatibility(self):
os.path.join(
dirname(__file__), "..", "readalongs", "static", "read-along-1.0.dtd"
),
"rt",
) as dtdFile:
dtd = etree.DTD(dtdFile)
with open(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_force_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_align_switch_am(self):
with TemporaryDirectory(prefix="readalongs_am_") as tempdir:
custom_am_path = os.path.join(tempdir, "en-us")
shutil.copytree(get_model_path("en-us"), custom_am_path)
with open(os.path.join(custom_am_path, "noisedict"), "at") as fh:
with open(os.path.join(custom_am_path, "noisedict"), "a") as fh:
fh.write(";; here is a comment\n")
fh.write("[BOGUS] SIL\n")
with redirect_stderr(StringIO()):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_g2p_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def test_align_with_preg2p(self):
_ = align_audio(
text_file, audio_file, save_temps=os.path.join(self.tempdir, "foo")
)
with open(os.path.join(self.tempdir, "foo.dict"), "r", encoding="utf8") as f:
with open(os.path.join(self.tempdir, "foo.dict"), encoding="utf8") as f:
dict_file = f.read()
self.assertIn("S AH S IY", dict_file) # "ceci" in fra
self.assertIn("DH IH S", dict_file) # "this" in eng
Expand Down
2 changes: 1 addition & 1 deletion tests/test_make_xml_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class TestMakeXMLCli(BasicTestCase):
def setUp(self):
super().setUp()
self.empty_file = os.path.join(self.tempdir, "empty.txt")
with io.open(self.empty_file, "wb"):
with open(self.empty_file, "wb"):
pass

def test_invoke_prepare(self):
Expand Down
Loading
Loading