diff --git a/readalongs/align.py b/readalongs/align.py index e679e72d..f4ab9e93 100644 --- a/readalongs/align.py +++ b/readalongs/align.py @@ -1,7 +1,6 @@ """Main readalongs module for aligning text and audio.""" import copy -import io import os import shutil import sys @@ -249,7 +248,7 @@ def read_noisedict(asr_config: soundswallower.Config) -> Set[str]: def load_noisedict(fdict): try: - with open(fdict, "rt", encoding="utf-8") as dictfh: + with open(fdict, encoding="utf-8") as dictfh: noisewords = set() for line in dictfh: if line.startswith("##") or line.startswith(";;"): @@ -364,7 +363,7 @@ def align_sequence( # Generate dictionary and FSG for the current sequence of words dict_data = make_dict(word_sequence.words, xml_path, unit=unit) if save_temps is not None: - dict_file = io.open(save_temps + ".dict" + i_suffix, "wb") + dict_file = open(save_temps + ".dict" + i_suffix, "wb") else: dict_file = PortableNamedTemporaryFile(prefix="readalongs_dict_", delete=True) dict_file.write(dict_data.encode("utf-8")) @@ -372,7 +371,7 @@ def align_sequence( fsg_data = make_fsg(word_sequence.words, xml_path) if save_temps is not None: - fsg_file = io.open(save_temps + ".fsg" + i_suffix, "wb") + fsg_file = open(save_temps + ".fsg" + i_suffix, "wb") else: fsg_file = PortableNamedTemporaryFile(prefix="readalongs_fsg_", delete=True) fsg_file.write(fsg_data.encode("utf-8")) diff --git a/readalongs/align_utils.py b/readalongs/align_utils.py index 7d9415f7..11feb4db 100644 --- a/readalongs/align_utils.py +++ b/readalongs/align_utils.py @@ -2,7 +2,6 @@ Functions for saving alignments in various file formats. """ -import io from datetime import timedelta from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union @@ -399,7 +398,7 @@ def create_input_ras(**kwargs): try: if kwargs.get("input_file_name", False): filename = kwargs["input_file_name"] - with io.open(kwargs["input_file_name"], encoding="utf-8-sig") as f: + with open(kwargs["input_file_name"], encoding="utf-8-sig") as f: text = f.readlines() elif kwargs.get("input_file_handle", False): filename = kwargs["input_file_handle"].name @@ -418,10 +417,10 @@ def create_input_ras(**kwargs): save_temps = kwargs.get("save_temps", None) if kwargs.get("output_file", False): filename = kwargs["output_file"] - outfile = io.open(filename, "wb") + outfile = open(filename, "wb") elif save_temps is not None: filename = save_temps + ".input.readalong" - outfile = io.open(filename, "wb") + outfile = open(filename, "wb") else: outfile = PortableNamedTemporaryFile( prefix="readalongs_xml_", suffix=".readalong", delete=True diff --git a/readalongs/api.py b/readalongs/api.py index b6d25e99..165b2498 100644 --- a/readalongs/api.py +++ b/readalongs/api.py @@ -166,7 +166,7 @@ def make_xml( make_xml_args = {param.name: param.default for param in cli.make_xml.params} try: - with open(plaintextfile, "r", encoding="utf-8-sig") as plaintextfile_handle: + with open(plaintextfile, encoding="utf-8-sig") as plaintextfile_handle: make_xml_args.update( plaintextfile=plaintextfile_handle, xmlfile=xmlfile, diff --git a/readalongs/cli.py b/readalongs/cli.py index 643d2070..aa5d5be3 100644 --- a/readalongs/cli.py +++ b/readalongs/cli.py @@ -546,7 +546,7 @@ def make_xml(**kwargs): _, filename = create_input_ras( input_file_handle=input_file, text_languages=languages ) - with io.open(filename, encoding="utf-8-sig") as f: + with open(filename, encoding="utf-8-sig") as f: sys.stdout.write(f.read()) else: if not str(out_file).endswith(".readalong"): @@ -564,7 +564,7 @@ def make_xml(**kwargs): except (RuntimeError, OSError) as e: raise click.UsageError(str(e)) from e - LOGGER.info("Wrote {}".format(out_file)) + LOGGER.info(f"Wrote {out_file}") @cli.command( # type: ignore # quench spurious mypy error: "Command" has no attribute "command" @@ -637,7 +637,7 @@ def tokenize(**kwargs): write_xml(sys.stdout.buffer, xml) else: save_xml(output_path, xml) - LOGGER.info("Wrote {}".format(output_path)) + LOGGER.info(f"Wrote {output_path}") @cli.command( # type: ignore # quench spurious mypy error: "Command" has no attribute "command" @@ -759,7 +759,7 @@ def g2p(**kwargs): write_xml(sys.stdout.buffer, xml) else: save_xml(output_path, xml) - LOGGER.info("Wrote {}".format(output_path)) + LOGGER.info(f"Wrote {output_path}") if not valid: LOGGER.error( diff --git a/readalongs/epub/create_epub.py b/readalongs/epub/create_epub.py index a62c2a7b..250198cf 100644 --- a/readalongs/epub/create_epub.py +++ b/readalongs/epub/create_epub.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- ###################################################################### # @@ -7,7 +6,6 @@ # ###################################################################### -from __future__ import absolute_import, division, print_function, unicode_literals import os import shutil diff --git a/readalongs/text/util.py b/readalongs/text/util.py index d802c3d4..0514565b 100644 --- a/readalongs/text/util.py +++ b/readalongs/text/util.py @@ -7,13 +7,10 @@ # TODO: Add Google standard format docstrings ############################################ -import json import os import re import zipfile -from collections import OrderedDict from datetime import datetime -from io import TextIOWrapper from typing import IO, Union from lxml import etree @@ -140,16 +137,10 @@ def parse_xml(xml_text: Union[str, bytes]) -> etree.ElementTree: ) -def load_xml_zip(zip_path, input_path) -> etree.ElementTree: - with zipfile.ZipFile(zip_path, "r") as fin_zip: - with fin_zip.open(input_path, "r") as fin: - return load_xml(fin) - - def write_xml(output_filelike, xml): - """Write XML to already opened file-like object""" + """Write XML to already opened binary-mode file-like object""" output_filelike.write(etree.tostring(xml, encoding="utf-8", xml_declaration=True)) - output_filelike.write("\n".encode("utf-8")) + output_filelike.write(b"\n") def save_xml(output_path, xml): @@ -163,27 +154,11 @@ def xml_to_string(xml) -> str: return etree.tostring(xml, encoding="utf-8", xml_declaration=True).decode() -def save_xml_zip(zip_path, output_path, xml): - ensure_dirs(zip_path) - with zipfile.ZipFile(zip_path, "a", compression=zipfile.ZIP_DEFLATED) as fout_zip: - fout_zip.writestr( - output_path, - etree.tostring(xml, encoding="utf-8", xml_declaration=True) + "\n", - ) - - def load_txt(input_path): - with open(input_path, "r", encoding="utf-8-sig") as fin: + with open(input_path, encoding="utf-8-sig") as fin: return fin.read() -def load_txt_zip(zip_path, input_path): - with zipfile.ZipFile(zip_path, "r") as fin_zip: - with fin_zip.open(input_path, "r") as fin: - fin_utf8 = TextIOWrapper(fin, encoding="utf-8") - return fin_utf8.read() - - def save_txt(output_path, txt): ensure_dirs(output_path) with open(output_path, "w", encoding="utf-8") as fout: @@ -196,31 +171,6 @@ def save_txt_zip(zip_path, output_path, txt): fout_zip.writestr(output_path, txt.encode("utf-8")) -def load_json(input_path): - with open(input_path, "r", encoding="utf-8-sig") as fin: - return json.load(fin, object_pairs_hook=OrderedDict) - - -def load_json_zip(zip_path, input_path): - with zipfile.ZipFile(zip_path, "r") as fin_zip: - with fin_zip.open(input_path, "r") as fin: - fin_utf8 = TextIOWrapper(fin, encoding="utf-8") - return json.loads(fin_utf8.read(), object_pairs_hook=OrderedDict) - - -def save_json(output_path, obj): - ensure_dirs(output_path) - with open(output_path, "w", encoding="utf-8") as fout: - fout.write(unicode(json.dumps(obj, ensure_ascii=False, indent=4))) - - -def save_json_zip(zip_path, output_path, obj): - ensure_dirs(zip_path) - txt = unicode(json.dumps(obj, ensure_ascii=False, indent=4)) - with zipfile.ZipFile(zip_path, "a") as fout_zip: - fout_zip.writestr(output_path, txt.encode("utf-8")) - - def copy_file_to_zip(zip_path, origin_path, destination_path): ensure_dirs(zip_path) with zipfile.ZipFile(zip_path, "a", compression=zipfile.ZIP_DEFLATED) as fout_zip: @@ -383,7 +333,7 @@ def save_readme_txt( ): # setup path for default WordPress upload directory today = datetime.now() - wp_upload_folder = "/wp-content/uploads/{:%Y/%m}/".format(today) + wp_upload_folder = f"/wp-content/uploads/{today:%Y/%m}/" with open(output_path, "w", encoding="utf-8") as fout: fout.write( TEMPLATE_README_TXT.format( diff --git a/readalongs/waveform2svg/audio_util.py b/readalongs/waveform2svg/audio_util.py index d274e2a9..5ef2427e 100644 --- a/readalongs/waveform2svg/audio_util.py +++ b/readalongs/waveform2svg/audio_util.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- ################################################### # @@ -14,10 +13,8 @@ # ################################################### -from __future__ import absolute_import, division, print_function, unicode_literals import os -from io import open import librosa # type: ignore import numpy as np diff --git a/readalongs/waveform2svg/make_all_svgs.py b/readalongs/waveform2svg/make_all_svgs.py index 972fec57..905469a9 100644 --- a/readalongs/waveform2svg/make_all_svgs.py +++ b/readalongs/waveform2svg/make_all_svgs.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- ######################################################## # @@ -9,7 +8,6 @@ # ######################################################## -from __future__ import absolute_import, division, print_function, unicode_literals import argparse diff --git a/readalongs/waveform2svg/pitch2svg.py b/readalongs/waveform2svg/pitch2svg.py index 4880f0db..50d666fa 100644 --- a/readalongs/waveform2svg/pitch2svg.py +++ b/readalongs/waveform2svg/pitch2svg.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- ################################################### # @@ -14,7 +13,6 @@ # ################################################### -from __future__ import absolute_import, division, print_function, unicode_literals import argparse from math import floor diff --git a/readalongs/waveform2svg/waveform2svg.py b/readalongs/waveform2svg/waveform2svg.py index 2098b597..b1902263 100644 --- a/readalongs/waveform2svg/waveform2svg.py +++ b/readalongs/waveform2svg/waveform2svg.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- ################################################### # @@ -11,7 +10,6 @@ # ################################################### -from __future__ import absolute_import, division, print_function, unicode_literals import argparse from math import ceil, floor diff --git a/tests/basic_test_case.py b/tests/basic_test_case.py index 50f18703..c48baab3 100644 --- a/tests/basic_test_case.py +++ b/tests/basic_test_case.py @@ -54,7 +54,7 @@ def setUp(self): else: # Alternative tempdir code keeps it after running, for manual inspection: tempdir_name = tempfile.mkdtemp(prefix=tempdir_prefix, dir=".") - print("tmpdir={}".format(tempdir_name)) + print(f"tmpdir={tempdir_name}") self.tempdir = Path(tempdir_name) def tearDown(self): diff --git a/tests/test_align_cli.py b/tests/test_align_cli.py index f0cef5eb..317b02c0 100755 --- a/tests/test_align_cli.py +++ b/tests/test_align_cli.py @@ -89,7 +89,6 @@ def test_invoke_align(self) -> None: ) with open( output / "tempfiles/output.tokenized.readalong", - "r", encoding="utf-8", ) as f: self.assertNotIn("\ufeff", f.read()) @@ -292,7 +291,7 @@ def test_align_english(self): tokenized_file = join( self.tempdir, "eng-output", "tempfiles", "eng-output.g2p.readalong" ) - with open(tokenized_file, "r", encoding="utf8") as f: + with open(tokenized_file, encoding="utf8") as f: tok_output = f.read() self.assertIn(g2p_ref, tok_output) @@ -616,7 +615,7 @@ def slurp_bin(filename): return f.read() def slurp_text(filename, encoding): - with open(filename, "r", encoding=encoding) as f: + with open(filename, encoding=encoding) as f: return f.read() base_file = write_file(self.tempdir / "add-bom-input.txt", "Random Text été") diff --git a/tests/test_anchors.py b/tests/test_anchors.py index 515e9bf8..04a099d2 100755 --- a/tests/test_anchors.py +++ b/tests/test_anchors.py @@ -85,7 +85,7 @@ def test_anchors_align_modes(self): """ xml_file = os.path.join(self.tempdir, "text-with-anchors.readalong") - with open(xml_file, "wt", encoding="utf8") as f: + with open(xml_file, "w", encoding="utf8") as f: print(xml_with_anchors, file=f) with self.assertLogs(LOGGER, level="INFO") as cm: with silence_c_stderr(), redirect_stderr(StringIO()): diff --git a/tests/test_dtd.py b/tests/test_dtd.py index 3396c0d8..45d199b2 100644 --- a/tests/test_dtd.py +++ b/tests/test_dtd.py @@ -43,7 +43,7 @@ class TestDTD(TestCase): """Test the XML DTD""" def setUp(self): - with open(DTDPATH, "rt") as infh: + with open(DTDPATH) as infh: self.dtd = etree.DTD(infh) def test_valid_inputs(self): @@ -96,7 +96,6 @@ def test_backwards_compatibility(self): os.path.join( dirname(__file__), "..", "readalongs", "static", "read-along-1.0.dtd" ), - "rt", ) as dtdFile: dtd = etree.DTD(dtdFile) with open( diff --git a/tests/test_force_align.py b/tests/test_force_align.py index 7b79a812..471825e2 100755 --- a/tests/test_force_align.py +++ b/tests/test_force_align.py @@ -133,7 +133,7 @@ def test_align_switch_am(self): with TemporaryDirectory(prefix="readalongs_am_") as tempdir: custom_am_path = os.path.join(tempdir, "en-us") shutil.copytree(get_model_path("en-us"), custom_am_path) - with open(os.path.join(custom_am_path, "noisedict"), "at") as fh: + with open(os.path.join(custom_am_path, "noisedict"), "a") as fh: fh.write(";; here is a comment\n") fh.write("[BOGUS] SIL\n") with redirect_stderr(StringIO()): diff --git a/tests/test_g2p_cli.py b/tests/test_g2p_cli.py index f8c71b9b..b5063de2 100755 --- a/tests/test_g2p_cli.py +++ b/tests/test_g2p_cli.py @@ -338,7 +338,7 @@ def test_align_with_preg2p(self): _ = align_audio( text_file, audio_file, save_temps=os.path.join(self.tempdir, "foo") ) - with open(os.path.join(self.tempdir, "foo.dict"), "r", encoding="utf8") as f: + with open(os.path.join(self.tempdir, "foo.dict"), encoding="utf8") as f: dict_file = f.read() self.assertIn("S AH S IY", dict_file) # "ceci" in fra self.assertIn("DH IH S", dict_file) # "this" in eng diff --git a/tests/test_make_xml_cli.py b/tests/test_make_xml_cli.py index 4f754efa..4f0b93de 100755 --- a/tests/test_make_xml_cli.py +++ b/tests/test_make_xml_cli.py @@ -31,7 +31,7 @@ class TestMakeXMLCli(BasicTestCase): def setUp(self): super().setUp() self.empty_file = os.path.join(self.tempdir, "empty.txt") - with io.open(self.empty_file, "wb"): + with open(self.empty_file, "wb"): pass def test_invoke_prepare(self): diff --git a/tests/test_misc.py b/tests/test_misc.py index e6937613..a9ec95ed 100755 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -5,7 +5,6 @@ import itertools import os import sys -import zipfile import click from lxml import etree @@ -20,7 +19,6 @@ get_lang_attrib, get_word_text, load_xml, - load_xml_zip, parse_time, parse_xml, save_txt, @@ -276,15 +274,6 @@ def test_save_txt(self): loaded_xml = load_xml(filename) self.assertEqual(etree.tostring(loaded_xml), xml_text.encode(encoding="ascii")) - def test_load_xml_zip(self): - xml_text = 'text' - with zipfile.ZipFile(self.tempdir / "file.zip", "w") as myzip: - myzip.writestr("file.readalong", xml_text) - self.assertEqual( - etree.tostring(load_xml_zip(self.tempdir / "file.zip", "file.readalong")), - xml_text.encode(encoding="ascii"), - ) - def test_capture_logs(self): with capture_logs() as captured_logs: LOGGER.info("foo bar baz") diff --git a/tests/test_temp_file.py b/tests/test_temp_file.py index f31a4dfc..768041c3 100755 --- a/tests/test_temp_file.py +++ b/tests/test_temp_file.py @@ -31,7 +31,7 @@ def test_ntf(self): tf.write("Some text") # LOGGER.debug("tf.name {}".format(tf.name)) tf.close() - readf = open(tf.name, mode="r", encoding="utf8") + readf = open(tf.name, encoding="utf8") text = readf.readline() self.assertEqual(text, "Some text") readf.close() @@ -45,7 +45,7 @@ def test_delete_false(self): tf.write("Some text") tf.close() # LOGGER.info(tf.name) - readf = open(tf.name, mode="r", encoding="utf8") + readf = open(tf.name, encoding="utf8") text = readf.readline() readf.close() self.assertEqual(text, "Some text") @@ -64,7 +64,7 @@ def test_typical_usage(self): tf.write("Some text") tf.close() # LOGGER.info(tf.name) - readf = open(tf.name, mode="r", encoding="utf8") + readf = open(tf.name, encoding="utf8") text = readf.readline() readf.close() self.assertEqual(text, "Some text") @@ -79,7 +79,7 @@ def test_using_with(self): tf.close() # LOGGER.info(tf.name) filename = tf.name - readf = open(tf.name, mode="r", encoding="utf8") + readf = open(tf.name, encoding="utf8") text = readf.readline() readf.close() self.assertEqual(text, "Some text") diff --git a/tests/test_tokenize_cli.py b/tests/test_tokenize_cli.py index f3b3b27b..e730e229 100755 --- a/tests/test_tokenize_cli.py +++ b/tests/test_tokenize_cli.py @@ -2,7 +2,6 @@ """Test suite for readalongs tokenize""" -import io import os import sys @@ -45,7 +44,7 @@ def test_generate_output_name(self): def test_with_stdin(self): """Test readalongs reading from stdin and writing to stdout""" - with io.open(self.rasfile, encoding="utf8") as f: + with open(self.rasfile, encoding="utf8") as f: inputtext = f.read() results = self.runner.invoke(tokenize, "-", input=inputtext) self.assertEqual(results.exit_code, 0)