diff --git a/benchmarks/bench_cache_apply_parameters.py b/benchmarks/bench_cache_apply_parameters.py
new file mode 100644
index 0000000000..77cd4ef926
--- /dev/null
+++ b/benchmarks/bench_cache_apply_parameters.py
@@ -0,0 +1,78 @@
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Micro-benchmark: apply_parameters caching.
+
+Measures the speedup from caching parameterized type creation
+in _CassandraType.apply_parameters().
+
+Run:
+    python benchmarks/bench_cache_apply_parameters.py
+"""
+import timeit
+from cassandra.cqltypes import (
+    MapType, SetType, ListType, TupleType,
+    Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
+    _CassandraType,
+)
+
+
+def bench_apply_parameters():
+    """Benchmark apply_parameters with cache (repeated calls)."""
+    cache = _CassandraType._apply_parameters_cache
+
+    # Warm up the cache
+    MapType.apply_parameters([UTF8Type, Int32Type])
+    SetType.apply_parameters([FloatType])
+    ListType.apply_parameters([DoubleType])
+    TupleType.apply_parameters([Int32Type, UTF8Type, BooleanType])
+
+    calls = [
+        (MapType, [UTF8Type, Int32Type]),
+        (SetType, [FloatType]),
+        (ListType, [DoubleType]),
+        (TupleType, [Int32Type, UTF8Type, BooleanType]),
+    ]
+
+    def run_cached():
+        for cls, subtypes in calls:
+            cls.apply_parameters(subtypes)
+
+    # Benchmark cached path
+    n = 100_000
+    t_cached = timeit.timeit(run_cached, number=n)
+    print(f"Cached apply_parameters ({len(calls)} types x {n} iters): "
+          f"{t_cached:.3f}s ({t_cached / (n * len(calls)) * 1e6:.2f} us/call)")
+
+    # Benchmark uncached path (clear cache each iteration)
+    def run_uncached():
+        for cls, subtypes in calls:
+            cache.clear()
+            cls.apply_parameters(subtypes)
+
+    t_uncached = timeit.timeit(run_uncached, number=n)
+    print(f"Uncached apply_parameters ({len(calls)} types x {n} iters): "
+          f"{t_uncached:.3f}s ({t_uncached / (n * len(calls)) * 1e6:.2f} us/call)")
+
+    speedup = t_uncached / t_cached
+    print(f"Speedup: {speedup:.1f}x")
+
+
+def main():
+    bench_apply_parameters()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py
index 547a13c979..11ab694cb3 100644
--- a/cassandra/cqltypes.py
+++ b/cassandra/cqltypes.py
@@ -275,6 +275,7 @@ class _CassandraType(object, metaclass=CassandraTypeType):
     subtypes = ()
     num_subtypes = 0
     empty_binary_ok = False
+    _apply_parameters_cache = {}
 
     support_empty_values = False
     """
@@ -373,8 +374,20 @@ def apply_parameters(cls, subtypes, names=None):
         if cls.num_subtypes != 'UNKNOWN' and len(subtypes) != cls.num_subtypes:
             raise ValueError("%s types require %d subtypes (%d given)"
                              % (cls.typename, cls.num_subtypes, len(subtypes)))
+        # Normalise to a tuple so list and tuple arguments share one cache
+        # entry and the key is hashable.
+        subtypes = tuple(subtypes)
+        # Test against None explicitly: an empty list is falsy, so a truthiness
+        # check would leave the raw (unhashable) list inside the cache key.
+        names_key = tuple(names) if names is not None else None
+        cache_key = (cls, subtypes, names_key)
+        cached = cls._apply_parameters_cache.get(cache_key)
+        if cached is not None:
+            return cached
         newname = cls.cass_parameterized_type_with(subtypes)
-        return type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
+        result = type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
+        cls._apply_parameters_cache[cache_key] = result
+        return result
 
     @classmethod
     def cql_parameterized_type(cls):
diff --git a/tests/unit/test_cache_apply_parameters.py b/tests/unit/test_cache_apply_parameters.py
new file mode 100644
index 0000000000..58f41f6acf
--- /dev/null
+++ b/tests/unit/test_cache_apply_parameters.py
@@ -0,0 +1,97 @@
+"""
+Unit tests for apply_parameters caching in _CassandraType.
+"""
+import unittest
+from cassandra.cqltypes import (
+    MapType, SetType, ListType, TupleType,
+    Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
+    _CassandraType,
+)
+
+
+class TestApplyParametersCache(unittest.TestCase):
+
+    def setUp(self):
+        _CassandraType._apply_parameters_cache.clear()
+
+    def test_cache_returns_same_object(self):
+        """Repeated apply_parameters calls return the exact same class object."""
+        result1 = MapType.apply_parameters([UTF8Type, Int32Type])
+        result2 = MapType.apply_parameters([UTF8Type, Int32Type])
+        self.assertIs(result1, result2)
+
+    def test_cache_different_subtypes_different_results(self):
+        """Different subtype combinations produce different cached classes."""
+        r1 = MapType.apply_parameters([UTF8Type, Int32Type])
+        r2 = MapType.apply_parameters([Int32Type, UTF8Type])
+        self.assertIsNot(r1, r2)
+
+    def test_cache_different_base_types(self):
+        """Different base types with same subtypes produce different classes."""
+        r1 = SetType.apply_parameters([Int32Type])
+        r2 = ListType.apply_parameters([Int32Type])
+        self.assertIsNot(r1, r2)
+
+    def test_cached_type_has_correct_subtypes(self):
+        """Cached types preserve their subtype information."""
+        result = MapType.apply_parameters([UTF8Type, FloatType])
+        self.assertEqual(result.subtypes, (UTF8Type, FloatType))
+        # Call again, verify cache hit still has correct subtypes
+        result2 = MapType.apply_parameters([UTF8Type, FloatType])
+        self.assertEqual(result2.subtypes, (UTF8Type, FloatType))
+
+    def test_cached_type_has_correct_cassname(self):
+        """Cached types preserve their cassname."""
+        result = SetType.apply_parameters([DoubleType])
+        self.assertEqual(result.cassname, SetType.cassname)
+
+    def test_cached_type_with_names(self):
+        """Caching works correctly with named parameters (UDT-style)."""
+        r1 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
+        r2 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
+        self.assertIs(r1, r2)
+
+    def test_different_names_different_cache_entries(self):
+        """Different names produce different cached classes."""
+        r1 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
+        r2 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['key', 'value'])
+        self.assertIsNot(r1, r2)
+
+    def test_names_none_vs_no_names(self):
+        """Passing names=None and not passing names use the same cache entry."""
+        r1 = MapType.apply_parameters([UTF8Type, Int32Type], names=None)
+        r2 = MapType.apply_parameters([UTF8Type, Int32Type])
+        self.assertIs(r1, r2)
+
+    def test_empty_names_list_is_cacheable(self):
+        """An empty names list must not break cache-key hashing."""
+        r1 = TupleType.apply_parameters([Int32Type, UTF8Type], names=[])
+        r2 = TupleType.apply_parameters([Int32Type, UTF8Type], names=[])
+        self.assertIs(r1, r2)
+        self.assertEqual(r1.fieldnames, [])
+
+    def test_tuple_subtypes_accepted(self):
+        """Both list and tuple subtypes produce the same cached result."""
+        r1 = MapType.apply_parameters([UTF8Type, Int32Type])
+        r2 = MapType.apply_parameters((UTF8Type, Int32Type))
+        self.assertIs(r1, r2)
+
+    def test_cache_populated(self):
+        """The cache dict is populated after apply_parameters calls."""
+        _CassandraType._apply_parameters_cache.clear()
+        MapType.apply_parameters([UTF8Type, Int32Type])
+        self.assertGreater(len(_CassandraType._apply_parameters_cache), 0)
+
+    def test_cache_clear_forces_new_creation(self):
+        """Clearing the cache forces new type creation."""
+        r1 = MapType.apply_parameters([UTF8Type, Int32Type])
+        _CassandraType._apply_parameters_cache.clear()
+        r2 = MapType.apply_parameters([UTF8Type, Int32Type])
+        # After clearing, we get a new class (different object identity)
+        self.assertIsNot(r1, r2)
+        # But they should be functionally equivalent
+        self.assertEqual(r1.subtypes, r2.subtypes)
+
+
+if __name__ == '__main__':
+    unittest.main()