Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions benchmarks/bench_cache_apply_parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Micro-benchmark: apply_parameters caching.

Measures the speedup from caching parameterized type creation
in _CassandraType.apply_parameters().

Run:
python benchmarks/bench_cache_apply_parameters.py
"""
import timeit
from cassandra.cqltypes import (
MapType, SetType, ListType, TupleType,
Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
_CassandraType,
)


def bench_apply_parameters(n=100_000):
    """Time ``apply_parameters`` with a warm cache and with a cold cache.

    The same four parameterized-type requests (map, set, list, tuple) are
    issued ``n`` times against a populated
    ``_CassandraType._apply_parameters_cache`` and then ``n`` times with the
    cache cleared before every single call. Total time, per-call time and
    the resulting speedup are printed to stdout.

    Note that the cold-cache figure also includes the cost of
    ``cache.clear()`` itself, and that the shared cache is modified by this
    function (it is cleared repeatedly during the second measurement).

    :param n: number of timed iterations per scenario; every iteration
        performs one call per workload entry. Defaults to 100,000.
    :raises ValueError: if ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive iteration count (got %r)" % (n,))

    cache = _CassandraType._apply_parameters_cache

    # Single source of truth for the workload: used for warm-up and timing.
    calls = [
        (MapType, [UTF8Type, Int32Type]),
        (SetType, [FloatType]),
        (ListType, [DoubleType]),
        (TupleType, [Int32Type, UTF8Type, BooleanType]),
    ]

    def report(label, elapsed):
        # One place for the output format so both scenarios stay comparable.
        per_call_us = elapsed / (n * len(calls)) * 1e6
        print(f"{label} apply_parameters ({len(calls)} types x {n} iters): "
              f"{elapsed:.3f}s ({per_call_us:.2f} us/call)")

    def run_cached():
        for cls, subtypes in calls:
            cls.apply_parameters(subtypes)

    def run_uncached():
        for cls, subtypes in calls:
            # Empty the cache before each call so every lookup is a miss.
            cache.clear()
            cls.apply_parameters(subtypes)

    # Warm up the cache so the first measurement only sees cache hits.
    run_cached()

    # Benchmark cached path
    t_cached = timeit.timeit(run_cached, number=n)
    report("Cached", t_cached)

    # Benchmark uncached path (clear cache each iteration)
    t_uncached = timeit.timeit(run_uncached, number=n)
    report("Uncached", t_uncached)

    # A very small n on a coarse clock could yield a zero reading; avoid
    # crashing with ZeroDivisionError after the measurements were taken.
    speedup = t_uncached / t_cached if t_cached else float('inf')
    print(f"Speedup: {speedup:.1f}x")


def main():
    """Script entry point: run the apply_parameters cache benchmark."""
    bench_apply_parameters()


# Run the benchmark only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
10 changes: 9 additions & 1 deletion cassandra/cqltypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ class _CassandraType(object, metaclass=CassandraTypeType):
subtypes = ()
num_subtypes = 0
empty_binary_ok = False
_apply_parameters_cache = {}

support_empty_values = False
"""
Expand Down Expand Up @@ -373,8 +374,15 @@ def apply_parameters(cls, subtypes, names=None):
if cls.num_subtypes != 'UNKNOWN' and len(subtypes) != cls.num_subtypes:
raise ValueError("%s types require %d subtypes (%d given)"
% (cls.typename, cls.num_subtypes, len(subtypes)))
subtypes = tuple(subtypes)
cache_key = (cls, subtypes, tuple(names) if names else names)
cached = cls._apply_parameters_cache.get(cache_key)
if cached is not None:
return cached
newname = cls.cass_parameterized_type_with(subtypes)
return type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
result = type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
cls._apply_parameters_cache[cache_key] = result
return result

@classmethod
def cql_parameterized_type(cls):
Expand Down
90 changes: 90 additions & 0 deletions tests/unit/test_cache_apply_parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""
Unit tests for apply_parameters caching in _CassandraType.
"""
import unittest
from cassandra.cqltypes import (
MapType, SetType, ListType, TupleType,
Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
_CassandraType,
)


class TestApplyParametersCache(unittest.TestCase):
    """Tests for the class cache used by ``_CassandraType.apply_parameters``.

    The cache under test is a single dict shared by the whole process, so
    every test starts from an empty cache and the previous contents are put
    back afterwards; this keeps the suite from leaking state into (or
    wiping state needed by) other tests in the same run.
    """

    def setUp(self):
        """Empty the shared cache and schedule restoring its old contents."""
        cache = _CassandraType._apply_parameters_cache
        # Register the cleanup before mutating so the snapshot is restored
        # even if a test fails or errors.
        self.addCleanup(self._restore_cache, dict(cache))
        cache.clear()

    @staticmethod
    def _restore_cache(snapshot):
        """Replace the shared cache contents with ``snapshot`` in place."""
        cache = _CassandraType._apply_parameters_cache
        cache.clear()
        cache.update(snapshot)

    def test_cache_returns_same_object(self):
        """Repeated apply_parameters calls return the exact same class object."""
        result1 = MapType.apply_parameters([UTF8Type, Int32Type])
        result2 = MapType.apply_parameters([UTF8Type, Int32Type])
        self.assertIs(result1, result2)

    def test_cache_different_subtypes_different_results(self):
        """Different subtype combinations produce different cached classes."""
        r1 = MapType.apply_parameters([UTF8Type, Int32Type])
        r2 = MapType.apply_parameters([Int32Type, UTF8Type])
        self.assertIsNot(r1, r2)

    def test_cache_different_base_types(self):
        """Different base types with same subtypes produce different classes."""
        r1 = SetType.apply_parameters([Int32Type])
        r2 = ListType.apply_parameters([Int32Type])
        self.assertIsNot(r1, r2)

    def test_cached_type_has_correct_subtypes(self):
        """Cached types preserve their subtype information."""
        result = MapType.apply_parameters([UTF8Type, FloatType])
        self.assertEqual(result.subtypes, (UTF8Type, FloatType))
        # Call again, verify cache hit still has correct subtypes
        result2 = MapType.apply_parameters([UTF8Type, FloatType])
        self.assertEqual(result2.subtypes, (UTF8Type, FloatType))

    def test_cached_type_has_correct_cassname(self):
        """Cached types preserve their cassname."""
        result = SetType.apply_parameters([DoubleType])
        self.assertEqual(result.cassname, SetType.cassname)

    def test_cached_type_with_names(self):
        """Caching works correctly with named parameters (UDT-style)."""
        r1 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
        r2 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
        self.assertIs(r1, r2)

    def test_different_names_different_cache_entries(self):
        """Different names produce different cached classes."""
        r1 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
        r2 = TupleType.apply_parameters([Int32Type, UTF8Type], names=['key', 'value'])
        self.assertIsNot(r1, r2)

    def test_names_none_vs_no_names(self):
        """Passing names=None and not passing names use the same cache entry."""
        r1 = MapType.apply_parameters([UTF8Type, Int32Type], names=None)
        r2 = MapType.apply_parameters([UTF8Type, Int32Type])
        self.assertIs(r1, r2)

    def test_tuple_subtypes_accepted(self):
        """Both list and tuple subtypes produce the same cached result."""
        r1 = MapType.apply_parameters([UTF8Type, Int32Type])
        r2 = MapType.apply_parameters((UTF8Type, Int32Type))
        self.assertIs(r1, r2)

    def test_cache_populated(self):
        """The cache dict is populated after apply_parameters calls."""
        # setUp already emptied the cache, so any entry comes from this call.
        MapType.apply_parameters([UTF8Type, Int32Type])
        self.assertGreater(len(_CassandraType._apply_parameters_cache), 0)

    def test_cache_clear_forces_new_creation(self):
        """Clearing the cache forces new type creation."""
        r1 = MapType.apply_parameters([UTF8Type, Int32Type])
        _CassandraType._apply_parameters_cache.clear()
        r2 = MapType.apply_parameters([UTF8Type, Int32Type])
        # After clearing, we get a new class (different object identity)
        self.assertIsNot(r1, r2)
        # But they should be functionally equivalent
        self.assertEqual(r1.subtypes, r2.subtypes)


# Allow running this test module directly, outside a test runner.
if __name__ == '__main__':
    unittest.main()
Loading