From cc288fe93b719e2fad5e1929004878aa04095666 Mon Sep 17 00:00:00 2001 From: Albert Skalt Date: Fri, 6 Feb 2026 19:12:29 +0300 Subject: [PATCH] add ability to map tokens during tokenization This patch adds a method to map tokens with a provided mapper during tokenization. This way, tokens can be replaced without an additional pass. --- src/tokenizer.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index cc5a2aa17..852b73164 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -934,6 +934,16 @@ impl<'a> Tokenizer<'a> { pub fn tokenize_with_location_into_buf( &mut self, buf: &mut Vec<TokenWithSpan>, + ) -> Result<(), TokenizerError> { + self.tokenize_with_location_into_buf_with_mapper(buf, |token| token) + } + + /// Tokenize the statement and produce a vector of tokens, mapping each token + /// with the provided `mapper` + pub fn tokenize_with_location_into_buf_with_mapper( + &mut self, + buf: &mut Vec<TokenWithSpan>, + mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan, ) -> Result<(), TokenizerError> { let mut state = State { peekable: self.query.chars().peekable(), @@ -952,10 +962,10 @@ impl<'a> Tokenizer<'a> { && comment.starts_with('!') => { // Re-tokenize the hints and add them to the buffer - self.tokenize_comment_hints(comment, span, buf)?; + self.tokenize_comment_hints(comment, span, buf, &mut mapper)?; } _ => { - buf.push(TokenWithSpan { token, span }); + buf.push(mapper(TokenWithSpan { token, span })); } } @@ -971,6 +981,7 @@ impl<'a> Tokenizer<'a> { comment: &str, span: Span, buf: &mut Vec<TokenWithSpan>, + mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan, ) -> Result<(), TokenizerError> { // Strip the leading '!' and any version digits (e.g., "50110") let hint_content = comment @@ -997,10 +1008,10 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))?
{ let token_span = location.span_to(state.location()); - buf.push(TokenWithSpan { + buf.push(mapper(TokenWithSpan { token, span: token_span, - }); + })); location = state.location(); } @@ -2644,6 +2655,38 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_with_mapper() { + let sql = String::from("SELECT ?"); + let dialect = GenericDialect {}; + let mut param_num = 1; + + let mut tokens = vec![]; + Tokenizer::new(&dialect, &sql) + .tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut token_span| { + token_span.token = match token_span.token { + Token::Placeholder(n) => Token::Placeholder(if n == "?" { + let ret = format!("${}", param_num); + param_num += 1; + ret + } else { + n + }), + token => token, + }; + token_span + }) + .unwrap(); + let actual = tokens.into_iter().map(|t| t.token).collect(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$1".to_string()), + ]; + + compare(expected, actual); + } + #[test] fn tokenize_clickhouse_double_equal() { let sql = String::from("SELECT foo=='1'");