Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ public final void applyPattern(final String pattern) {
foundFormats.add(format);
foundDescriptions.add(format == null ? null : formatDescription);
if (foundFormats.size() != fmtCount) {
throw new IllegalArgumentException("The validated expression is false");
throw new IllegalArgumentException("Format elements do not match format count: " + foundFormats.size() + " != " + fmtCount);
}
if (foundDescriptions.size() != fmtCount) {
throw new IllegalArgumentException("The validated expression is false");
throw new IllegalArgumentException("Format descriptions do not match format count: " + foundDescriptions.size() + " != " + fmtCount);
}
if (c[pos.getIndex()] != END_FE) {
throw new IllegalArgumentException("Unreadable format element at position " + start);
Expand Down
64 changes: 28 additions & 36 deletions src/main/java/org/apache/commons/text/WordUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.function.IntUnaryOperator;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -170,33 +171,7 @@ public static String capitalize(final String str) {
* @see #capitalizeFully(String)
*/
public static String capitalize(final String str, final char... delimiters) {
    // Title-case the first code point of every word. The shared helper performs the
    // null/empty guard and the delimiter scan, so the tokenization loop is not repeated
    // here and there is exactly one return path.
    return applyWordCaseTransform(str, delimiters, Character::toTitleCase);
}

/**
Expand Down Expand Up @@ -530,27 +505,44 @@ public static String uncapitalize(final String str) {
* @see #capitalize(String)
*/
public static String uncapitalize(final String str, final char... delimiters) {
    // Lower-case only the leading code point of each delimited word; every other
    // code point is passed through unchanged by the shared helper.
    final IntUnaryOperator lowerFirst = Character::toLowerCase;
    return applyWordCaseTransform(str, delimiters, lowerFirst);
}

/**
* Applies a case-transformation function to the first character of each word in a String.
*
* <p>This is a private helper used by both {@link #capitalize(String, char...)} and
* {@link #uncapitalize(String, char...)} to eliminate duplicated tokenization logic.
* The {@code transform} function is applied to the first code point of each word;
* all other code points are passed through unchanged.</p>
*
* @param str the String to transform, may be null.
* @param delimiters set of characters to determine word boundaries, null means whitespace.
* @param transform the casing function to apply to the first code point of each word
* (e.g., {@code Character::toTitleCase} or {@code Character::toLowerCase}).
* @return the transformed String, or {@code null}/{@code ""} if the input is null/empty.
*/
private static String applyWordCaseTransform(
final String str, final char[] delimiters, final IntUnaryOperator transform) {
if (StringUtils.isEmpty(str)) {
return str;
}
final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
final int strLen = str.length();
final int[] newCodePoints = new int[strLen];
int outOffset = 0;

boolean uncapitalizeNext = true;
boolean transformNext = true;
for (int index = 0; index < strLen;) {
final int codePoint = str.codePointAt(index);

if (isDelimiter.test(codePoint)) {
uncapitalizeNext = true;
transformNext = true;
newCodePoints[outOffset++] = codePoint;
index += Character.charCount(codePoint);
} else if (uncapitalizeNext) {
final int titleCaseCodePoint = Character.toLowerCase(codePoint);
newCodePoints[outOffset++] = titleCaseCodePoint;
index += Character.charCount(titleCaseCodePoint);
uncapitalizeNext = false;
} else if (transformNext) {
final int transformed = transform.applyAsInt(codePoint);
newCodePoints[outOffset++] = transformed;
index += Character.charCount(transformed);
transformNext = false;
} else {
newCodePoints[outOffset++] = codePoint;
index += Character.charCount(codePoint);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ public class JaroWinklerSimilarity implements SimilarityScore<Double> {
*/
static final JaroWinklerSimilarity INSTANCE = new JaroWinklerSimilarity();

/**
 * The maximum number of leading elements compared when counting the common prefix.
 * It caps the prefix-matching loop, which otherwise stops at the length of the
 * shorter input or at the first mismatch.
 */
private static final int MAX_PREFIX_LENGTH = 4;

/**
 * The Jaro score threshold for the prefix adjustment: a score below this value is
 * returned unchanged, while a score at or above it has the prefix-scaled bonus added.
 */
private static final double DEFAULT_WINKLER_THRESHOLD = 0.7d;

/**
* Computes the Jaro-Winkler string matches, half transpositions, prefix array.
*
Expand Down Expand Up @@ -110,7 +120,7 @@ protected static <E> int[] matches(final SimilarityInput<E> first, final Similar
}
}
int prefix = 0;
for (int mi = 0; mi < Math.min(4, min.length()); mi++) {
for (int mi = 0; mi < Math.min(MAX_PREFIX_LENGTH, min.length()); mi++) {
if (!first.at(mi).equals(second.at(mi))) {
break;
}
Expand Down Expand Up @@ -211,7 +221,7 @@ public <E> Double apply(final SimilarityInput<E> left, final SimilarityInput<E>
return 0d;
}
final double j = (m / left.length() + m / right.length() + (m - (double) mtp[1] / 2) / m) / 3;
return j < 0.7d ? j : j + defaultScalingFactor * mtp[2] * (1d - j);
return j < DEFAULT_WINKLER_THRESHOLD ? j : j + defaultScalingFactor * mtp[2] * (1d - j);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;


/**
* Tests {@link FileStringLookup}.
Expand Down Expand Up @@ -133,13 +133,19 @@ void testFenceNullOne() throws Exception {
}

@Test
void testFenceRelativeParentTraversal(@TempDir final Path tempDir) throws Exception {
void testFenceRelativeParentTraversal() throws Exception {
// A real, readable file that lives outside the fence but is reachable from the working
// directory through leading ".." segments. The fence must reject it; if the leading ".."
// survives unresolved, the prefix check passes and the file is read, escaping the fence.
final Path tempDir = Paths.get("target/tempDir").toAbsolutePath();
Files.createDirectories(tempDir);
final Path secret = Files.write(tempDir.resolve("secret.txt"), "secret".getBytes(StandardCharsets.UTF_8));
final Path relativeEscape = CURRENT_PATH.toAbsolutePath().relativize(secret);
final FileStringLookup fileStringLookup = new FileStringLookup(CURRENT_PATH);

final Path fenceDir = Paths.get("target/fence").toAbsolutePath();
Files.createDirectories(fenceDir);

final Path relativeEscape = fenceDir.relativize(secret);
final FileStringLookup fileStringLookup = new FileStringLookup(fenceDir);
assertThrows(IllegalArgumentException.class, () -> fileStringLookup.apply("UTF-8:" + relativeEscape));
}

Expand Down