-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathEfficientSuffixArray.cs
More file actions
100 lines (79 loc) · 2.94 KB
/
EfficientSuffixArray.cs
File metadata and controls
100 lines (79 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
using System;
using System.Collections.Generic;
using System.Linq;
namespace AlgorithmsAndDataStructures.DataStructures.SuffixArray;
public class EfficientSuffixArray
{
private readonly string input;
private readonly int[] suffixes;
public EfficientSuffixArray(string input)
{
suffixes = string.IsNullOrEmpty(input) ? Array.Empty<int>() : Build(input);
this.input = input;
}
public IReadOnlyCollection<int> Suffixes => suffixes.ToList().AsReadOnly();
private static int[] Build(string input)
{
var ordering = new int[input.Length];
var tuples = new EfficientSuffixArrayNode[input.Length];
for (var i = 0; i < input.Length - 1; i++)
tuples[i] = new EfficientSuffixArrayNode(input)
{
Index = i,
Rank = input[i],
NextRank = input[i + 1]
};
tuples[input.Length - 1] = new EfficientSuffixArrayNode(input)
{
Index = input.Length - 1,
Rank = input[^1],
NextRank = -1
};
Array.Sort(tuples); // can be replaced with Radix sort to make it more performant
for (var prefixLength = 4; input.Length * 2 > prefixLength; prefixLength = prefixLength * 2)
{
var rank = 0;
var previousSuffixRank = tuples[0].Rank;
tuples[0].Rank = 0;
for (var j = 1; j < input.Length; j++)
{
if (tuples[j].Rank == previousSuffixRank && tuples[j].NextRank == tuples[j - 1].NextRank)
{
previousSuffixRank = tuples[j].Rank;
tuples[j].Rank = rank;
}
else
{
previousSuffixRank = tuples[j].Rank;
tuples[j].Rank = ++rank;
}
ordering[tuples[j].Index] = j;
}
for (var i = 0; i < input.Length; i++)
{
var next = tuples[i].Index + prefixLength / 2;
tuples[i].NextRank = next < input.Length ? tuples[ordering[next]].Rank : -1;
}
Array.Sort(tuples); // can be replaced with Radix sort to make it more pre-formant
}
return tuples.Select(arg => arg.Index).ToArray();
}
public bool Contains(string pattern)
{
if (string.IsNullOrEmpty(pattern)) return true;
var start = 0;
var end = suffixes.Length - 1;
while (start < end)
{
var mid = start + (end - start) / 2;
var substring = input.Substring(mid, Math.Min(pattern.Length, input.Length - mid));
var comparisonResult = string.Compare(substring, pattern, StringComparison.InvariantCulture);
if (comparisonResult == 0) return true;
if (comparisonResult > 0)
start = mid + 1;
else
end = mid - 1;
}
return false;
}
}