using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using JetBrains.Annotations;

#pragma warning disable CA1040, CA1034

namespace NpgsqlTypes
{
    /// <summary>
    /// Represents a PostgreSQL tsvector: a sorted list of distinct lexemes, each with
    /// optional word entry positions.
    /// </summary>
    public sealed class NpgsqlTsVector : IEnumerable<NpgsqlTsVector.Lexeme>
    {
        readonly List<Lexeme> _lexemes;

        /// <summary>
        /// Creates a vector from <paramref name="lexemes"/>. When <paramref name="noCheck"/> is
        /// true the list is adopted as-is (no copy, no sorting, no de-duplication).
        /// </summary>
        internal NpgsqlTsVector(List<Lexeme> lexemes, bool noCheck)
        {
            _lexemes = noCheck ? lexemes : Normalize(lexemes);
        }

        /// <summary>
        /// Constructs an NpgsqlTsVector from a list of lexemes. This also sorts and removes duplicates.
        /// </summary>
        /// <param name="lexemes">The lexemes; the list itself is copied, not modified.</param>
        /// <exception cref="ArgumentNullException"><paramref name="lexemes"/> is null.</exception>
        public NpgsqlTsVector(List<Lexeme> lexemes)
        {
            _lexemes = Normalize(lexemes);
        }

        /// <summary>
        /// Returns a sorted copy of <paramref name="lexemes"/> in which lexemes with equal text are
        /// merged into one entry whose positions are sorted and unique.
        /// </summary>
        static List<Lexeme> Normalize(List<Lexeme> lexemes)
        {
            if (lexemes == null)
                throw new ArgumentNullException(nameof(lexemes));

            var sorted = new List<Lexeme>(lexemes);
            if (sorted.Count == 0)
                return sorted;

            // Culture-specific comparisons don't really matter for the backend. It's sorting on its own
            // if it detects an unsorted collection. Only when a .NET user wants to print the sort order.
            sorted.Sort(CompareLexemeText);

            var result = new List<Lexeme>(sorted.Count);
            var start = 0;
            while (start < sorted.Count)
            {
                var text = sorted[start].Text;
                var merged = sorted[start]._wordEntryPositions;
                // The position lists are shared with the caller's lexemes, so never append to them
                // directly; switch to a private copy the first time something has to be added.
                var mergedIsPrivateCopy = false;

                var next = start + 1;
                for (; next < sorted.Count && sorted[next].Text == text; next++)
                {
                    var extra = sorted[next]._wordEntryPositions;
                    if (merged == null)
                    {
                        merged = extra;
                    }
                    else if (extra != null && extra.Count > 0)
                    {
                        if (!mergedIsPrivateCopy)
                        {
                            merged = new List<Lexeme.WordEntryPos>(merged);
                            mergedIsPrivateCopy = true;
                        }
                        merged.AddRange(extra);
                    }
                }

                result.Add(new Lexeme(text, Lexeme.UniquePos(merged), true));
                start = next;
            }

            return result;
        }

        /// <summary>
        /// Orders lexemes by text using the current culture, falling back to an ordinal comparison
        /// for ties so that only ordinally identical texts compare as equal (and end up adjacent,
        /// which the merge step in <see cref="Normalize"/> relies on).
        /// </summary>
        static int CompareLexemeText(Lexeme x, Lexeme y)
        {
            var byCulture = string.Compare(x.Text, y.Text, StringComparison.CurrentCulture);
            return byCulture != 0 ? byCulture : string.CompareOrdinal(x.Text, y.Text);
        }

        /// <summary>
        /// Parses a tsvector in PostgreSQL's text format.
        /// </summary>
        /// <param name="value">The text to parse.</param>
        /// <returns>The parsed vector, sorted and without duplicate lexemes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        /// <exception cref="FormatException"><paramref name="value"/> is malformed.</exception>
        public static NpgsqlTsVector Parse(string value)
        {
            if (value == null)
                throw new ArgumentNullException(nameof(value));

            var lexemes = new List<Lexeme>();
            var pos = 0;
            var wordPos = 0;
            var sb = new StringBuilder();
            List<Lexeme.WordEntryPos> wordEntryPositions;

            // The parser is a small state machine; each label below is one state.

        WaitWord:
            // Skip whitespace until a lexeme starts.
            if (pos >= value.Length)
                goto Finish;
            if (char.IsWhiteSpace(value[pos]))
            {
                pos++;
                goto WaitWord;
            }
            sb.Clear();
            if (value[pos] == '\'')
            {
                pos++;
                goto WaitEndComplex;
            }
            if (value[pos] == '\\')
            {
                pos++;
                goto WaitNextChar;
            }
            sb.Append(value[pos++]);
            goto WaitEndWord;

        WaitNextChar:
            // The character after a backslash in an unquoted lexeme is taken literally.
            if (pos >= value.Length)
                throw new FormatException("Missing escaped character after \\ at end of value");
            sb.Append(value[pos++]);
            goto WaitEndWord;

        WaitEndWord:
            // Inside an unquoted lexeme: ends at whitespace, end of input or ':'.
            if (pos >= value.Length || char.IsWhiteSpace(value[pos]))
            {
                lexemes.Add(new Lexeme(sb.ToString()));
                if (pos >= value.Length)
                    goto Finish;
                pos++;
                goto WaitWord;
            }
            if (value[pos] == '\\')
            {
                pos++;
                goto WaitNextChar;
            }
            if (value[pos] == ':')
            {
                pos++;
                goto StartPosInfo;
            }
            sb.Append(value[pos++]);
            goto WaitEndWord;

        WaitEndComplex:
            // Inside a quoted lexeme: runs until the closing quote.
            if (pos >= value.Length)
                throw new FormatException("Unexpected end of value");
            if (value[pos] == '\'')
            {
                pos++;
                goto WaitCharComplex;
            }
            if (value[pos] == '\\')
            {
                pos++;
                if (pos >= value.Length)
                    throw new FormatException("Missing escaped character after \\ at end of value");
            }
            sb.Append(value[pos++]);
            goto WaitEndComplex;

        WaitCharComplex:
            // Just after a quote inside a quoted lexeme: '' is an escaped quote, ':' starts the
            // position info, anything else means the lexeme is complete.
            if (pos < value.Length && value[pos] == '\'')
            {
                sb.Append('\'');
                pos++;
                goto WaitEndComplex;
            }
            if (pos < value.Length && value[pos] == ':')
            {
                pos++;
                goto StartPosInfo;
            }
            lexemes.Add(new Lexeme(sb.ToString()));
            goto WaitWord;

        StartPosInfo:
            wordEntryPositions = new List<Lexeme.WordEntryPos>();
            goto InPosInfo;

        InPosInfo:
            var digitStart = pos;
            wordPos = 0;
            while (pos < value.Length && value[pos] >= '0' && value[pos] <= '9')
            {
                // Accumulate with saturation: positions above the maximum are clamped anyway, and
                // this keeps arbitrarily long digit runs from overflowing.
                if (wordPos < Lexeme.WordEntryPos.MaxPos)
                    wordPos = Math.Min(wordPos * 10 + (value[pos] - '0'), Lexeme.WordEntryPos.MaxPos);
                pos++;
            }
            if (digitStart == pos)
                throw new FormatException("Missing length after :");
            goto WaitPosWeightOrDelim;

            // Note: PostgreSQL backend parser matches also for example 1DD2A, which is parsed into 1A, but not 1AA2D ...
        WaitPosWeightOrDelim:
            if (pos < value.Length)
            {
                var c = value[pos];

                // '*' is treated like A. NOTE(review): believed to mirror the backend's tsvector parser - confirm.
                if (c == 'A' || c == 'a' || c == '*')
                {
                    wordEntryPositions.Add(new Lexeme.WordEntryPos(wordPos, Lexeme.Weight.A));
                    pos++;
                    goto WaitPosDelim;
                }
                if (c >= 'B' && c <= 'D' || c >= 'b' && c <= 'd')
                {
                    var upper = c >= 'b' && c <= 'd' ? (char)(c - ('b' - 'B')) : c;
                    // Weight values grow from D (0) to A (3) while the letters go the other way.
                    wordEntryPositions.Add(new Lexeme.WordEntryPos(wordPos, Lexeme.Weight.D + ('D' - upper)));
                    pos++;
                    goto WaitPosDelim;
                }
            }
            wordEntryPositions.Add(new Lexeme.WordEntryPos(wordPos));
            goto WaitPosDelim;

        WaitPosDelim:
            if (pos >= value.Length || char.IsWhiteSpace(value[pos]))
            {
                if (pos < value.Length)
                    pos++;
                lexemes.Add(new Lexeme(sb.ToString(), wordEntryPositions));
                goto WaitWord;
            }
            if (value[pos] == ',')
            {
                pos++;
                goto InPosInfo;
            }
            throw new FormatException("Missing comma, whitespace or end of value after lexeme pos info");

        Finish:
            return new NpgsqlTsVector(lexemes);
        }

        /// <summary>
        /// Returns the lexeme at a specific index.
        /// </summary>
        /// <param name="index">Zero-based index of the lexeme.</param>
        /// <returns>The lexeme at <paramref name="index"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is outside the vector.</exception>
        public Lexeme this[int index]
        {
            get
            {
                if (index < 0 || index >= _lexemes.Count)
                    throw new ArgumentOutOfRangeException(nameof(index));

                return _lexemes[index];
            }
        }

        /// <summary>
        /// Gets the number of lexemes.
        /// </summary>
        public int Count => _lexemes.Count;

        /// <summary>
        /// Returns an enumerator.
        /// </summary>
        /// <returns>An enumerator over the lexemes.</returns>
        public IEnumerator<Lexeme> GetEnumerator() => _lexemes.GetEnumerator();

        /// <summary>
        /// Returns an enumerator.
        /// </summary>
        /// <returns>An enumerator over the lexemes.</returns>
        IEnumerator IEnumerable.GetEnumerator() => _lexemes.GetEnumerator();

        /// <summary>
        /// Gets a string representation in PostgreSQL's format.
        /// </summary>
        /// <returns>The lexemes' string representations separated by single spaces.</returns>
        public override string ToString() => string.Join(" ", _lexemes);

        /// <summary>
        /// Represents a lexeme. A lexeme consists of a text string and optional word entry positions.
        /// </summary>
        public struct Lexeme : IEquatable<Lexeme>
        {
            /// <summary>
            /// Gets or sets the text.
            /// </summary>
            public string Text { get; set; }

            [CanBeNull]
            internal List<WordEntryPos> _wordEntryPositions;

            /// <summary>
            /// Creates a lexeme with no word entry positions.
            /// </summary>
            /// <param name="text">The lexeme text.</param>
            public Lexeme(string text)
            {
                Text = text;
                _wordEntryPositions = null;
            }

            /// <summary>
            /// Creates a lexeme with word entry positions.
            /// </summary>
            /// <param name="text">The lexeme text.</param>
            /// <param name="wordEntryPositions">The positions, or null for none. The list is copied.</param>
            public Lexeme(string text, [CanBeNull] List<WordEntryPos> wordEntryPositions)
                : this(text, wordEntryPositions, false)
            {
            }

            /// <summary>
            /// Creates a lexeme; when <paramref name="noCopy"/> is true the given list is adopted
            /// instead of copied.
            /// </summary>
            internal Lexeme(string text, [CanBeNull] List<WordEntryPos> wordEntryPositions, bool noCopy)
            {
                Text = text;
                if (wordEntryPositions != null)
                    _wordEntryPositions = noCopy ? wordEntryPositions : new List<WordEntryPos>(wordEntryPositions);
                else
                    _wordEntryPositions = null;
            }

            /// <summary>
            /// Returns the positions sorted by <see cref="WordEntryPos.Pos"/> with one entry per
            /// position; when a position occurs more than once the entry with the highest weight wins.
            /// The input list is returned unchanged when it is already strictly ascending, otherwise a
            /// new list is returned. Null yields null.
            /// </summary>
            [CanBeNull]
            internal static List<WordEntryPos> UniquePos([CanBeNull] List<WordEntryPos> list)
            {
                if (list == null)
                    return null;

                var needsProcessing = false;
                for (var i = 1; i < list.Count; i++)
                {
                    if (list[i - 1].Pos >= list[i].Pos)
                    {
                        needsProcessing = true;
                        break;
                    }
                }
                if (!needsProcessing)
                    return list;

                // Don't change the original list, as the user might inspect it later if he holds a
                // reference to the lexeme's list.
                list = new List<WordEntryPos>(list);
                list.Sort((x, y) => x.Pos.CompareTo(y.Pos));

                // Compact in place: [0..last] always holds the unique entries found so far.
                var last = 0;
                for (var next = 1; next < list.Count; next++)
                {
                    if (list[last].Pos != list[next].Pos)
                    {
                        last++;
                        if (last != next)
                            list[last] = list[next];
                    }
                    else if (list[next].Weight > list[last].Weight)
                    {
                        list[last] = list[next];
                    }
                }

                // Drop everything after the last unique entry (the entry at 'last' itself is kept).
                var keep = last + 1;
                if (keep < list.Count)
                    list.RemoveRange(keep, list.Count - keep);

                return list;
            }

            /// <summary>
            /// Gets a word entry position.
            /// </summary>
            /// <param name="index">Zero-based index of the position.</param>
            /// <returns>The word entry position at <paramref name="index"/>.</returns>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="index"/> is out of range, or the lexeme has no positions.
            /// </exception>
            public WordEntryPos this[int index]
            {
                get
                {
                    if (index < 0 || _wordEntryPositions == null || index >= _wordEntryPositions.Count)
                        throw new ArgumentOutOfRangeException(nameof(index));

                    return _wordEntryPositions[index];
                }
                internal set
                {
                    if (index < 0 || _wordEntryPositions == null || index >= _wordEntryPositions.Count)
                        throw new ArgumentOutOfRangeException(nameof(index));

                    _wordEntryPositions[index] = value;
                }
            }

            /// <summary>
            /// Gets the number of word entry positions.
            /// </summary>
            public int Count => _wordEntryPositions?.Count ?? 0;

            /// <summary>
            /// Creates a string representation in PostgreSQL's format.
            /// </summary>
            /// <returns>
            /// The quoted text (backslashes and single quotes doubled), followed by ':' and the
            /// comma-separated positions when there are any.
            /// </returns>
            public override string ToString()
            {
                var str = '\'' + (Text ?? "").Replace(@"\", @"\\").Replace("'", "''") + '\'';
                if (Count > 0)
                    str += ":" + string.Join(",", _wordEntryPositions);
                return str;
            }

            /// <summary>
            /// Represents a word entry position and an optional weight.
            /// </summary>
            public struct WordEntryPos : IEquatable<WordEntryPos>
            {
                /// <summary>
                /// Largest representable position (2^14 - 1 = 16383).
                /// </summary>
                internal const int MaxPos = (1 << 14) - 1;

                /// <summary>
                /// Packed representation: weight in the two high bits, position in the low 14 bits.
                /// </summary>
                internal short Value { get; }

                internal WordEntryPos(short value)
                {
                    Value = value;
                }

                /// <summary>
                /// Creates a WordEntryPos with a given position and weight.
                /// </summary>
                /// <param name="pos">Position values can range from 1 to 16383; larger numbers are silently set to 16383.</param>
                /// <param name="weight">A weight labeled between A and D.</param>
                /// <exception cref="ArgumentOutOfRangeException">
                /// <paramref name="pos"/> is less than 1, or <paramref name="weight"/> is not a defined weight.
                /// </exception>
                public WordEntryPos(int pos, Weight weight = Weight.D)
                {
                    if (pos <= 0)
                        throw new ArgumentOutOfRangeException(nameof(pos), "Lexeme position is out of range. Min value is 1, max value is 2^14-1. Value was: " + pos);
                    if (weight < Weight.D || weight > Weight.A)
                        throw new ArgumentOutOfRangeException(nameof(weight));

                    // Per documentation: "Position values can range from 1 to 16383; larger numbers are silently set to 16383."
                    if (pos > MaxPos)
                        pos = MaxPos;

                    // Weights B and A set bit 15, so the narrowing cast must not be overflow-checked.
                    Value = unchecked((short)(((int)weight << 14) | pos));
                }

                /// <summary>
                /// The weight is labeled from A to D. D is the default, and not printed.
                /// </summary>
                public Weight Weight => (Weight)((Value >> 14) & 3);

                /// <summary>
                /// The position is a 14-bit unsigned integer indicating the position in the text this lexeme occurs. Cannot be 0.
                /// </summary>
                public int Pos => Value & MaxPos;

                /// <summary>
                /// Prints this lexeme in PostgreSQL's format, i.e. position is followed by weight (weight is only printed if A, B or C).
                /// </summary>
                /// <returns>The position, with the weight letter appended unless the weight is D.</returns>
                public override string ToString()
                {
                    if (Weight != Weight.D)
                        return Pos + Weight.ToString();
                    return Pos.ToString();
                }

                /// <summary>
                /// Determines whether the specified object is equal to the current object.
                /// </summary>
                public bool Equals(WordEntryPos o) => Value == o.Value;

                /// <summary>
                /// Determines whether the specified object is equal to the current object.
                /// </summary>
                public override bool Equals([CanBeNull] object o) => o is WordEntryPos && Equals((WordEntryPos)o);

                /// <summary>
                /// Gets a hash code for the current object.
                /// </summary>
                public override int GetHashCode() => Value.GetHashCode();

                /// <summary>
                /// Determines whether the specified object is equal to the current object.
                /// </summary>
                public static bool operator ==(WordEntryPos left, WordEntryPos right) => left.Equals(right);

                /// <summary>
                /// Determines whether the specified object is unequal to the current object.
                /// </summary>
                public static bool operator !=(WordEntryPos left, WordEntryPos right) => !left.Equals(right);
            }

            /// <summary>
            /// The weight is labeled from A to D. D is the default, and not printed.
            /// </summary>
            public enum Weight
            {
                /// <summary>
                /// D, the default
                /// </summary>
                D = 0,

                /// <summary>
                /// C
                /// </summary>
                C = 1,

                /// <summary>
                /// B
                /// </summary>
                B = 2,

                /// <summary>
                /// A
                /// </summary>
                A = 3
            }

            /// <summary>
            /// Determines whether the specified object is equal to the current object. Two lexemes are
            /// equal when their texts are equal and they hold the same word entry positions in the
            /// same order; a lexeme without a position list equals one with an empty list.
            /// </summary>
            public bool Equals(Lexeme o)
            {
                if (Text != o.Text)
                    return false;

                var mine = _wordEntryPositions;
                var theirs = o._wordEntryPositions;
                if (ReferenceEquals(mine, theirs))
                    return true;

                var count = Count;
                if (count != o.Count)
                    return false;

                // Equal non-zero counts imply both lists are non-null here.
                for (var i = 0; i < count; i++)
                {
                    if (mine[i] != theirs[i])
                        return false;
                }
                return true;
            }

            /// <summary>
            /// Determines whether the specified object is equal to the current object.
            /// </summary>
            public override bool Equals([CanBeNull] object o) => o is Lexeme && Equals((Lexeme)o);

            /// <summary>
            /// Gets a hash code for the current object. Only the text takes part; a null text hashes to 0.
            /// </summary>
            public override int GetHashCode() => Text?.GetHashCode() ?? 0;

            /// <summary>
            /// Determines whether the specified object is equal to the current object.
            /// </summary>
            public static bool operator ==(Lexeme left, Lexeme right) => left.Equals(right);

            /// <summary>
            /// Determines whether the specified object is unequal to the current object.
            /// </summary>
            public static bool operator !=(Lexeme left, Lexeme right) => !left.Equals(right);
        }
    }
}