X Tutup
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;

// ReSharper disable once CheckNamespace
namespace NpgsqlTypes;

/// <summary>
/// Represents a PostgreSQL tsvector: a list of distinct lexemes, each with optional
/// word entry positions (position + weight).
/// </summary>
public sealed class NpgsqlTsVector : IEnumerable<NpgsqlTsVector.Lexeme>, IEquatable<NpgsqlTsVector>
{
    /// <summary>
    /// Represents an empty tsvector.
    /// </summary>
    public static readonly NpgsqlTsVector Empty = new NpgsqlTsVector([], noCheck: true);

    readonly List<Lexeme> _lexemes;

    /// <summary>
    /// Creates a tsvector from a list of lexemes.
    /// </summary>
    /// <param name="lexemes">The lexemes making up the vector.</param>
    /// <param name="noCheck">
    /// When <see langword="true"/>, <paramref name="lexemes"/> is stored as-is (no copy, no sorting, no merging).
    /// When <see langword="false"/>, the list is copied, sorted by text, lexemes with identical text are merged
    /// into one, and each lexeme's position list is sorted and de-duplicated.
    /// </param>
    internal NpgsqlTsVector(List<Lexeme> lexemes, bool noCheck = false)
    {
        if (noCheck)
        {
            _lexemes = lexemes;
            return;
        }

        _lexemes = [..lexemes];

        if (_lexemes.Count == 0)
            return;

        // The culture-specific order does not matter to the backend (it sorts on its own when it detects an
        // unsorted collection); it only affects the order a .NET user observes. The ordinal tie-break is what
        // the merge loop below relies on: duplicates are detected with ordinal (in)equality, so ordinally equal
        // texts must end up adjacent even when the culture considers some different strings to be equal.
        _lexemes.Sort((a, b) =>
        {
            var byCulture = string.Compare(a.Text, b.Text, StringComparison.CurrentCulture);
            return byCulture != 0 ? byCulture : string.CompareOrdinal(a.Text, b.Text);
        });

        // In-place compaction: 'res' is the index of the lexeme currently being accumulated,
        // 'pos' is the index of the next lexeme to examine.
        var res = 0;
        var pos = 1;
        while (pos < _lexemes.Count)
        {
            if (_lexemes[pos].Text != _lexemes[res].Text)
            {
                // We're done with this lexeme. First make sure the word pos list is sorted and contains unique elements.
                _lexemes[res] = new Lexeme(_lexemes[res].Text, Lexeme.UniquePos(_lexemes[res].WordEntryPositions), true);
                res++;
                if (res != pos)
                    _lexemes[res] = _lexemes[pos];
            }
            else
            {
                // Same text: just concatenate the word pos lists (this appends to the list held by the lexeme at 'res').
                var wordEntryPositions = _lexemes[res].WordEntryPositions;
                if (wordEntryPositions != null)
                {
                    var lexeme = _lexemes[pos];
                    if (lexeme.WordEntryPositions != null)
                        wordEntryPositions.AddRange(lexeme.WordEntryPositions);
                }
                else
                {
                    // The accumulated lexeme has no positions; take the other one (which may have some).
                    _lexemes[res] = _lexemes[pos];
                }
            }
            pos++;
        }

        // Last element
        _lexemes[res] = new Lexeme(_lexemes[res].Text, Lexeme.UniquePos(_lexemes[res].WordEntryPositions), true);
        if (res != pos - 1)
        {
            // Indices 0..res hold the merged result; everything after 'res' is stale leftovers from compaction.
            // Start at res + 1 so the merged entry at 'res' itself is kept.
            _lexemes.RemoveRange(res + 1, pos - 1 - res);
        }
    }

    /// <summary>
    /// Parses a tsvector in PostgreSQL's text format.
    /// </summary>
    /// <param name="value">The text representation, e.g. <c>'cat':3 'fat':2A,4</c>.</param>
    /// <returns>A tsvector whose lexemes are sorted and merged.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is <see langword="null"/>.</exception>
    /// <exception cref="FormatException"><paramref name="value"/> is malformed.</exception>
    [Obsolete("Client-side parsing of NpgsqlTsVector is unreliable and cannot fully duplicate the PostgreSQL logic. Use PG functions instead (e.g. to_tsvector)")]
    public static NpgsqlTsVector Parse(string value)
    {
        ArgumentNullException.ThrowIfNull(value);

        var lexemes = new List<Lexeme>();
        var pos = 0;
        var wordPos = 0;
        var sb = new StringBuilder();
        List<Lexeme.WordEntryPos> wordEntryPositions;

        // The parser is a goto-driven state machine; each label below is one state.

        // Skip whitespace and detect how the next lexeme starts (quoted, escaped or plain).
        WaitWord:
        if (pos >= value.Length)
            goto Finish;
        if (char.IsWhiteSpace(value[pos]))
        {
            pos++;
            goto WaitWord;
        }
        sb.Clear();
        if (value[pos] == '\'')
        {
            pos++;
            goto WaitEndComplex;
        }
        if (value[pos] == '\\')
        {
            pos++;
            goto WaitNextChar;
        }
        sb.Append(value[pos++]);
        goto WaitEndWord;

        // Unquoted lexeme: take the character following a backslash literally.
        WaitNextChar:
        if (pos >= value.Length)
            throw new FormatException("Missing escaped character after \\ at end of value");
        sb.Append(value[pos++]);

        // Unquoted lexeme: accumulate until whitespace, end of input or ':' (start of position info).
        WaitEndWord:
        if (pos >= value.Length || char.IsWhiteSpace(value[pos]))
        {
            lexemes.Add(new Lexeme(sb.ToString()));
            if (pos >= value.Length)
                goto Finish;
            pos++;
            goto WaitWord;
        }
        if (value[pos] == '\\')
        {
            pos++;
            goto WaitNextChar;
        }
        if (value[pos] == ':')
        {
            pos++;
            goto StartPosInfo;
        }
        sb.Append(value[pos++]);
        goto WaitEndWord;

        // Quoted lexeme: accumulate until the closing quote, honoring backslash escapes.
        WaitEndComplex:
        if (pos >= value.Length)
            throw new FormatException("Unexpected end of value");
        if (value[pos] == '\'')
        {
            pos++;
            goto WaitCharComplex;
        }
        if (value[pos] == '\\')
        {
            pos++;
            if (pos >= value.Length)
                throw new FormatException("Missing escaped character after \\ at end of value");
        }
        sb.Append(value[pos++]);
        goto WaitEndComplex;

        // Just after a quote inside a quoted lexeme: '' is an escaped quote, ':' starts position info,
        // anything else ends the lexeme.
        WaitCharComplex:
        if (pos < value.Length && value[pos] == '\'')
        {
            sb.Append('\'');
            pos++;
            goto WaitEndComplex;
        }
        if (pos < value.Length && value[pos] == ':')
        {
            pos++;
            goto StartPosInfo;
        }
        lexemes.Add(new Lexeme(sb.ToString()));
        goto WaitWord;

        StartPosInfo:
        wordEntryPositions = [];

        // Read one position: a run of decimal digits optionally followed by a weight letter.
        InPosInfo:
        var digitPos = pos;
        while (pos < value.Length && value[pos] >= '0' && value[pos] <= '9')
            pos++;
        if (digitPos == pos)
            throw new FormatException("Missing length after :");
        wordPos = int.Parse(value.Substring(digitPos, pos - digitPos));

        // Note: PostgreSQL backend parser matches also for example 1DD2A, which is parsed into 1A, but not 1AA2D ...
        if (pos < value.Length)
        {
            if (value[pos] == 'A' || value[pos] == 'a' || value[pos] == '*') // Why * ?
            {
                wordEntryPositions.Add(new Lexeme.WordEntryPos(wordPos, Lexeme.Weight.A));
                pos++;
                goto WaitPosDelim;
            }
            if (value[pos] >= 'B' && value[pos] <= 'D' || value[pos] >= 'b' && value[pos] <= 'd')
            {
                var weight = value[pos];
                // Normalize to upper case, then map 'D'..'B' onto Weight.D..Weight.B (enum values 0..2).
                if (weight is >= 'b' and <= 'd')
                    weight = (char)(weight - ('b' - 'B'));
                wordEntryPositions.Add(new Lexeme.WordEntryPos(wordPos, Lexeme.Weight.D + ('D' - weight)));
                pos++;
                goto WaitPosDelim;
            }
        }
        // No weight letter: the WordEntryPos constructor defaults to weight D.
        wordEntryPositions.Add(new Lexeme.WordEntryPos(wordPos));

        // After a position: ',' continues the list, whitespace or end of input finishes the lexeme.
        WaitPosDelim:
        if (pos >= value.Length || char.IsWhiteSpace(value[pos]))
        {
            if (pos < value.Length)
                pos++;
            lexemes.Add(new Lexeme(sb.ToString(), wordEntryPositions));
            goto WaitWord;
        }
        if (value[pos] == ',')
        {
            pos++;
            goto InPosInfo;
        }
        throw new FormatException("Missing comma, whitespace or end of value after lexeme pos info");

        Finish:
        return new NpgsqlTsVector(lexemes);
    }

    /// <summary>
    /// Returns the lexeme at a specific index.
    /// </summary>
    /// <param name="index">Zero-based index of the lexeme.</param>
    /// <returns>The lexeme at <paramref name="index"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> is negative or not less than <see cref="Count"/>.
    /// </exception>
    public Lexeme this[int index]
    {
        get
        {
            if (index < 0 || index >= _lexemes.Count)
                throw new ArgumentOutOfRangeException(nameof(index));

            return _lexemes[index];
        }
    }

    /// <summary>
    /// Gets the number of lexemes.
    /// </summary>
    public int Count => _lexemes.Count;

    /// <summary>
    /// Returns an enumerator.
    /// </summary>
    /// <returns>An enumerator over the lexemes.</returns>
    public IEnumerator<Lexeme> GetEnumerator() => _lexemes.GetEnumerator();

    /// <summary>
    /// Returns an enumerator.
    /// </summary>
    /// <returns>An enumerator over the lexemes.</returns>
    IEnumerator IEnumerable.GetEnumerator() => _lexemes.GetEnumerator();

    /// <summary>
    /// Gets a string representation in PostgreSQL's format.
    /// </summary>
    /// <returns>The lexemes' string representations joined by single spaces.</returns>
    public override string ToString() => string.Join(" ", _lexemes);

    /// <inheritdoc />
    public bool Equals(NpgsqlTsVector? other)
    {
        if (ReferenceEquals(this, other))
            return true;
        if (other is null || _lexemes.Count != other._lexemes.Count)
            return false;

        for (var i = 0; i < _lexemes.Count; i++)
            if (!_lexemes[i].Equals(other._lexemes[i]))
                return false;

        return true;
    }

    /// <inheritdoc />
    public override bool Equals(object? obj)
        => obj is NpgsqlTsVector other && Equals(other);

    /// <inheritdoc />
    public override int GetHashCode()
    {
        var hash = new HashCode();
        foreach (var lexeme in _lexemes)
            hash.Add(lexeme);

        return hash.ToHashCode();
    }

    /// <summary>
    /// Represents a lexeme. A lexeme consists of a text string and optional word entry positions.
    /// </summary>
    public struct Lexeme : IEquatable<Lexeme>
    {
        /// <summary>
        /// Gets or sets the text.
        /// </summary>
        public string Text { get; set; }

        internal readonly List<WordEntryPos>? WordEntryPositions;

        /// <summary>
        /// Creates a lexeme with no word entry positions.
        /// </summary>
        /// <param name="text">The lexeme text.</param>
        public Lexeme(string text)
        {
            Text = text;
            WordEntryPositions = null;
        }

        /// <summary>
        /// Creates a lexeme with word entry positions.
        /// </summary>
        /// <param name="text">The lexeme text.</param>
        /// <param name="wordEntryPositions">The positions; the list is copied. May be <see langword="null"/>.</param>
        public Lexeme(string text, List<WordEntryPos>? wordEntryPositions)
            : this(text, wordEntryPositions, false) {}

        /// <summary>
        /// Creates a lexeme, optionally adopting <paramref name="wordEntryPositions"/> without copying it.
        /// </summary>
        internal Lexeme(string text, List<WordEntryPos>? wordEntryPositions, bool noCopy)
        {
            Text = text;
            if (wordEntryPositions != null)
                WordEntryPositions = noCopy ? wordEntryPositions : [..wordEntryPositions];
            else
                WordEntryPositions = null;
        }

        /// <summary>
        /// Returns a position list that is strictly ascending by position. When several entries share a
        /// position, the one with the highest weight is kept.
        /// </summary>
        /// <param name="list">The positions to normalize; may be <see langword="null"/>.</param>
        /// <returns>
        /// <see langword="null"/> for a <see langword="null"/> input, the same instance when it is already
        /// strictly ascending, otherwise a new normalized list.
        /// </returns>
        internal static List<WordEntryPos>? UniquePos(List<WordEntryPos>? list)
        {
            if (list == null)
                return null;

            var needsProcessing = false;
            for (var i = 1; i < list.Count; i++)
            {
                if (list[i - 1].Pos >= list[i].Pos)
                {
                    needsProcessing = true;
                    break;
                }
            }
            if (!needsProcessing)
                return list;

            // Don't change the original list, as the user might inspect it later if they hold a reference to the lexeme's list
            list = [..list];

            list.Sort((x, y) => x.Pos.CompareTo(y.Pos));

            // In-place compaction: 'a' is the index of the last kept entry, 'b' scans ahead.
            var a = 0;
            for (var b = 1; b < list.Count; b++)
            {
                if (list[a].Pos != list[b].Pos)
                {
                    a++;
                    if (a != b)
                        list[a] = list[b];
                }
                else if (list[b].Weight > list[a].Weight)
                    list[a] = list[b];
            }
            if (a != list.Count - 1)
            {
                // Keep indices 0..a; start at a + 1 so the (max-weight) entry at 'a' itself survives.
                list.RemoveRange(a + 1, list.Count - 1 - a);
            }
            return list;
        }

        /// <summary>
        /// Gets a word entry position.
        /// </summary>
        /// <param name="index">Zero-based index of the position.</param>
        /// <returns>The word entry position at <paramref name="index"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The lexeme has no positions, or <paramref name="index"/> is negative or not less than <see cref="Count"/>.
        /// </exception>
        public WordEntryPos this[int index]
        {
            get
            {
                if (index < 0 || WordEntryPositions == null || index >= WordEntryPositions.Count)
                    throw new ArgumentOutOfRangeException(nameof(index));

                return WordEntryPositions[index];
            }
            internal set
            {
                if (index < 0 || WordEntryPositions == null || index >= WordEntryPositions.Count)
                    throw new ArgumentOutOfRangeException(nameof(index));

                WordEntryPositions[index] = value;
            }
        }

        /// <summary>
        /// Gets the number of word entry positions.
        /// </summary>
        public int Count => WordEntryPositions?.Count ?? 0;

        /// <summary>
        /// Creates a string representation in PostgreSQL's format.
        /// </summary>
        /// <returns>
        /// The quoted text (backslashes and single quotes doubled), followed by ':' and the comma-separated
        /// positions when there are any.
        /// </returns>
        public override string ToString()
        {
            var str = '\'' + (Text ?? "").Replace(@"\", @"\\").Replace("'", "''") + '\'';
            if (Count > 0)
                str += ":" + string.Join(",", WordEntryPositions!);
            return str;
        }

        /// <summary>
        /// Represents a word entry position and an optional weight.
        /// </summary>
        public struct WordEntryPos : IEquatable<WordEntryPos>
        {
            // Packed representation: bits 14-15 hold the weight, bits 0-13 hold the position.
            internal short Value { get; }

            internal WordEntryPos(short value)
                => Value = value;

            /// <summary>
            /// Creates a WordEntryPos with a given position and weight.
            /// </summary>
            /// <param name="pos">Position values can range from 1 to 16383; larger numbers are silently set to 16383.</param>
            /// <param name="weight">A weight labeled between A and D.</param>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="pos"/> is less than 1, or <paramref name="weight"/> is not a defined weight.
            /// </exception>
            public WordEntryPos(int pos, Weight weight = Weight.D)
            {
                // Reject negatives as well as 0: they would otherwise pass the clamp below and become 16383.
                if (pos < 1)
                    throw new ArgumentOutOfRangeException(nameof(pos), "Lexeme position is out of range. Min value is 1, max value is 2^14-1. Value was: " + pos);
                if (weight < Weight.D || weight > Weight.A)
                    throw new ArgumentOutOfRangeException(nameof(weight));

                // Per documentation: "Position values can range from 1 to 16383; larger numbers are silently set to 16383."
                if (pos >> 14 != 0)
                    pos = (1 << 14) - 1;

                Value = (short)(((int)weight << 14) | pos);
            }

            /// <summary>
            /// The weight is labeled from A to D. D is the default, and not printed.
            /// </summary>
            public Weight Weight => (Weight)((Value >> 14) & 3);

            /// <summary>
            /// The position is a 14-bit unsigned integer indicating the position in the text this lexeme occurs. Cannot be 0.
            /// </summary>
            public int Pos => Value & ((1 << 14) - 1);

            /// <summary>
            /// Prints this lexeme in PostgreSQL's format, i.e. position is followed by weight (weight is only printed if A, B or C).
            /// </summary>
            /// <returns>The position, followed by the weight letter unless the weight is D.</returns>
            public override string ToString()
            {
                if (Weight != Weight.D)
                    return Pos + Weight.ToString();
                return Pos.ToString();
            }

            /// <summary>
            /// Determines whether the specified object is equal to the current object.
            /// </summary>
            public bool Equals(WordEntryPos o)
                => Value == o.Value;

            /// <summary>
            /// Determines whether the specified object is equal to the current object.
            /// </summary>
            public override bool Equals(object? o)
                => o is WordEntryPos pos && Equals(pos);

            /// <summary>
            /// Gets a hash code for the current object.
            /// </summary>
            public override int GetHashCode()
                => Value.GetHashCode();

            /// <summary>
            /// Determines whether the specified object is equal to the current object.
            /// </summary>
            public static bool operator ==(WordEntryPos left, WordEntryPos right)
                => left.Equals(right);

            /// <summary>
            /// Determines whether the specified object is unequal to the current object.
            /// </summary>
            public static bool operator !=(WordEntryPos left, WordEntryPos right)
                => !left.Equals(right);
        }

        /// <summary>
        /// The weight is labeled from A to D. D is the default, and not printed.
        /// </summary>
        public enum Weight
        {
            /// <summary>
            /// D, the default
            /// </summary>
            D = 0,

            /// <summary>
            /// C
            /// </summary>
            C = 1,

            /// <summary>
            /// B
            /// </summary>
            B = 2,

            /// <summary>
            /// A
            /// </summary>
            A = 3
        }

        /// <summary>
        /// Determines whether the specified object is equal to the current object.
        /// Two lexemes are equal when their texts match and their position lists are both absent
        /// or contain equal entries in the same order.
        /// </summary>
        public bool Equals(Lexeme o)
        {
            if (Text != o.Text)
                return false;
            if (WordEntryPositions is null)
                return o.WordEntryPositions is null;
            if (o.WordEntryPositions is null || WordEntryPositions.Count != o.WordEntryPositions.Count)
                return false;

            for (var i = 0; i < WordEntryPositions.Count; i++)
                if (!WordEntryPositions[i].Equals(o.WordEntryPositions[i]))
                    return false;

            return true;
        }

        /// <summary>
        /// Determines whether the specified object is equal to the current object.
        /// </summary>
        public override bool Equals(object? o)
            => o is Lexeme lexeme && Equals(lexeme);

        /// <summary>
        /// Gets a hash code for the current object.
        /// </summary>
        // Text is null on default(Lexeme); hash it as 0 instead of throwing.
        public override int GetHashCode()
            => Text?.GetHashCode() ?? 0;

        /// <summary>
        /// Determines whether the specified object is equal to the current object.
        /// </summary>
        public static bool operator ==(Lexeme left, Lexeme right)
            => left.Equals(right);

        /// <summary>
        /// Determines whether the specified object is unequal to the current object.
        /// </summary>
        public static bool operator !=(Lexeme left, Lexeme right)
            => !left.Equals(right);
    }
}
X Tutup