X Tutup
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Row class was taken from Apache Spark pyspark.

from typing import Any, Dict, List, Optional, Tuple, Union


class Row(tuple):
    """
    A row in a query result.

    The fields in it can be accessed:

    * like attributes (``row.key``)
    * like dictionary values (``row[key]``)

    ``key in row`` will search through row keys.

    Row can be used to create a row object by using named arguments.
    It is not allowed to omit a named argument to represent that the value is
    None or missing. This should be explicitly set to None in this case.

    Examples
    --------
    >>> row = Row(name="Alice", age=11)
    >>> row
    Row(name='Alice', age=11)
    >>> row['name'], row['age']
    ('Alice', 11)
    >>> row.name, row.age
    ('Alice', 11)
    >>> 'name' in row
    True
    >>> 'wrong_key' in row
    False

    Row also can be used to create another Row like class, then it
    could be used to create Row objects, such as

    >>> Person = Row("name", "age")
    >>> Person
    <Row('name', 'age')>
    >>> 'name' in Person
    True
    >>> 'wrong_key' in Person
    False
    >>> Person("Alice", 11)
    Row(name='Alice', age=11)

    This form can also be used to create rows as tuple values, i.e. with
    unnamed fields.

    >>> row1 = Row("Alice", 11)
    >>> row2 = Row(name="Alice", age=11)
    >>> row1 == row2
    True
    """

    def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
        """
        Build either a named row (from ``kwargs``) or a plain tuple-like row
        / "Row class" (from ``args``).

        Raises
        ------
        ValueError
            If positional and keyword arguments are mixed.
        """
        if args and kwargs:
            raise ValueError("Can not use both args and kwargs to create Row")
        if kwargs:
            # Named form: the values become the tuple content and the keyword
            # names are remembered, in call order, as the field names.
            row = tuple.__new__(cls, list(kwargs.values()))
            row.__fields__ = list(kwargs.keys())
            return row
        # Positional form: a Row without ``__fields__``; it acts either as a
        # tuple of values or as a "Row class" holding field names.
        return tuple.__new__(cls, args)

    def asDict(self, recursive: bool = False) -> Dict[str, Any]:
        """
        Return as a dict

        Parameters
        ----------
        recursive : bool, optional
            turns the nested Rows to dict (default: False).

        Raises
        ------
        TypeError
            If the row has no field names (it was built from positional
            arguments only).

        Notes
        -----
        If a row contains duplicate field names, e.g., the rows of a join
        between two dataframes that both have the fields of same names,
        one of the duplicate fields will be selected by ``asDict``. ``__getitem__``
        will also return one of the duplicate fields, however returned value might
        be different to ``asDict``.

        Examples
        --------
        >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
        True
        >>> row = Row(key=1, value=Row(name='a', age=2))
        >>> row.asDict() == {'key': 1, 'value': Row(name='a', age=2)}
        True
        >>> row.asDict(True) == {'key': 1, 'value': {'name': 'a', 'age': 2}}
        True
        """
        if not hasattr(self, "__fields__"):
            raise TypeError("Cannot convert a Row class into dict")

        if not recursive:
            return dict(zip(self.__fields__, self))

        def conv(obj: Any) -> Any:
            # Walk lists and dict values so Rows nested inside them are
            # converted as well; any other object is returned untouched.
            if isinstance(obj, Row):
                return obj.asDict(True)
            if isinstance(obj, list):
                return [conv(o) for o in obj]
            if isinstance(obj, dict):
                return {k: conv(v) for k, v in obj.items()}
            return obj

        return dict(zip(self.__fields__, (conv(o) for o in self)))

    def __contains__(self, item: Any) -> bool:
        """
        Test membership against the field names when the row has them,
        otherwise against the tuple content.
        """
        if hasattr(self, "__fields__"):
            return item in self.__fields__
        return super().__contains__(item)

    # Calling a Row instance lets it act like a class: the instance supplies
    # the field names and the call arguments supply the values.
    def __call__(self, *args: Any) -> "Row":
        """
        Create a new Row object whose field names are the content of this row.

        Raises
        ------
        ValueError
            If more values are given than there are field names.
        """
        if len(args) > len(self):
            raise ValueError(
                "Can not create Row with fields %s, expected %d values "
                "but got %s" % (self, len(self), args)
            )
        return _create_row(self, args)

    def __getitem__(self, item: Any) -> Any:
        """
        Return a value by position, slice, or field name.

        Raises
        ------
        ValueError
            If ``item`` is not one of the field names (the exception type is
            kept as-is for backward compatibility).
        KeyError
            If the field name resolves to a position beyond the stored values.
        AttributeError
            If a name lookup is attempted on a row that has no field names.
        """
        if isinstance(item, (int, slice)):
            return super().__getitem__(item)
        try:
            # it will be slow when it has many fields,
            # but this will not be used in normal cases
            idx = self.__fields__.index(item)
            return super().__getitem__(idx)
        except IndexError:
            raise KeyError(item)
        except ValueError:
            raise ValueError(item)

    def __getattr__(self, item: str) -> Any:
        """
        Resolve ``row.name`` to the value stored under field ``name``.

        Only called when normal attribute lookup fails. Dunder names are
        rejected immediately, which is also what makes
        ``hasattr(row, "__fields__")`` return False for unnamed rows.
        """
        if item.startswith("__"):
            raise AttributeError(item)
        try:
            # it will be slow when it has many fields,
            # but this will not be used in normal cases
            idx = self.__fields__.index(item)
            return self[idx]
        except (IndexError, ValueError):
            raise AttributeError(item)

    def __setattr__(self, key: Any, value: Any) -> None:
        """
        Reject every attribute assignment except the internal ``__fields__``.

        Raises
        ------
        RuntimeError
            If ``key`` is anything other than ``"__fields__"``.
        """
        if key != "__fields__":
            raise RuntimeError("Row is read-only")
        self.__dict__[key] = value

    def __reduce__(
        self,
    ) -> Union[str, Tuple[Any, ...]]:
        """Returns a tuple so Python knows how to pickle Row."""
        if hasattr(self, "__fields__"):
            # Rebuild through _create_row so the field names survive pickling.
            return (_create_row, (self.__fields__, tuple(self)))
        return tuple.__reduce__(self)

    def __repr__(self) -> str:
        """Printable representation of Row used in Python REPL."""
        if hasattr(self, "__fields__"):
            return "Row(%s)" % ", ".join(
                "%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))
            )
        # Rows without field names (including "Row classes") are shown in
        # angle brackets. repr() is used instead of "%r" % field so that
        # tuple-valued elements are formatted as a single value.
        return "<Row(%s)>" % ", ".join(repr(field) for field in self)


def _create_row(
    fields: Union["Row", List[str]], values: Union[Tuple[Any, ...], List[Any]]
) -> "Row":
    """
    Build a Row holding ``values`` and attach ``fields`` as its field names.

    Used by ``Row.__call__`` and as the reconstruction function returned by
    ``Row.__reduce__``.
    """
    row = Row(*values)
    row.__fields__ = fields
    return row
X Tutup