"""
Scrapy Item
See documentation in docs/topics/item.rst
"""
from abc import ABCMeta
from collections.abc import MutableMapping
from copy import deepcopy
from pprint import pformat
from typing import Dict
from warnings import warn
from scrapy.utils.deprecate import ScrapyDeprecationWarning
from scrapy.utils.trackref import object_ref
class _BaseItem(object_ref):
"""
Temporary class used internally to avoid the deprecation
warning raised by isinstance checks using BaseItem.
"""
pass
class _BaseItemMeta(ABCMeta):
def __instancecheck__(cls, instance):
if cls is BaseItem:
warn('scrapy.item.BaseItem is deprecated, please use scrapy.item.Item instead',
ScrapyDeprecationWarning, stacklevel=2)
return super().__instancecheck__(instance)
class BaseItem(_BaseItem, metaclass=_BaseItemMeta):
"""
Deprecated, please use :class:`scrapy.item.Item` instead
"""
def __new__(cls, *args, **kwargs):
if issubclass(cls, BaseItem) and not issubclass(cls, (Item, DictItem)):
warn('scrapy.item.BaseItem is deprecated, please use scrapy.item.Item instead',
ScrapyDeprecationWarning, stacklevel=2)
return super().__new__(cls, *args, **kwargs)
[ドキュメント]class Field(dict):
"""Container of field metadata"""
class DictItem(MutableMapping, BaseItem):
fields: Dict[str, Field] = {}
def __new__(cls, *args, **kwargs):
if issubclass(cls, DictItem) and not issubclass(cls, Item):
warn('scrapy.item.DictItem is deprecated, please use scrapy.item.Item instead',
ScrapyDeprecationWarning, stacklevel=2)
return super().__new__(cls, *args, **kwargs)
def __init__(self, *args, **kwargs):
self._values = {}
if args or kwargs: # avoid creating dict for most common case
for k, v in dict(*args, **kwargs).items():
self[k] = v
def __getitem__(self, key):
return self._values[key]
def __setitem__(self, key, value):
if key in self.fields:
self._values[key] = value
else:
raise KeyError(f"{self.__class__.__name__} does not support field: {key}")
def __delitem__(self, key):
del self._values[key]
def __getattr__(self, name):
if name in self.fields:
raise AttributeError(f"Use item[{name!r}] to get field value")
raise AttributeError(name)
def __setattr__(self, name, value):
if not name.startswith('_'):
raise AttributeError(f"Use item[{name!r}] = {value!r} to set field value")
super().__setattr__(name, value)
def __len__(self):
return len(self._values)
def __iter__(self):
return iter(self._values)
__hash__ = BaseItem.__hash__
def keys(self):
return self._values.keys()
def __repr__(self):
return pformat(dict(self))
def copy(self):
return self.__class__(self)
def deepcopy(self):
"""Return a :func:`~copy.deepcopy` of this item.
"""
return deepcopy(self)
[ドキュメント]class Item(DictItem, metaclass=ItemMeta):
"""
Base class for scraped items.
In Scrapy, an object is considered an ``item`` if it is an instance of either
:class:`Item` or :class:`dict`, or any subclass. For example, when the output of a
spider callback is evaluated, only instances of :class:`Item` or
:class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`.
If you need instances of a custom class to be considered items by Scrapy,
you must inherit from either :class:`Item` or :class:`dict`.
Items must declare :class:`Field` attributes, which are processed and stored
in the ``fields`` attribute. This restricts the set of allowed field names
and prevents typos, raising ``KeyError`` when referring to undefined fields.
Additionally, fields can be used to define metadata and control the way
data is processed internally. Please refer to the :ref:`documentation
about fields <topics-items-fields>` for additional information.
Unlike instances of :class:`dict`, instances of :class:`Item` may be
:ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks.
"""