Source code for treq.content

import cgi
import json

from twisted.internet.defer import Deferred, succeed

from twisted.internet.protocol import Protocol
from twisted.web.client import ResponseDone
from twisted.web.http import PotentialDataLoss


def _encoding_from_headers(headers):
    content_types = headers.getRawHeaders(u'content-type')
    if content_types is None:
        return None

    # This seems to be the choice browsers make when encountering multiple
    # content-type headers.
    content_type, params = cgi.parse_header(content_types[-1])

    if 'charset' in params:
        return params.get('charset').strip("'\"")

    if content_type == 'application/json':
        return 'UTF-8'


class _BodyCollector(Protocol):
    def __init__(self, finished, collector):
        self.finished = finished
        self.collector = collector

    def dataReceived(self, data):
        self.collector(data)

    def connectionLost(self, reason):
        if reason.check(ResponseDone):
            self.finished.callback(None)
        elif reason.check(PotentialDataLoss):
            # http://twistedmatrix.com/trac/ticket/4840
            self.finished.callback(None)
        else:
            self.finished.errback(reason)


[docs]def collect(response, collector): """ Incrementally collect the body of the response. This function may only be called **once** for a given response. :param IResponse response: The HTTP response to collect the body from. :param collector: A callable to be called each time data is available from the response body. :type collector: single argument callable :rtype: Deferred that fires with None when the entire body has been read. """ if response.length == 0: return succeed(None) d = Deferred() response.deliverBody(_BodyCollector(d, collector)) return d
[docs]def content(response): """ Read the contents of an HTTP response. This function may be called multiple times for a response, it uses a ``WeakKeyDictionary`` to cache the contents of the response. :param IResponse response: The HTTP Response to get the contents of. :rtype: Deferred that fires with the content as a str. """ _content = [] d = collect(response, _content.append) d.addCallback(lambda _: b''.join(_content)) return d
[docs]def json_content(response, **kwargs): """ Read the contents of an HTTP response and attempt to decode it as JSON. This function relies on :py:func:`content` and so may be called more than once for a given response. :param IResponse response: The HTTP Response to get the contents of. :param kwargs: Any keyword arguments accepted by :py:func:`json.loads` :rtype: Deferred that fires with the decoded JSON. """ # RFC7159 (8.1): Default JSON character encoding is UTF-8 d = text_content(response, encoding='utf-8') d.addCallback(lambda text: json.loads(text, **kwargs)) return d
[docs]def text_content(response, encoding='ISO-8859-1'): """ Read the contents of an HTTP response and decode it with an appropriate charset, which may be guessed from the ``Content-Type`` header. :param IResponse response: The HTTP Response to get the contents of. :param str encoding: A charset, such as ``UTF-8`` or ``ISO-8859-1``, used if the response does not specify an encoding. :rtype: Deferred that fires with a unicode string. """ def _decode_content(c): e = _encoding_from_headers(response.headers) if e is not None: return c.decode(e) return c.decode(encoding) d = content(response) d.addCallback(_decode_content) return d