# Source code for ietfparse.algorithms
"""
Implementations of algorithms from various specifications.
- :func:`.select_content_type`: select the best match between an
  HTTP ``Accept`` header and a list of available ``Content-Type`` values
This module implements some of the more interesting algorithms
described in IETF RFCs.
"""
from __future__ import annotations
from collections import abc
from operator import attrgetter
from ietfparse import datastructures, errors
def _content_type_matches(candidate: datastructures.ContentType,
                          pattern: datastructures.ContentType) -> bool:
    """Report whether ``candidate`` is accepted by ``pattern``.

    The major type and the sub-type are checked independently.  In
    either position a ``*`` in the *pattern* accepts any value; a ``*``
    in the *candidate* receives no special treatment.

    :param candidate: the content type being tested
    :param pattern: the (possibly wild-carded) content type to test against
    :returns: :data:`True` when both the type and sub-type are accepted
    """
    def _accepts(expected: str, actual: str) -> bool:
        # A wildcard in the pattern accepts anything in that position.
        if expected == '*':
            return True
        return actual == expected

    # Bail out as soon as the major type is rejected.
    if not _accepts(pattern.content_type, candidate.content_type):
        return False
    return _accepts(pattern.content_subtype, candidate.content_subtype)
def select_content_type(
    requested: abc.Sequence[datastructures.ContentType],
    available: abc.Sequence[datastructures.ContentType]
) -> tuple[datastructures.ContentType, datastructures.ContentType]:
    """Selects the best content type.

    :param requested: a sequence of :class:`.ContentType` instances
    :param available: a sequence of :class:`.ContentType` instances
        that the server is capable of producing

    :returns: the selected content type (from ``available``) and the
        pattern that it matched (from ``requested``)
    :rtype: :class:`tuple` of :class:`.ContentType` instances
    :raises: :class:`.NoMatch` when a suitable match was not found

    This function implements the *Proactive Content Negotiation*
    algorithm as described in sections 3.4.1 and 5.3 of :rfc:`7231`.
    The input is the `Accept`_ header as parsed by
    :func:`.parse_http_accept_header` and a list of
    parsed :class:`.ContentType` instances.  The ``available`` sequence
    should be a sequence of content types that the server is capable of
    producing.  The selected value should ultimately be used as the
    `Content-Type`_ header in the generated response.

    A requested pattern without a quality value is treated as having a
    quality of ``1.0``.  A pattern with a quality of zero means *not
    acceptable*: it is never returned as the matched pattern, and when
    it is the most specific pattern matching an available content type,
    that content type is excluded from the selection.

    .. _Accept: https://tools.ietf.org/html/rfc7231#section-5.3.2
    .. _Content-Type: https://tools.ietf.org/html/rfc7231#section-3.1.1.5

    """
    def extract_quality(obj: datastructures.ContentType) -> float:
        # A missing quality value is equivalent to ``q=1``.
        return 1.0 if obj.quality is None else obj.quality

    class Match:
        """Sorting assistant.

        Sorting matches is a tricky business.  We need a way to
        prefer content types by *specificity*.  The definition of
        *more specific* is a little less than clear.  This class
        treats the strength of a match as the most important thing.
        Wild cards are less specific in all cases.  This is tracked
        by the ``match_type`` attribute.

        If the candidate and pattern differ only by parameters,
        then the strength is based on the number of pattern parameters
        that match parameters from the candidate.  The easiest way to
        track this is to count the number of candidate parameters that
        are matched by the pattern.  This is what ``parameter_distance``
        tracks.

        The final key to the solution is to order the result set such
        that the most specific matches are first in the list.  This
        is done by carefully choosing values for ``match_type`` such
        that full matches bubble up to the front.  We also need a
        scheme of counting matching parameters that pushes stronger
        matches to the front of the list.  The ``parameter_distance``
        attribute starts at the number of candidate parameters,
        decreases for each candidate parameter that the pattern
        matches, and increases for each one that the pattern
        contradicts - the lesser the value, the stronger the match.

        The ``quality`` attribute records the quality of the pattern
        so that zero-quality matches can be recognised after sorting.

        """
        # Smaller values sort first, so the most specific kind is 0.
        WILDCARD, PARTIAL, FULL_TYPE, = 2, 1, 0

        def __init__(self, candidate: datastructures.ContentType,
                     pattern: datastructures.ContentType) -> None:
            self.candidate = candidate
            self.pattern = pattern
            self.quality = extract_quality(pattern)

            if pattern.content_type == pattern.content_subtype == '*':
                self.match_type = self.WILDCARD
            elif pattern.content_subtype == '*':
                self.match_type = self.PARTIAL
            else:
                self.match_type = self.FULL_TYPE

            self.parameter_distance = len(self.candidate.parameters)
            for key, value in candidate.parameters.items():
                if key in pattern.parameters:
                    if pattern.parameters[key] == value:
                        self.parameter_distance -= 1
                    else:
                        self.parameter_distance += 1

    # The ordering of ``available`` does not depend on the pattern, so
    # sort it once.  Keeping this list alive also keeps the ``id()``
    # values recorded in ``rejected`` stable.
    candidates = sorted(available)

    # id()s of candidates that were explicitly refused by a ``q=0``
    # pattern.  Identity is used so that candidates need not be hashable.
    rejected: set[int] = set()

    matches = []
    for pattern in sorted(requested, key=extract_quality, reverse=True):
        quality = extract_quality(pattern)
        for candidate in candidates:
            if not _content_type_matches(candidate, pattern):
                continue
            if candidate == pattern:  # exact match!!!
                if quality == 0.0:
                    # quality of 0 means NO, but only for this
                    # candidate; the others may still be acceptable.
                    rejected.add(id(candidate))
                    continue
                return candidate, pattern
            matches.append(Match(candidate, pattern))

    # Most specific matches first.  The sort is stable, so among equally
    # specific matches the higher quality pattern stays in front.
    matches.sort(key=attrgetter('match_type', 'parameter_distance'))
    for match in matches:
        key = id(match.candidate)
        if key in rejected:
            continue
        if match.quality == 0.0:
            # The most specific pattern for this candidate refuses it,
            # so less specific patterns must not resurrect it.
            rejected.add(key)
            continue
        return match.candidate, match.pattern

    raise errors.NoMatch