commit 97cb9c8703
parent 3d57f842f9
Date: 2025-05-22 21:22:15 +02:00
156 changed files with 1205 additions and 6603 deletions

@@ -1,16 +1,10 @@
from __future__ import annotations

import io
import re
import typing as t
import warnings
from functools import partial
from functools import update_wrapper
from itertools import chain

from ._internal import _make_encode_wrapper
from ._internal import _to_bytes
from ._internal import _to_str
from .exceptions import ClientDisconnected
from .exceptions import RequestEntityTooLarge
from .sansio import utils as _sansio_utils
@@ -200,45 +194,18 @@ def get_input_stream(
    return t.cast(t.IO[bytes], LimitedStream(stream, content_length))
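For context, ``get_input_stream`` caps reads at the ``Content-Length`` value by wrapping ``wsgi.input`` in ``LimitedStream``. A minimal sketch of calling it (the environ here is illustrative):

```python
import io
from werkzeug.wsgi import get_input_stream

environ = {
    "wsgi.input": io.BytesIO(b"hello world"),
    "CONTENT_LENGTH": "5",
}
stream = get_input_stream(environ)
print(stream.read())  # b'hello' -- reading stops at Content-Length
```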
def get_path_info(
    environ: WSGIEnvironment,
    charset: t.Any = ...,
    errors: str | None = None,
) -> str:
def get_path_info(environ: WSGIEnvironment) -> str:
    """Return ``PATH_INFO`` from the WSGI environment.

    :param environ: WSGI environment to get the path from.

    .. versionchanged:: 2.3
        The ``charset`` and ``errors`` parameters are deprecated and will be removed in
        Werkzeug 3.0.

    .. versionchanged:: 3.0
        The ``charset`` and ``errors`` parameters were removed.

    .. versionadded:: 0.9
    """
    if charset is not ...:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

        if charset is None:
            charset = "utf-8"
    else:
        charset = "utf-8"

    if errors is not None:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        errors = "replace"

    path = environ.get("PATH_INFO", "").encode("latin1")
    return path.decode(charset, errors)  # type: ignore[no-any-return]
    path: bytes = environ.get("PATH_INFO", "").encode("latin1")
    return path.decode(errors="replace")
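For illustration, a minimal sketch of the behavior after this change: ``PATH_INFO`` arrives as a latin-1 string per PEP 3333 and is now always decoded as UTF-8 with ``errors="replace"``.

```python
from werkzeug.wsgi import get_path_info

# '/café' as UTF-8 bytes, seen through the WSGI latin-1 lens.
environ = {"PATH_INFO": "/caf\xc3\xa9"}
print(get_path_info(environ))  # '/café'
```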
class ClosingIterator:
@@ -455,225 +422,6 @@ class _RangeWrapper:
            self.iterable.close()
def _make_chunk_iter(
    stream: t.Iterable[bytes] | t.IO[bytes],
    limit: int | None,
    buffer_size: int,
) -> t.Iterator[bytes]:
    """Helper for the line and chunk iter functions."""
    warnings.warn(
        "'_make_chunk_iter' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )

    if isinstance(stream, (bytes, bytearray, str)):
        raise TypeError(
            "Passed a string or byte object instead of true iterator or stream."
        )

    if not hasattr(stream, "read"):
        for item in stream:
            if item:
                yield item

        return

    stream = t.cast(t.IO[bytes], stream)

    if not isinstance(stream, LimitedStream) and limit is not None:
        stream = t.cast(t.IO[bytes], LimitedStream(stream, limit))

    _read = stream.read

    while True:
        item = _read(buffer_size)

        if not item:
            break

        yield item
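For reference, the removed helper's core behavior reduced to a standalone sketch (``iter_chunks`` is a hypothetical name, not part of Werkzeug): yield fixed-size reads from a file-like object until it is exhausted.

```python
import io

def iter_chunks(stream, buffer_size=4):
    # Simplified stand-in for the removed _make_chunk_iter: yield
    # fixed-size reads until the stream is exhausted.
    while True:
        item = stream.read(buffer_size)
        if not item:
            break
        yield item

print(list(iter_chunks(io.BytesIO(b"abcdefghij"))))
# [b'abcd', b'efgh', b'ij']
```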
def make_line_iter(
    stream: t.Iterable[bytes] | t.IO[bytes],
    limit: int | None = None,
    buffer_size: int = 10 * 1024,
    cap_at_buffer: bool = False,
) -> t.Iterator[bytes]:
    """Safely iterates over an input stream line by line. If the input
    stream is not a :class:`LimitedStream` the `limit` parameter is
    mandatory.

    This uses the stream's :meth:`~file.read` method internally, as
    opposed to the :meth:`~file.readline` method, which is unsafe and can
    only be used in violation of the WSGI specification. The same problem
    applies to the `__iter__` method of the input stream, which calls
    :meth:`~file.readline` without arguments.

    If you need line-by-line processing it's strongly recommended to
    iterate over the input stream using this helper function.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0.

    .. versionadded:: 0.11
        Added support for the `cap_at_buffer` parameter.

    .. versionadded:: 0.9
        Added support for iterators as input stream.

    .. versionchanged:: 0.8
        This function now ensures that the limit was reached.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream. (Usually the content
        length. Not necessary if the `stream` is a :class:`LimitedStream`.)
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set, chunks are split if they are
        longer than the buffer size. Internally the buffer size may be
        exceeded by up to a factor of two, however.
    """
    warnings.warn(
        "'make_line_iter' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    first_item = next(_iter, "")

    if not first_item:
        return

    s = _make_encode_wrapper(first_item)
    empty = t.cast(bytes, s(""))
    cr = t.cast(bytes, s("\r"))
    lf = t.cast(bytes, s("\n"))
    crlf = t.cast(bytes, s("\r\n"))

    _iter = t.cast(t.Iterator[bytes], chain((first_item,), _iter))

    def _iter_basic_lines() -> t.Iterator[bytes]:
        _join = empty.join
        buffer: list[bytes] = []

        while True:
            new_data = next(_iter, "")

            if not new_data:
                break

            new_buf: list[bytes] = []
            buf_size = 0

            for item in t.cast(
                t.Iterator[bytes], chain(buffer, new_data.splitlines(True))
            ):
                new_buf.append(item)
                buf_size += len(item)

                if item and item[-1:] in crlf:
                    yield _join(new_buf)
                    new_buf = []
                elif cap_at_buffer and buf_size >= buffer_size:
                    rv = _join(new_buf)

                    while len(rv) >= buffer_size:
                        yield rv[:buffer_size]
                        rv = rv[buffer_size:]

                    new_buf = [rv]

            buffer = new_buf

        if buffer:
            yield _join(buffer)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    previous = empty

    for item in _iter_basic_lines():
        if item == lf and previous[-1:] == cr:
            previous += item
            item = empty

        if previous:
            yield previous

        previous = item

    if previous:
        yield previous
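A usage sketch of the removed helper, assuming Werkzeug < 3.0 (from 2.3 on it still works but emits a ``DeprecationWarning``):

```python
import io
from werkzeug.wsgi import make_line_iter  # removed in Werkzeug 3.0

body = io.BytesIO(b"first\r\nsecond\nthird")
for line in make_line_iter(body, limit=19):
    print(line)
# b'first\r\n'
# b'second\n'
# b'third'
```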
def make_chunk_iter(
    stream: t.Iterable[bytes] | t.IO[bytes],
    separator: bytes,
    limit: int | None = None,
    buffer_size: int = 10 * 1024,
    cap_at_buffer: bool = False,
) -> t.Iterator[bytes]:
    """Works like :func:`make_line_iter` but accepts a separator which
    divides chunks. If you want newline-based processing you should use
    :func:`make_line_iter` instead, as it supports arbitrary newline
    markers.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0.

    .. versionchanged:: 0.11
        Added support for the `cap_at_buffer` parameter.

    .. versionchanged:: 0.9
        Added support for iterators as input stream.

    .. versionadded:: 0.8

    :param stream: the stream or iterable to iterate over.
    :param separator: the separator that divides chunks.
    :param limit: the limit in bytes for the stream. (Usually the content
        length. Not necessary if the `stream` is otherwise already
        limited.)
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set, chunks are split if they are
        longer than the buffer size. Internally the buffer size may be
        exceeded by up to a factor of two, however.
    """
    warnings.warn(
        "'make_chunk_iter' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    first_item = next(_iter, b"")

    if not first_item:
        return

    _iter = t.cast(t.Iterator[bytes], chain((first_item,), _iter))

    if isinstance(first_item, str):
        separator = _to_str(separator)
        _split = re.compile(f"({re.escape(separator)})").split
        _join = "".join
    else:
        separator = _to_bytes(separator)
        _split = re.compile(b"(" + re.escape(separator) + b")").split
        _join = b"".join

    buffer: list[bytes] = []

    while True:
        new_data = next(_iter, b"")

        if not new_data:
            break

        chunks = _split(new_data)
        new_buf: list[bytes] = []
        buf_size = 0

        for item in chain(buffer, chunks):
            if item == separator:
                yield _join(new_buf)
                new_buf = []
                buf_size = 0
            else:
                buf_size += len(item)
                new_buf.append(item)

                if cap_at_buffer and buf_size >= buffer_size:
                    rv = _join(new_buf)

                    while len(rv) >= buffer_size:
                        yield rv[:buffer_size]
                        rv = rv[buffer_size:]

                    new_buf = [rv]
                    buf_size = len(rv)

        buffer = new_buf

    if buffer:
        yield _join(buffer)
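Similarly, a usage sketch for the separator-based variant, again assuming Werkzeug < 3.0:

```python
import io
from werkzeug.wsgi import make_chunk_iter  # removed in Werkzeug 3.0

body = io.BytesIO(b"alpha|beta|gamma")
for chunk in make_chunk_iter(body, separator=b"|", limit=16):
    print(chunk)
# b'alpha'
# b'beta'
# b'gamma'
```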
class LimitedStream(io.RawIOBase):
    """Wrap a stream so that it doesn't read more than a given limit. This is used to
    limit ``wsgi.input`` to the ``Content-Length`` header value or