428 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			428 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
from __future__ import annotations
 | 
						||
 | 
						||
import dataclasses
 | 
						||
import os
 | 
						||
import re
 | 
						||
import sys
 | 
						||
import warnings
 | 
						||
from collections.abc import Generator
 | 
						||
from typing import Callable
 | 
						||
 | 
						||
from .datastructures import Headers
 | 
						||
from .exceptions import SecurityError
 | 
						||
from .version import version as websockets_version
 | 
						||
 | 
						||
 | 
						||
__all__ = ["SERVER", "USER_AGENT", "Request", "Response"]


# "major.minor" version of the running interpreter.
PYTHON_VERSION = "{}.{}".format(sys.version_info.major, sys.version_info.minor)

# Value of the User-Agent header sent with HTTP requests. The
# WEBSOCKETS_USER_AGENT environment variable overrides the default.
USER_AGENT = os.getenv(
    "WEBSOCKETS_USER_AGENT",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Value of the Server header sent with HTTP responses. The
# WEBSOCKETS_SERVER environment variable overrides the default.
SERVER = os.getenv(
    "WEBSOCKETS_SERVER",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# With the defaults, headers can total about 128 * 8 KiB = 1 MiB.
MAX_NUM_HEADERS = int(os.getenv("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Cap on the request line and on each header line. 8 KiB is the most
# common default configuration of popular HTTP servers.
MAX_LINE_LENGTH = int(os.getenv("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))

# HTTP response bodies are only read to retrieve an error message returned
# by a server; this isn't designed to perform large file transfers.
MAX_BODY_SIZE = int(os.getenv("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576"))  # 1 MiB
 | 
						||
 | 
						||
 | 
						||
def d(value: bytes) -> str:
    """
    Turn a bytestring into text that can be embedded in an error message.

    Bytes that aren't valid UTF-8 are rendered as backslash escapes rather
    than raising an exception.

    """
    return value.decode("utf-8", errors="backslashreplace")
 | 
						||
 | 
						||
 | 
						||
# See https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.

# Regex for validating header names: one or more token characters.

_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values. It is also applied to the reason
# phrase of a status line. The empty string is accepted.

# We don't attempt to support obsolete line folding.

# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).

# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.

# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189

_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
 | 
						||
 | 
						||
 | 
						||
@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.
    """

    path: str
    headers: Headers
    # body isn't useful in the context of this library.

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """Deprecated accessor for the exception stored on this request."""
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code reading the property rather
            # than to this module, so that it points at what needs changing.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way.

        The request path and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        :meth:`parse` doesn't attempt to read the request body because
        WebSocket handshake requests don't have one. If the request contains a
        body, it may be read from the data stream after :meth:`parse` returns.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: If the connection is closed without a full HTTP request.
            SecurityError: If the request exceeds a security limit.
            ValueError: If the request isn't well formatted, or if it
                declares a body with a Content-Length header.
            NotImplementedError: If the request has a Transfer-Encoding
                header.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1

        # Parsing is simple because fixed values are expected for method and
        # version and because path isn't checked. Since WebSocket software tends
        # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            method, raw_path, protocol = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(request_line)}"
            )
        if method != b"GET":
            raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}")
        path = raw_path.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        Returns:
            The request line followed by the serialized headers.

        """
        # Since the request line and headers only contain ASCII characters,
        # we can keep this simple.
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request
 | 
						||
 | 
						||
 | 
						||
@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body.

    """

    status_code: int
    reason_phrase: str
    headers: Headers
    body: bytes = b""

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """Deprecated accessor for the exception stored on this response."""
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code reading the property rather
            # than to this module, so that it points at what needs changing.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
        read_exact: Callable[[int], Generator[None, None, bytes]],
        read_to_eof: Callable[[int], Generator[None, None, bytes]],
        include_body: bool = True,
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: Generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: Generator-based coroutine that reads until the end
                of the stream.
            include_body: When :obj:`False`, the body isn't read and the
                returned response has an empty body.

        Raises:
            EOFError: If the connection is closed without a full HTTP response.
            SecurityError: If the response exceeds a security limit.
            LookupError: If the response isn't well formatted.
            ValueError: If the response isn't well formatted.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            protocol, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
            )
        # status-code = 3DIGIT. int() alone is too lenient: it also accepts a
        # sign, underscores, and surrounding whitespace (b"+200", b"2_00").
        if not raw_status_code.isdigit():
            raise ValueError(
                f"invalid status code; expected integer; got {d(raw_status_code)}"
            )
        # The value is all ASCII digits here, so int() cannot fail on a
        # three-character input. Any other length is out of range; 0 stands
        # in for it so that the range check below rejects it.
        status_code = int(raw_status_code) if len(raw_status_code) == 3 else 0
        if not 100 <= status_code < 600:
            raise ValueError(
                f"invalid status code; expected 100–599; got {d(raw_status_code)}"
            )
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
        reason = raw_reason.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        if include_body:
            body = yield from read_body(
                status_code, headers, read_line, read_exact, read_to_eof
            )
        else:
            body = b""

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        Returns:
            The status line, the serialized headers, then the body.

        """
        # Since the status line and headers only contain ASCII characters,
        # we can keep this simple.
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        response += self.body
        return response
 | 
						||
 | 
						||
 | 
						||
def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Read one line and return it without its trailing CRLF.

    This is a generator-based coroutine.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data. It is called
            with ``MAX_LINE_LENGTH``.

    Raises:
        EOFError: If the line that was read doesn't end with CRLF.
        SecurityError: If ``read_line`` raises :exc:`RuntimeError`, which is
            reported as the line being too long.

    """
    try:
        raw = yield from read_line(MAX_LINE_LENGTH)
    except RuntimeError:
        raise SecurityError("line too long")
    # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
    content, terminator = raw[:-2], raw[-2:]
    if terminator != b"\r\n":
        raise EOFError("line without CRLF")
    return content
 | 
						||
 | 
						||
 | 
						||
def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, Headers]:
    """
    Read header lines up to the blank line that ends them.

    This is a generator-based coroutine.

    Header names must be ASCII tokens. In header values, non-ASCII
    characters are represented with surrogate escapes.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without complete headers.
        SecurityError: If there are more than ``MAX_NUM_HEADERS`` headers
            or a line exceeds the length limit.
        ValueError: If a header line, name, or value isn't well formatted.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2

    # Obsolete line folding is deliberately unsupported.

    parsed = Headers()
    # One extra line is allowed for the blank line closing the header block.
    lines_left = MAX_NUM_HEADERS + 1
    while lines_left > 0:
        lines_left -= 1

        try:
            raw_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if not raw_line:
            # Blank line: the header block is complete.
            return parsed

        try:
            field_name, field_value = raw_line.split(b":", 1)
        except ValueError:  # no colon in the line
            raise ValueError(f"invalid HTTP header line: {d(raw_line)}") from None
        if _token_re.fullmatch(field_name) is None:
            raise ValueError(f"invalid HTTP header name: {d(field_name)}")
        field_value = field_value.strip(b" \t")
        if _value_re.fullmatch(field_value) is None:
            raise ValueError(f"invalid HTTP header value: {d(field_value)}")

        # The name matched the token regex, so it is pure ASCII.
        name = field_name.decode("ascii")
        value = field_value.decode("ascii", "surrogateescape")
        parsed[name] = value

    # The line budget ran out before a blank line was seen.
    raise SecurityError("too many HTTP headers")
 | 
						||
 | 
						||
 | 
						||
def read_body(
    status_code: int,
    headers: Headers,
    read_line: Callable[[int], Generator[None, None, bytes]],
    read_exact: Callable[[int], Generator[None, None, bytes]],
    read_to_eof: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Read the body of an HTTP response.

    This is a generator-based coroutine.

    Args:
        status_code: Status code of the response; 1xx, 204, and 304
            responses have no body.
        headers: Response headers; ``Transfer-Encoding`` and
            ``Content-Length`` determine how the body is delimited.
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.
        read_exact: Generator-based coroutine that reads the requested
            bytes or raises an exception if there isn't enough data.
        read_to_eof: Generator-based coroutine that reads until the end
            of the stream.

    Returns:
        The response body, or ``b""`` when the status code implies no body.

    Raises:
        NotImplementedError: If the transfer coding isn't ``chunked``.
        SecurityError: If the body, a chunk, or a declared size exceeds a
            security limit.
        ValueError: If a chunk size or the content length isn't made only
            of digits, or if a chunk isn't followed by CRLF.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

    # Since websockets only does GET requests (no HEAD, no CONNECT), all
    # responses except 1xx, 204, and 304 include a message body.
    if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
        return b""

    # MultipleValuesError is sufficiently unlikely that we don't attempt to
    # handle it when accessing headers. Instead we document that its parent
    # class, LookupError, may be raised.
    # Conversions from str to int are protected by sys.set_int_max_str_digits.

    if (coding := headers.get("Transfer-Encoding")) is not None:
        if coding != "chunked":
            raise NotImplementedError(f"transfer coding {coding} isn't supported")

        body = b""
        while True:
            chunk_size_line = yield from parse_line(read_line)
            raw_chunk_size = chunk_size_line.split(b";", 1)[0]
            # Set a lower limit than default_max_str_digits; 1 EiB is plenty.
            if len(raw_chunk_size) > 15:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
            # chunk-size = 1*HEXDIG. int(..., 16) alone would also accept a
            # sign, a 0x prefix, or underscores; a negative size would pass
            # the MAX_BODY_SIZE check below. Whitespace next to the size is
            # still tolerated, as it was before.
            if re.fullmatch(rb"[0-9A-Fa-f]+", raw_chunk_size.strip(b" \t")) is None:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise ValueError(f"invalid chunk size: {str_chunk_size}")
            chunk_size = int(raw_chunk_size, 16)
            if chunk_size == 0:
                break
            if len(body) + chunk_size > MAX_BODY_SIZE:
                raise SecurityError(
                    f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
                )
            body += yield from read_exact(chunk_size)
            if (yield from read_exact(2)) != b"\r\n":
                raise ValueError("chunk without CRLF")
        # Read the trailer.
        yield from parse_headers(read_line)
        return body

    if (raw_content_length := headers.get("Content-Length")) is not None:
        # Set a lower limit than default_max_str_digits; 1 EB is plenty.
        if len(raw_content_length) > 18:
            raise SecurityError(f"body too large: {raw_content_length} bytes")
        # Content-Length = 1*DIGIT. int() alone would also accept a sign or
        # underscores; a negative length would pass the MAX_BODY_SIZE check
        # below and reach read_exact() with a negative count.
        if not (raw_content_length.isascii() and raw_content_length.isdigit()):
            raise ValueError(f"invalid Content-Length: {raw_content_length}")
        content_length = int(raw_content_length)
        if content_length > MAX_BODY_SIZE:
            raise SecurityError(f"body too large: {content_length} bytes")
        return (yield from read_exact(content_length))

    # Neither Transfer-Encoding nor Content-Length: the body runs until the
    # connection is closed.
    try:
        return (yield from read_to_eof(MAX_BODY_SIZE))
    except RuntimeError:
        raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")
 |