Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
82.42% |
75 / 91 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
| ResponseParser | |
82.42% |
75 / 91 |
|
0.00% |
0 / 7 |
44.44 | |
0.00% |
0 / 1 |
| __construct | |
50.00% |
1 / 2 |
|
0.00% |
0 / 1 |
4.12 | |||
| parse | |
95.83% |
23 / 24 |
|
0.00% |
0 / 1 |
8 | |||
| findHeaderBodySeparator | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
| parseHeaderBlock | |
70.59% |
12 / 17 |
|
0.00% |
0 / 1 |
8.25 | |||
| extractDecodedBody | |
66.67% |
10 / 15 |
|
0.00% |
0 / 1 |
7.33 | |||
| parseEncapsulatedHeader | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
| decodeChunked | |
89.47% |
17 / 19 |
|
0.00% |
0 / 1 |
6.04 | |||
| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * SPDX-License-Identifier: EUPL-1.2 |
| 5 | * |
| 6 | * This file is part of icap-flow. |
| 7 | * |
| 8 | * Licensed under the EUPL, Version 1.2 only (the "Licence"); |
| 9 | * you may not use this work except in compliance with the Licence. |
| 10 | * You may obtain a copy of the Licence at: |
| 11 | * |
| 12 | * https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 |
| 13 | * |
| 14 | * Unless required by applicable law or agreed to in writing, software |
| 15 | * distributed under the Licence is distributed on an "AS IS" basis, |
| 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | */ |
| 18 | |
| 19 | declare(strict_types=1); |
| 20 | |
| 21 | namespace Ndrstmr\Icap; |
| 22 | |
| 23 | use Ndrstmr\Icap\DTO\IcapResponse; |
| 24 | use Ndrstmr\Icap\Exception\IcapMalformedResponseException; |
| 25 | |
/**
 * Parses raw ICAP server responses into {@link IcapResponse} value
 * objects, honouring the `Encapsulated` header (RFC 3507 §4.4) to split
 * the encapsulated block into HTTP-header and HTTP-body sections before
 * decoding the HTTP chunked transfer encoding of the latter.
 *
 * The ICAP header block may be terminated by either `\r\n\r\n` or a bare
 * `\n\n`; whichever occurs first in the message is taken as the
 * separator. The number of header lines and the length of each line are
 * bounded by the limits passed to the constructor.
 */
final class ResponseParser implements ResponseParserInterface
{
    private const int DEFAULT_MAX_HEADER_COUNT = 100;
    private const int DEFAULT_MAX_HEADER_LINE = 8192;

    private const string CRLF_SEPARATOR = "\r\n\r\n";
    private const string LF_SEPARATOR = "\n\n";

    /**
     * @param int $maxHeaderCount      Maximum number of lines allowed in the ICAP
     *                                 header block (the status line is not counted).
     * @param int $maxHeaderLineLength Maximum length, in bytes, of any single line
     *                                 of the ICAP header block.
     *
     * @throws \InvalidArgumentException When either limit is lower than 1.
     */
    public function __construct(
        private readonly int $maxHeaderCount = self::DEFAULT_MAX_HEADER_COUNT,
        private readonly int $maxHeaderLineLength = self::DEFAULT_MAX_HEADER_LINE,
    ) {
        if ($maxHeaderCount < 1 || $maxHeaderLineLength < 1) {
            throw new \InvalidArgumentException('Parser limits must be >= 1');
        }
    }

    /**
     * Parse a complete raw ICAP response.
     *
     * @param string $rawResponse The response bytes exactly as read from the server.
     *
     * @return IcapResponse Status code, header map and decoded body.
     *
     * @throws IcapMalformedResponseException When the header/body separator is
     *         missing, a configured limit is exceeded, the status line or a
     *         header line is malformed, or the `Encapsulated` header carries an
     *         invalid or out-of-range offset.
     */
    #[\Override]
    public function parse(string $rawResponse): IcapResponse
    {
        $separatorPos = $this->findHeaderBodySeparator($rawResponse);
        if ($separatorPos === null) {
            throw new IcapMalformedResponseException('Invalid ICAP response: missing header/body separator');
        }

        // The separator is either 4 bytes (CRLF CRLF) or 2 bytes (LF LF);
        // skip exactly as many bytes as were matched so that no byte of
        // the encapsulated block is lost for LF-only responses.
        $separatorLength = substr($rawResponse, $separatorPos, 4) === self::CRLF_SEPARATOR
            ? strlen(self::CRLF_SEPARATOR)
            : strlen(self::LF_SEPARATOR);

        $head = substr($rawResponse, 0, $separatorPos);
        $encapsulatedBlock = substr($rawResponse, $separatorPos + $separatorLength);

        $lines = preg_split('/\r?\n/', $head);
        if ($lines === false || $lines === []) {
            throw new IcapMalformedResponseException('Invalid ICAP response: no lines');
        }

        // +1 for the status line itself.
        if (count($lines) > $this->maxHeaderCount + 1) {
            throw new IcapMalformedResponseException(
                sprintf('ICAP response exceeded max header count (%d).', $this->maxHeaderCount),
            );
        }

        foreach ($lines as $line) {
            if (strlen($line) > $this->maxHeaderLineLength) {
                throw new IcapMalformedResponseException(
                    sprintf('ICAP response header exceeded max line length (%d).', $this->maxHeaderLineLength),
                );
            }
        }

        $statusLine = (string) array_shift($lines);
        if (preg_match('/^ICAP\/1\.\d\s+(\d+)(?:\s+.*)?$/', $statusLine, $m) !== 1) {
            throw new IcapMalformedResponseException('Invalid status line: ' . $statusLine);
        }
        $statusCode = (int) $m[1];

        $headers = $this->parseHeaderBlock($lines);
        $body = $this->extractDecodedBody($encapsulatedBlock, $headers);

        return new IcapResponse($statusCode, $headers, $body);
    }

    /**
     * Locate the `\r\n\r\n` (or `\n\n`) that separates the ICAP header
     * block from the encapsulated block. When both forms occur, the one
     * that appears first wins: a chunked body ends in `0\r\n\r\n`, so
     * preferring CRLF unconditionally would mis-split a response whose
     * header block is LF-terminated.
     *
     * @return int|null Offset of the first byte of the separator, or null
     *                  if the message contains neither form.
     */
    private function findHeaderBodySeparator(string $raw): ?int
    {
        $crlf = strpos($raw, self::CRLF_SEPARATOR);
        $lf = strpos($raw, self::LF_SEPARATOR);

        if ($crlf === false) {
            return $lf === false ? null : $lf;
        }
        if ($lf === false) {
            return $crlf;
        }

        return min($crlf, $lf);
    }

    /**
     * Parse a flat list of header lines into a {@code name => list<value>}
     * map. Honours RFC 7230 §3.2.4 obsolete line folding: a line that
     * starts with HTAB or SP is treated as a continuation of the
     * previous header value (joined with a single space). c-icap uses
     * this folding form for the multi-line `X-Violations-Found` header
     * defined in RFC 3507 §6.4.
     *
     * Empty lines are skipped. Header names are stored exactly as
     * received (trimmed, original casing); repeated names accumulate
     * their values in order of appearance.
     *
     * @param list<string> $lines
     * @return array<string, string[]>
     *
     * @throws IcapMalformedResponseException When a non-continuation line
     *         contains no colon.
     */
    private function parseHeaderBlock(array $lines): array
    {
        /** @var array<string, string[]> $headers */
        $headers = [];
        $currentName = null;

        foreach ($lines as $line) {
            if ($line === '') {
                continue;
            }

            $isFolded = $line[0] === ' ' || $line[0] === "\t";
            if ($isFolded && $currentName !== null) {
                // Continuation line: append to the most recent value of
                // the previous header, separated by a single SP.
                $last = count($headers[$currentName]) - 1;
                $headers[$currentName][$last] .= ' ' . trim($line);
                continue;
            }

            $colon = strpos($line, ':');
            if ($colon === false) {
                throw new IcapMalformedResponseException('Malformed header line: ' . $line);
            }

            $currentName = trim(substr($line, 0, $colon));
            $headers[$currentName][] = trim(substr($line, $colon + 1));
        }

        return $headers;
    }

    /**
     * Interpret the `Encapsulated` header and return the decoded HTTP
     * body as raw bytes. An empty string is returned when the
     * encapsulated block is empty or when the header lists neither a
     * `req-body` nor a `res-body` entry.
     *
     * The header is looked up case-insensitively because header field
     * names are not case-sensitive on the wire.
     *
     * @param array<string, string[]> $icapHeaders
     *
     * @throws IcapMalformedResponseException When the body offset is invalid
     *         or lies outside the encapsulated block.
     */
    private function extractDecodedBody(string $encapsulatedBlock, array $icapHeaders): string
    {
        if ($encapsulatedBlock === '') {
            return '';
        }

        $encapsulatedHeader = $this->findHeaderValue($icapHeaders, 'Encapsulated');
        if ($encapsulatedHeader === null) {
            // No Encapsulated header: fall back to treating the trailing
            // bytes as a plain body (best-effort).
            return $encapsulatedBlock;
        }

        $entries = $this->parseEncapsulatedHeader($encapsulatedHeader);
        $bodyOffset = $entries['req-body'] ?? $entries['res-body'] ?? null;
        if ($bodyOffset === null) {
            // null-body or header-only: no encapsulated HTTP body to decode.
            return '';
        }

        if ($bodyOffset < 0 || $bodyOffset > strlen($encapsulatedBlock)) {
            throw new IcapMalformedResponseException(
                'Encapsulated body offset out of range: ' . $bodyOffset,
            );
        }

        return $this->decodeChunked(substr($encapsulatedBlock, $bodyOffset));
    }

    /**
     * Return the first value of the named header, matching the name
     * case-insensitively. An exact-case match takes precedence so that
     * lookups which already succeeded keep returning the same value.
     *
     * @param array<string, string[]> $headers
     */
    private function findHeaderValue(array $headers, string $name): ?string
    {
        if (isset($headers[$name][0])) {
            return $headers[$name][0];
        }

        foreach ($headers as $candidate => $values) {
            // Purely numeric header names become int array keys in PHP.
            if (strcasecmp((string) $candidate, $name) === 0) {
                return $values[0] ?? null;
            }
        }

        return null;
    }

    /**
     * Split an `Encapsulated` header value such as
     * `res-hdr=0, res-body=137` into a map of entity name to byte offset.
     * Entity names are lower-cased; items without an `=` are ignored.
     *
     * @return array<string, int>
     *
     * @throws IcapMalformedResponseException When an offset is not a decimal
     *         integer. Silently coercing it to 0 would make the body be
     *         decoded from the wrong position.
     */
    private function parseEncapsulatedHeader(string $value): array
    {
        $entries = [];
        foreach (array_map(trim(...), explode(',', $value)) as $pair) {
            if ($pair === '' || !str_contains($pair, '=')) {
                continue;
            }

            [$entity, $offset] = explode('=', $pair, 2);
            $offset = trim($offset);
            // A sign is tolerated here; negative offsets are rejected by the
            // range check performed by the caller.
            if (preg_match('/^[+-]?\d+$/D', $offset) !== 1) {
                throw new IcapMalformedResponseException('Invalid Encapsulated offset: ' . $pair);
            }

            $entries[strtolower(trim($entity))] = (int) $offset;
        }

        return $entries;
    }

    /**
     * Decode an HTTP/1.1 chunked transfer-coded byte stream. Chunk
     * extensions (e.g. `0; ieof`) are tolerated but ignored — their
     * semantics belong on the request side (§4.5).
     *
     * Decoding is best-effort: it stops at the zero-size chunk, at the
     * first size line that is missing or not hexadecimal, or when the
     * data ends early, and returns whatever was decoded up to that point.
     */
    private function decodeChunked(string $chunked): string
    {
        $decoded = '';
        $cursor = 0;
        $len = strlen($chunked);
        // Largest number of significant hex digits that is guaranteed to
        // fit a native int. hexdec() returns a float for larger values and
        // casting such a float to int is not well-defined; a negative
        // result would move the cursor backwards and could loop forever.
        $maxHexDigits = 2 * PHP_INT_SIZE - 1;

        while ($cursor < $len) {
            $eol = strpos($chunked, "\r\n", $cursor);
            if ($eol === false) {
                // Not chunked, or truncated — return what we have.
                break;
            }

            $sizeLine = substr($chunked, $cursor, $eol - $cursor);
            // Strip any chunk-extension (after ';').
            $sizeHex = trim(explode(';', $sizeLine, 2)[0]);
            if ($sizeHex === '' || !ctype_xdigit($sizeHex)) {
                break;
            }

            $significant = ltrim($sizeHex, '0');
            if ($significant === '') {
                // Zero-size chunk terminates the body.
                break;
            }

            $cursor = $eol + 2;
            $remaining = $len - $cursor;
            $size = strlen($significant) > $maxHexDigits
                ? PHP_INT_MAX
                : (int) hexdec($significant);

            if ($size >= $remaining) {
                // Declared size reaches or exceeds the available data:
                // take the remainder as a truncated final chunk.
                $decoded .= substr($chunked, $cursor);
                break;
            }

            $decoded .= substr($chunked, $cursor, $size);
            $cursor += $size + 2; // skip CRLF after chunk
        }

        return $decoded;
    }
}