Source: modules/filters/whitespace.js

import Handler from "../handler.js";
import {
	isElement,
	isIgnorable,
	nextSignificantNode,
	previousSignificantNode,
	filterTree,
} from "../../utils/dom.js";

/**
 * Filters and normalizes whitespace-only text nodes that are not visually meaningful.
 *
 * Removes or replaces ignorable text nodes (e.g., extra line breaks, tabs, or spaces)
 * except in contexts where whitespace is meaningful (e.g., inside `<pre>` tags).
 *
 * @class
 * @extends Handler
 */
class WhiteSpaceFilter extends Handler {
	/**
	 * Create a WhiteSpaceFilter instance.
	 *
	 * @param {Object} chunker - Handles document chunking.
	 * @param {Object} polisher - Handles CSS polishing/styling.
	 * @param {Object} caller - The invoking processor or engine.
	 */
	constructor(chunker, polisher, caller) {
		super(chunker, polisher, caller);
	}

	/**
	 * Filters out or normalizes ignorable whitespace-only text nodes from content.
	 *
	 * @param {DocumentFragment | HTMLElement} content - The DOM content to filter.
	 */
	filter(content) {
		filterTree(
			content,
			(node) => {
				return this.filterEmpty(node);
			},
			NodeFilter.SHOW_TEXT,
		);
	}

	/**
	 * Determines whether a text node should be removed or normalized.
	 * Replaces content with a single space if it's between significant siblings,
	 * and removes the node if it's safe to do so.
	 *
	 * @param {Text} node - The text node to evaluate.
	 * @returns {number} A NodeFilter constant indicating filter behavior.
	 */
	filterEmpty(node) {
		if (node.textContent.length > 1 && isIgnorable(node)) {
			const parent = node.parentNode;
			const pre = isElement(parent) && parent.closest("pre");

			// Skip if inside <pre> or similar white-space preserving context
			if (pre) {
				return NodeFilter.FILTER_REJECT;
			}

			const previousSibling = previousSignificantNode(node);
			const nextSibling = nextSignificantNode(node);

			if (nextSibling === null && previousSibling === null) {
				// Only node — keep it, but normalize to a single space
				node.textContent = " ";
				return NodeFilter.FILTER_REJECT;
			}

			if (nextSibling === null || previousSibling === null) {
				// Safe to remove
				return NodeFilter.FILTER_ACCEPT;
			}

			// Normalize to a single space
			node.textContent = " ";
			return NodeFilter.FILTER_REJECT;
		}

		return NodeFilter.FILTER_REJECT;
	}
}

export default WhiteSpaceFilter;

// TODO: we also need to preserve sequences of white spaces when the parent has "white-space" rule:
// pre Sequences of white space are preserved. Lines are only broken at newline characters in the source and at <br> elements.
//
// pre-wrap
// Sequences of white space are preserved. Lines are broken at newline characters, at <br>, and as necessary to fill line boxes.
//
// pre-line
// Sequences of white space are collapsed. Lines are broken at newline characters, at <br>, and as necessary to fill line boxes.
//
// break-spaces
// The behavior is identical to that of pre-wrap, except that:
// - Any sequence of preserved white space always takes up space, including at the end of the line.
// - A line breaking opportunity exists after every preserved white space character, including between white space characters.
// - Such preserved spaces take up space and do not hang, and thus affect the box’s intrinsic sizes (min-content size and max-content size).
//
// See: https://developer.mozilla.org/en-US/docs/Web/CSS/white-space#Values