<?php
/**
 * SEO
 *
 * Enhances WordPress SEO by optimizing media metadata, improving excerpts, and structuring content.
 *
 * @package StrategySuite
 */

namespace StrategySuite;

/**
 * Class SEO
 *
 * Implements SEO-related optimizations, including media metadata updates, content formatting, and improved excerpts.
 */
class SEO extends \StrategySuite\Module {

	/**
	 * Default number of words used for auto-generated excerpts.
	 */
	private const EXCERPT_WORD_COUNT = 40;

	/**
	 * Determines if the module can be registered.
	 *
	 * @return bool Always returns true.
	 */
	public function can_register() {
		return true;
	}

	/**
	 * Registers hooks for various SEO improvements.
	 *
	 * This function sets up filters and actions to enhance WordPress SEO, including:
	 * - Adding titles to uploaded images
	 * - Improving excerpt formatting
	 * - Adding IDs to header tags for deep linking
	 * - Appending custom rules to robots.txt
	 *
	 * @return void
	 */
	public function register() {
		add_action( 'add_attachment', [ $this, 'add_title_to_attachments' ] );
		add_filter( 'media_send_to_editor', [ $this, 'insert_image_titles' ], 15, 2 );
		add_filter( 'wp_get_attachment_image_attributes', [ $this, 'add_async_attr_to_images' ] );
		add_filter( 'the_content', [ $this, 'add_ids_to_header_tags' ] );
		add_filter( 'excerpt_more', [ $this, 'replace_readmore' ] );
		// Priority 5 runs before core's wp_trim_excerpt (10); two arguments so the
		// callback receives the post the excerpt is being generated for.
		add_filter( 'get_the_excerpt', [ $this, 'strip_header_tags_from_excerpt' ], 5, 2 );
		add_filter( 'excerpt_length', [ $this, 'excerpt_word_count' ], 999, 1 );
		add_filter( 'robots_txt', [ $this, 'customize_robots_txt' ], PHP_INT_MAX, 2 );
	}

	/**
	 * Automatically adds a title to uploaded images.
	 *
	 * Replaces hyphens/underscores with spaces, capitalizes the first letter of every word,
	 * and stores the result as both the media title and the alt text.
	 *
	 * @param int $post_ID The ID of the media attachment.
	 *
	 * @return void
	 */
	public function add_title_to_attachments( $post_ID ) {

		// Only images get an automatic title / alt text.
		if ( ! wp_attachment_is_image( $post_ID ) ) {
			return;
		}

		$attachment = get_post( $post_ID );
		if ( ! $attachment ) {
			return;
		}

		// Collapse runs of hyphens, underscores and whitespace into single spaces.
		$img_title = preg_replace( '%\s*[-_\s]+\s*%', ' ', $attachment->post_title );
		// Drop the stray spaces left by leading/trailing separators, then
		// capitalize the first letter of every word (other letters lower case).
		$img_title = ucwords( strtolower( trim( (string) $img_title ) ) );

		// Nothing meaningful to store (e.g. the title consisted only of separators).
		if ( '' === $img_title ) {
			return;
		}

		// Set the image Alt-Text.
		update_post_meta( $post_ID, '_wp_attachment_image_alt', $img_title );

		// Set the image title.
		wp_update_post(
			array(
				'ID'         => $post_ID,
				'post_title' => $img_title,
			)
		);
	}

	/**
	 * Sets the title and decoding attribute on images inserted via the editor.
	 *
	 * @param string $html The HTML output generated by the editor.
	 * @param int    $id The ID of the media attachment.
	 *
	 * @return string The modified HTML output, or the original HTML when the attachment cannot be loaded.
	 */
	public function insert_image_titles( $html, $id ) {
		$attachment = get_post( $id );
		if ( ! $attachment ) {
			return $html;
		}

		// The title lands inside an HTML attribute, so it must be attribute-escaped.
		$title = esc_attr( $attachment->post_title );

		return str_replace( '<img', '<img decoding="async" title="' . $title . '" ', $html );
	}

	/**
	 * Adds the `decoding="async"` attribute to all images.
	 *
	 * Hints to the browser that image decoding may happen off the main rendering path.
	 *
	 * @param array $attr The attributes of an image.
	 *
	 * @return array The modified attributes.
	 */
	public function add_async_attr_to_images( $attr ) {
		$attr['decoding'] = 'async';
		return $attr;
	}

	/**
	 * Adds an `id` attribute to header tags in posts.
	 *
	 * Enables deep-linking to specific sections of content and supports table of contents plugins.
	 * Only headings (h1-h6) whose contents are plain text (no nested markup) are processed.
	 * Headings that already have an `id`, or whose text yields an empty slug, are left untouched.
	 * Repeated heading texts receive numeric suffixes (`-2`, `-3`, ...) so generated IDs stay unique.
	 *
	 * @param string $content The post content.
	 *
	 * @return string The filtered content with header IDs added.
	 */
	public function add_ids_to_header_tags( $content ) {

		if ( ! is_single() ) {
			return $content;
		}

		// The closing tag must match the opening tag (backreference), levels 1-6 only.
		$pattern  = '#<(?P<tag_name>h[1-6])\b(?P<tag_extra>[^>]*)>(?P<tag_contents>[^<]*)</(?P=tag_name)>#i';
		$used_ids = array();

		$updated = preg_replace_callback(
			$pattern,
			static function ( $match ) use ( &$used_ids ) {
				// Respect an existing id attribute; the lookbehind keeps `data-id=` etc. from counting.
				if ( preg_match( '/(?<![\w-])id\s*=/i', $match['tag_extra'] ) ) {
					return $match[0];
				}

				$base_id = sanitize_title( $match['tag_contents'] );
				if ( '' === $base_id ) {
					// An empty id attribute is invalid HTML and useless as a link target.
					return $match[0];
				}

				// De-duplicate IDs generated within this content.
				$id     = $base_id;
				$suffix = 2;
				while ( isset( $used_ids[ $id ] ) ) {
					$id = $base_id . '-' . $suffix;
					$suffix++;
				}
				$used_ids[ $id ] = true;

				return sprintf(
					'<%1$s%2$s id="%3$s">%4$s</%1$s>',
					$match['tag_name'],
					$match['tag_extra'],
					esc_attr( $id ),
					$match['tag_contents']
				);
			},
			$content
		);

		// preg_replace_callback() returns null on a PCRE error; keep the content intact then.
		return null === $updated ? $content : $updated;
	}

	/**
	 * Replaces the default `[...]` excerpt more text with an ellipsis.
	 *
	 * @param string $more_string The default excerpt more string.
	 *
	 * @return string The modified excerpt ending.
	 */
	public function replace_readmore( $more_string ) {
		return ' &hellip;';
	}

	/**
	 * Builds an auto-generated excerpt that does not contain header tags.
	 *
	 * A manually written excerpt is returned unchanged. Otherwise the excerpt is built from the
	 * first `acf/text-editor` or `core/paragraph` block of the post content (or from the whole
	 * content when no such block exists), with h1-h6 elements and their contents removed.
	 *
	 * @param string            $text The excerpt text.
	 * @param \WP_Post|int|null $post Optional. The post the excerpt belongs to, as passed by the
	 *                                `get_the_excerpt` filter. Defaults to the global post.
	 *
	 * @return string The cleaned excerpt text.
	 */
	public function strip_header_tags_from_excerpt( $text, $post = null ) {
		$raw_excerpt = $text;

		// A manually written excerpt always wins; only generate one when none exists.
		if ( '' !== trim( (string) $text ) ) {
			return $text;
		}

		$excerpt_length = apply_filters( 'excerpt_length', self::EXCERPT_WORD_COUNT );
		$excerpt_end    = '...';

		// Retrieve the content of the requested post and remove shortcode tags from it.
		$text = get_the_content( '', false, $post );
		$text = strip_shortcodes( $text );

		// Prefer the first block that is an ACF text editor (with content) or a core paragraph.
		foreach ( parse_blocks( $text ) as $block ) {
			$block_name = $block['blockName'] ?? null;

			if ( 'acf/text-editor' === $block_name ) {
				if ( isset( $block['attrs']['data']['content'] ) && is_string( $block['attrs']['data']['content'] ) ) {
					$text = $block['attrs']['data']['content'];
					break;
				}
				// ACF block without usable content: keep looking.
				continue;
			}

			if ( 'core/paragraph' === $block_name ) {
				$text = $block['innerHTML'];
				break;
			}
		}

		$text = str_replace( ']]>', ']]&gt;', $text );

		// Strip each header element together with its content. Non-greedy so text between two
		// headings survives; `s` lets a heading span lines, `i` covers upper-case tags.
		$without_headers = preg_replace( '#<h([1-6])\b[^>]*>.*?</h\1>#is', '', $text );
		if ( null !== $without_headers ) {
			$text = $without_headers;
		}

		$excerpt = wp_trim_words( $text, $excerpt_length, $excerpt_end );

		return apply_filters( 'wp_trim_excerpt', $excerpt, $raw_excerpt );
	}

	/**
	 * Sets the default excerpt word count.
	 *
	 * @param int $length The default excerpt length.
	 *
	 * @return int The modified excerpt length.
	 */
	public function excerpt_word_count( $length ) {
		return self::EXCERPT_WORD_COUNT;
	}

	/**
	 * Appends custom disallow rules for specific bots to the WordPress robots.txt output.
	 *
	 * This method adds a base set of disallow rules for sensitive paths
	 * and dynamically builds disallow entries for a list of blocked user agents.
	 * The bot list is loaded from an external file (`bad-bots.php`) which returns an array
	 * of user-agent strings to block. The resulting content is appended to
	 * the standard WordPress-generated robots.txt. If the bot list file is missing or does
	 * not return an array, only the base rules are appended.
	 *
	 * Example:
	 *   add_filter( 'robots_txt', [ $this, 'customize_robots_txt' ], PHP_INT_MAX, 2 );
	 *
	 * @param string $output The current robots.txt contents generated by WordPress.
	 * @param bool   $public Whether the site is visible to search engines (from Settings → Reading).
	 *
	 * @return string Modified robots.txt content including custom bot blocking rules.
	 *
	 * @since 1.0.0
	 */
	public function customize_robots_txt( $output, $public ) {

		// Rules applied to every crawler.
		$base_rules = <<<ROBOTS
		User-agent: *
		Disallow: /wp/wp-admin/
		Disallow: /*?add-to-cart
		Disallow: /*?remove_item
		Disallow: /*?s
		Allow: /wp/wp-admin/admin-ajax.php
		ROBOTS;

		// A missing or malformed bot list must not take robots.txt down with it.
		$bots_file    = STRATEGY_SUITE_INC . 'bad-bots.php';
		$blocked_bots = is_readable( $bots_file ) ? require $bots_file : array();
		if ( ! is_array( $blocked_bots ) ) {
			$blocked_bots = array();
		}

		$bot_rules = '';
		foreach ( $blocked_bots as $bot ) {
			// Skip entries that cannot form a valid User-agent line.
			if ( ! is_string( $bot ) || '' === trim( $bot ) ) {
				continue;
			}
			$bot        = trim( $bot );
			$bot_rules .= "User-agent: {$bot}\nDisallow: /\n";
		}

		// Append everything to the default WordPress robots.txt output.
		$output .= "\n\n" . $base_rules . "\n\n" . $bot_rules;

		return $output;
	}
}
