Documentation

Replacing data within outputted HTML using DOMDocument and DOMXPath

Posted in: Getting Started (4) How To (15) Plugin Options Explained (41) Actions / Filters (32)      Html Content (5)    Replace (10)  

The plugin provides multiple options to control the outputted HTML data; however, given the complexity of existing WordPress themes and plugins, certain areas may sometimes not be covered. In such cases you can extend the plugin's functionality (see Create a custom Module Component, extending the plugin functionality) or use the wph/ob_start_callback filter, which is the easiest way.

HTML data processing through DOMDocument and DOMXPath is a powerful method to control everything in the outputted code, and it also ensures the result is valid markup. This is a basic example of how to parse the buffer into a DOMDocument and then a DOMXPath object:


        //hook custom_ob_start_callback() into the wph/ob_start_callback filter
        add_filter( 'wph/ob_start_callback', 'custom_ob_start_callback' ); 
    /**
     * Parses the buffered HTML into a DOMDocument, exposes it through DOMXPath
     * and returns the re-serialised markup.
     *
     * This is the bare skeleton: add the node search / manipulation where marked.
     *
     * @param string $buffer HTML handed over by the wph/ob_start_callback filter.
     *
     * @return string The re-serialised HTML, or $buffer untouched when the DOM
     *                extension is missing, the buffer is empty, or parsing or
     *                serialising fails.
     */
    function custom_ob_start_callback( $buffer )
        {
            //check if PHP DOMDocument instance is available
            if ( ! class_exists( 'DOMDocument', false ) )
                return $buffer;

            //nothing to parse; loadHTML() rejects an empty source
            if ( ! is_string( $buffer ) || trim( $buffer ) === '' )
                return $buffer;

            $doc = new DOMDocument();
            $doc->preserveWhiteSpace    = true;

            //preserve html document encoding: turn every non-ASCII character into
            //a numeric entity (replaces the deprecated 'HTML-ENTITIES' conversion)
            $html = mb_encode_numericentity( $buffer, array( 0x80, 0x10FFFF, 0, 0x1FFFFF ), 'UTF-8' );

            //collect libxml parse errors internally instead of silencing them with @,
            //then put the previous error mode back
            $previous_state = libxml_use_internal_errors( true );
            $loaded         = $doc->loadHTML( $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD );
            libxml_clear_errors();
            libxml_use_internal_errors( $previous_state );

            if ( $loaded === false )
                return $buffer;

            $doc->encoding              = 'UTF-8';
            $doc->formatOutput          = true;

            //use XPath
            $xpath = new DOMXPath( $doc );

            //further data manipulation goes here
            //search and iteration area

            $doc->normalizeDocument();

            //serialise the whole document (no node argument) so a leading doctype
            //and any other top-level nodes are kept in the output
            $output = $doc->saveHTML();

            //fall back to the original markup if serialisation failed
            if ( $output === false )
                return $buffer;

            return $output;

        }

Presuming we need the class ‘envato’ removed from every element within the HTML, we can use the following code:


        //hook custom_ob_start_callback() into the wph/ob_start_callback filter
        add_filter( 'wph/ob_start_callback', 'custom_ob_start_callback' ); 
    /**
     * Removes the 'envato' class from every element of the buffered HTML.
     *
     * The buffer is parsed into a DOMDocument, the matching elements are located
     * with DOMXPath, and only the exact 'envato' token is dropped from each
     * class attribute (other classes such as 'envato-widget' are left intact).
     *
     * @param string $buffer HTML handed over by the wph/ob_start_callback filter.
     *
     * @return string The modified HTML, or $buffer untouched when the DOM
     *                extension is missing, the buffer is empty, parsing or
     *                serialising fails, or no element carries the class.
     */
    function custom_ob_start_callback( $buffer )
        {
            //check if PHP DOMDocument instance is available
            if ( ! class_exists( 'DOMDocument', false ) )
                return $buffer;

            //nothing to parse; loadHTML() rejects an empty source
            if ( ! is_string( $buffer ) || trim( $buffer ) === '' )
                return $buffer;

            $doc = new DOMDocument();
            $doc->preserveWhiteSpace    = true;

            //preserve html document encoding: turn every non-ASCII character into
            //a numeric entity (replaces the deprecated 'HTML-ENTITIES' conversion)
            $html = mb_encode_numericentity( $buffer, array( 0x80, 0x10FFFF, 0, 0x1FFFFF ), 'UTF-8' );

            //collect libxml parse errors internally instead of silencing them with @,
            //then put the previous error mode back
            $previous_state = libxml_use_internal_errors( true );
            $loaded         = $doc->loadHTML( $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD );
            libxml_clear_errors();
            libxml_use_internal_errors( $previous_state );

            if ( $loaded === false )
                return $buffer;

            $doc->encoding              = 'UTF-8';
            $doc->formatOutput          = true;

            //use XPath
            $xpath = new DOMXPath( $doc );

            //make a search on envato class (whole-token match on the class attribute)
            $nodes = $xpath->query( '//*[contains(concat(" ", normalize-space(@class), " "), " envato ")]' );
            if ( $nodes === false || $nodes->length < 1 )
                return $buffer;

            //iterate through all matches
            foreach ( $nodes as $node )
                {
                    //split into class tokens so that only the exact 'envato' token is
                    //removed; a plain str_replace would also mangle e.g. 'envato-widget'
                    $tokens = preg_split( '/\s+/', $node->getAttribute( 'class' ), -1, PREG_SPLIT_NO_EMPTY );
                    $kept   = array();

                    foreach ( $tokens as $token )
                        {
                            if ( $token !== 'envato' )
                                $kept[] = $token;
                        }

                    $node->setAttribute( 'class', implode( ' ', $kept ) );
                }

            $doc->normalizeDocument();

            //serialise the whole document (no node argument) so a leading doctype
            //and any other top-level nodes are kept in the output
            $output = $doc->saveHTML();

            //fall back to the original markup if serialisation failed
            if ( $output === false )
                return $buffer;

            return $output;

        }

DOMXPath is a powerful engine for traversing and querying HTML, similar to the way things are done in JavaScript and CSS. More details can be found at The DOMXPath class.

Share on FacebookShare on Google+Tweet about this on TwitterShare on LinkedIn
Scroll to top