Table of contents

Abstract

Introduction

The basic concepts

Stylesheet organization

Root template

Match processing instruction nodes

Match text nodes

Match attributes

Match other nodes

Keepspace template

Using the stylesheet

The complete stylesheet

The complete stylesheet

Here’s the complete stylesheet. You can also download the stylesheet from Scriptorium’s web site.

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

    <xsl:variable name="WHITESPACE">
      <xsl:text>&#x09;&#x0A;&#x0D;&#x20;</xsl:text>
   </xsl:variable>

<!-- Serialization settings. Keep indent="no" in production: with indent="yes" the
     serializer is allowed to add whitespace of its own to the result.
     For DEBUGGING only, change indent to "yes" to get a readable result tree. -->
    <xsl:output method="xml" encoding="UTF-8" indent="no"/>
    <xsl:template match="/">
        <xsl:message>Handling whitespace.</xsl:message>
        <xsl:call-template name="add_doctype"/>
        <xsl:apply-templates select="node()" mode="identity"/>
    </xsl:template>
    <!-- FrameMaker doesn't like non-FrameMaker processing instructions.This template removes      them.-->
    <xsl:template match="node()[self::processing-instruction()]" mode="identity">
        <xsl:choose>
            <xsl:when test="name() = 'Fm'">
                <xsl:message>Copying processing instruction
                   (name is "<xsl:value-of select="name()"/>").</xsl:message>
                <xsl:copy/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:message>Ignoring processing instruction
                  (name is "<xsl:value-of select="name()"/>").</xsl:message>
                <!-- Do nothing. -->
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    <!-- Have to make sure that space before and after is treated correctly.
         If the text node is in a block that contains inline elements (<ph>, <b>, and so on),
         the spaces must be preserved at the beginning and end of strings. On the other hand,
         it's equally important to make sure the FIRST whitespace in a block IS stripped.    -->
    <xsl:template match="node()[self::text()]" mode="identity">
        <!-- Create a variable for the whitespace before the text node. -->
        <xsl:variable name="fore_space">
            <xsl:choose>
                <!-- If after normalizing space the length is zero, consider contents. -->
                <xsl:when test="string-length(normalize-space(.)) = 0">
                    <!-- If this is a single space between elements, allow it. -->
                    <xsl:choose>
                        <xsl:when test="string-length(.) = 1 and string(.) = ' '">
                            <xsl:value-of select="' '"/>
                        </xsl:when>
                        <xsl:otherwise>
                            <xsl:value-of select="''"/>
                        </xsl:otherwise>
                    </xsl:choose>
                </xsl:when>
                <!-- When the text node is the beginning of a paragraph, value is empty string. -->
                <xsl:when test="position() = 1">
                    <xsl:value-of select="''"/>
                </xsl:when>
                <!-- When the first character in the text node is a whitespace char, value is space. -->
                <xsl:when test="contains($WHITESPACE,substring(.,1,1))">
                    <xsl:value-of select="' '"/>
                </xsl:when>
            </xsl:choose>
        </xsl:variable>
        <!-- Create a variable for the whitespace after the text node.
             Note that in this case, we're not worried about the last text node in a block element. -->
        <xsl:variable name="aft_space">
            <xsl:choose>
                <!-- If after normalizing space the length is zero, value is empty string. -->
                <xsl:when test="string-length(normalize-space(.)) = 0">
                    <xsl:value-of select="''"/>
                </xsl:when>
                <!-- When the last character in the text node is a whitespace char, value is space. -->
                <xsl:when test="contains($WHITESPACE,substring(.,string-length(.),1))">
                    <xsl:value-of select="' '"/>
                </xsl:when>
            </xsl:choose>
        </xsl:variable>
        <!-- Build the new version of the text node, using the fore_space, normalized string, and          aft_space. -->
        <xsl:value-of select="$fore_space"/>
        <xsl:value-of select="normalize-space(.)"/>
        <xsl:value-of select="$aft_space"/>
    </xsl:template>
    <xsl:template match="@*" mode="identity">
        <xsl:choose>
            <xsl:when test="name() = 'class'">
                <!-- Do nothing. -->
            </xsl:when>
            <xsl:otherwise>
                <xsl:copy/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    <xsl:template match="node()[not(self::processing-instruction()) and not(self::text())]"       mode="identity">
        <xsl:choose>
            <!-- If the element specifies xml:space="preserve",
             cannot mess with the whitespace, so use keepspace template. -->
            <xsl:when test="@xml:space = 'preserve'">
                <xsl:copy>
                    <xsl:apply-templates select="@*|node()" mode="keepspace"/>
                </xsl:copy>
            </xsl:when>
            <xsl:otherwise>
                <xsl:copy>
                    <xsl:apply-templates select="@*|node()" mode="identity"/>
                </xsl:copy>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    <!-- A standard identity transform for where the whitespace is important. -->
    <!-- Presumes that processing instructions don't exist in these elements. -->
    <xsl:template match="@*|node()" mode="keepspace">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" mode="keepspace"/>
        </xsl:copy>
    </xsl:template>
<!-- Named template that writes the DOCTYPE declaration for the document being
     transformed. Handles the DITA "standard" topic types (concept, reference, task,
     topic), chosen by the name of the root element. If you specialize, you may have
     to add others. Any other root element gets the topic DOCTYPE and a warning message.
     The declaration must sit on a line of its own. The line breaks before and after
     it are written as character references (&#10;) so that pretty printing this
     stylesheet cannot remove them or add spaces in front of the declaration.
     The markup is emitted with disable-output-escaping, which a processor is
     permitted to ignore; verify the output if you change XSLT processors.
     NOTE(review): the fallback branch uses an absolute URL as system identifier
     while the other branches use a bare file name; confirm this is intentional. -->
    <xsl:template name="add_doctype">
        <xsl:choose>
            <xsl:when test="/concept">
                <xsl:text disable-output-escaping="yes">&#10;&lt;!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"&gt;&#10;</xsl:text>
            </xsl:when>
            <xsl:when test="/reference">
                <xsl:text disable-output-escaping="yes">&#10;&lt;!DOCTYPE reference PUBLIC "-//OASIS//DTD DITA Reference//EN" "reference.dtd"&gt;&#10;</xsl:text>
            </xsl:when>
            <xsl:when test="/task">
                <xsl:text disable-output-escaping="yes">&#10;&lt;!DOCTYPE task PUBLIC "-//OASIS//DTD DITA Task//EN" "task.dtd"&gt;&#10;</xsl:text>
            </xsl:when>
            <xsl:when test="/topic">
                <xsl:text disable-output-escaping="yes">&#10;&lt;!DOCTYPE topic PUBLIC "-//OASIS//DTD DITA Topic//EN" "topic.dtd"&gt;&#10;</xsl:text>
            </xsl:when>
            <xsl:otherwise>
                <xsl:message>Unknown root element, using topic doctype.</xsl:message>
                <xsl:text disable-output-escaping="yes">&#10;&lt;!DOCTYPE topic PUBLIC "-//OASIS//DTD DITA Topic//EN" "http://docs.oasis-open.org/dita/v1.1/OS/dtd/topic.dtd"&gt;&#10;</xsl:text>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
</xsl:stylesheet>

 


Scriptorium Publishing | Post Office Box 12761 Research Triangle Park, NC 27709 | (919) 481 2701 | info@scriptorium.com