Java API for XML Processing (JAXP)

Java-XML-Duke

Parser = Programm zum Einlesen von Dokumenten entsprechend einer Syntax-Definition. Das Ergebnis ist eine interne Datenstruktur.

Ausblick: Java Architecture for XML Binding (JAXB)
Generierung der Java-Klassen direkt aus einer DTD.

DOM und SAX Parsers

Vorteile:

Nachteile:


DOM Parser

DocumentBuilderFactory

javax.xml.parsers.DocumentBuilderFactory dient der Erzeugung eines DOM Parsers javax.xml.parsers.DocumentBuilder.

  protected DocumentBuilderFactory()
  public static DocumentBuilderFactory newInstance()

  public DocumentBuilder newDocumentBuilder() 

  public boolean isValidating()
  public void setValidating(boolean)

  public boolean isExpandEntityReferences()
  public void setExpandEntityReferences(boolean) 

DocumentBuilder

javax.xml.parsers.DocumentBuilder dient dem Parsen, d.h. dem Einlesen und Aufbau eines org.w3c.dom.Document.

  protected DocumentBuilder()

  public Document newDocument() 

  public Document parse(File f) 
  public Document parse(InputStream is) 
  public Document parse(InputSource is) 
  public Document parse(String uri) 

  public boolean isValidating()

  public void setErrorHandler(ErrorHandler) 

DOMLeser:

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document; 
import org.w3c.dom.Node; 
import org.w3c.dom.Element; 
import java.io.IOException;
import org.xml.sax.SAXException;

/**
 * Command-line demo: parses an XML file with a validating DOM parser and
 * echoes the resulting tree through {@link DOMSchreiber}.
 */
public class DOMLeser {

    /**
     * Parses {@code datei.xhtml} (resolved by the parser as a URI) and prints
     * first the document object, then the serialized document.
     *
     * <p>If the parser cannot be configured or the file cannot be parsed, the
     * stack trace is printed and the method returns without further output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        DocumentBuilderFactory pfac = DocumentBuilderFactory.newInstance();
        DocumentBuilder parser = null;

        try {
            pfac.setValidating(true);
            // pfac.setCoalescing(true) would convert CDATA sections to text
            // nodes; it is left off so that CDATA nodes reach DOMSchreiber.
            // Keep entity references as nodes of their own instead of
            // replacing them by their expansion.
            pfac.setExpandEntityReferences(false);
            parser = pfac.newDocumentBuilder();
        }
        catch (ParserConfigurationException e) {
            e.printStackTrace();
            return; // without a parser there is nothing left to do
        }

        Document doc = null;
        // The sample document of these notes; the captured output also
        // reports "URI=datei.xhtml".
        String uri = "datei.xhtml";

        try {
            doc = parser.parse(uri);
        }
        catch (SAXException e) {
            e.printStackTrace();
            return; // doc is still null here
        }
        catch (IOException e) {
            e.printStackTrace();
            return; // doc is still null here
        }

        Screen sc = new Screen();
        sc.println("doc("+ doc.getClass().getName() + ") = " + doc );

        // Let the serializer write to the same writer as the messages above,
        // so the order of the output does not depend on how two independent
        // writers happen to be flushed.
        DOMSchreiber dw = new DOMSchreiber(sc);
        sc.println("document:\n");
        dw.printNode(doc);
        dw.flush();
        dw.close(); // also closes sc, which is the writer behind dw
    }
}

DOM Node

Die Werte von nodeName, nodeValue und attributes hängen wie folgt vom Knotentyp ab:

Interface | nodeName | nodeValue | attributes
Attr | name of attribute | value of attribute | null
CDATASection | "#cdata-section" | content of the CDATA Section | null
Comment | "#comment" | content of the comment | null
Document | "#document" | null | null
DocumentFragment | "#document-fragment" | null | null
DocumentType | document type name | null | null
Element | tag name | null | NamedNodeMap
Entity | entity name | null | null
EntityReference | name of entity referenced | null | null
Notation | notation name | null | null
ProcessingInstruction | target | entire content excluding the target | null
Text | "#text" | content of the text node | null

Siehe Document Object Model (DOM) Level 2 Core Specification.

DOMSchreiber:

import org.w3c.dom.Document; 
import org.w3c.dom.Node; 
import org.w3c.dom.NodeList; 
import org.w3c.dom.NamedNodeMap; 
import org.w3c.dom.Element; 
import org.w3c.dom.Text; 
import org.w3c.dom.Comment; 
import org.w3c.dom.EntityReference; 
import org.w3c.dom.DocumentType; 
import java.io.IOException;
import java.io.PrintWriter;
import org.xml.sax.SAXException;


/**
 * Serializes a DOM tree as XML text to a {@link PrintWriter}.
 *
 * <p>{@link #printNode(Node)} dispatches on the node type to one
 * {@code print...} method per kind of node. Character data of text nodes and
 * attribute values is escaped so that markup characters in the data do not
 * end up as markup in the output. The writer is never flushed or closed
 * implicitly; use {@link #flush()} and {@link #close()}.
 */
public class DOMSchreiber {

    /** Destination of all output. */
    PrintWriter pw = null;

    /**
     * Creates a serializer writing to a new {@code Screen}. {@code Screen} is
     * a project class not shown here; it is used as a {@code PrintWriter}.
     */
    public DOMSchreiber() {
        pw = new Screen();
    }

    /**
     * Creates a serializer writing to the given writer.
     *
     * @param p destination of the output
     */
    public DOMSchreiber(PrintWriter p) {
        pw = p;
    }

    /**
     * Writes a node of any type, including its subtree where applicable.
     * Node types without a dedicated method are written as an XML comment
     * containing the node's string form.
     *
     * @param node the node to write; {@code null} is ignored
     */
    public void printNode(Node node) {
        if ( node == null ) {
            return;
        }
        switch ( node.getNodeType() ) {
        case Node.ELEMENT_NODE:
            printElement( (Element)node );
            break;
        case Node.ATTRIBUTE_NODE:
            // A single attribute node is not a NamedNodeMap, so it must not
            // be routed through printAttributes(NamedNodeMap).
            printAttribute( node );
            break;
        case Node.TEXT_NODE:
            printText( (Text)node );
            break;
        case Node.CDATA_SECTION_NODE:
            printCData( (Text)node );
            break;
        case Node.PROCESSING_INSTRUCTION_NODE:
            printPI( node );
            break;
        case Node.COMMENT_NODE:
            printComment( (Comment)node );
            break;
        case Node.DOCUMENT_TYPE_NODE:
            printDoctype( (DocumentType)node );
            break;
        case Node.DOCUMENT_NODE:
            printDocument( (Document)node );
            break;
        case Node.ENTITY_REFERENCE_NODE:
            printEntityReference( (EntityReference)node );
            break;
        default:
            pw.println("<!-- " + node + " -->");
        }
    }


    /**
     * Writes an element with its attributes and all child nodes. An element
     * without children is written in the empty-element form {@code <x />}.
     *
     * @param el the element to write
     */
    public void printElement(Element el) {
        String name = el.getNodeName();
        pw.print("<" +  name );
        printAttributes( el.getAttributes() );
        NodeList nl = el.getChildNodes();
        if ( ( nl == null ) || ( nl.getLength() == 0 ) ) {
            pw.print(" />");
            return;
        }
        pw.print(">");
        for ( int i = 0; i < nl.getLength(); i++ ) {
            printNode( nl.item(i) );
        }
        pw.print("</" + name + ">");
    }


    /**
     * Writes all attributes of a map, each as {@code  name="value"} with a
     * leading blank.
     *
     * @param at the attributes to write; {@code null} writes nothing
     */
    public void printAttributes(NamedNodeMap at) {
        if ( at == null ) {
            return;
        }
        for ( int i = 0; i < at.getLength(); i++ ) {
            printAttribute( at.item(i) );
        }
    }


    /** Writes one attribute node as {@code  name="value"} with an escaped value. */
    private void printAttribute(Node an) {
        pw.print(" " + an.getNodeName() + "=\"");
        pw.print(escape( an.getNodeValue(), true ) + "\"");
    }


    /**
     * Writes the character data of a text node with markup characters escaped.
     *
     * @param tx the text node to write
     */
    public void printText(Text tx) {
        pw.print( escape( tx.getData(), false ) );
    }


    /**
     * Writes an entity reference as {@code &name;}; child nodes of the
     * reference node are not written.
     *
     * @param er the entity reference to write
     */
    public void printEntityReference(EntityReference er) {
        pw.print("&" + er.getNodeName() + ";" );
    }


    /**
     * Writes a comment node.
     *
     * @param c the comment to write
     */
    public void printComment(Comment c) {
        pw.print("<!--");
        pw.print( c.getData() );
        pw.print("-->");
    }


    /**
     * Writes the document type declaration followed by a line break. Only
     * the name and the external identifiers are written; an identifier that
     * is absent is left out instead of being printed as {@code "null"}.
     *
     * @param d the document type to write
     */
    public void printDoctype(DocumentType d) {
        pw.print("<!DOCTYPE ");
        pw.print( d.getName() );
        String pid = d.getPublicId();
        String sid = d.getSystemId();
        if ( pid != null ) {
            pw.print(" PUBLIC \"" + pid + "\"\n          ");
        } else if ( sid != null ) {
            pw.print(" SYSTEM " );
        }
        if ( sid != null ) {
            pw.print("\"" + sid + "\" ");
        }
        pw.println(">");
    }


    /**
     * Writes a CDATA section; its content is written unescaped.
     *
     * @param tx the CDATA section node to write
     */
    public void printCData(Text tx) {
        // NOTE(review): the blank after "<![CDATA[" becomes part of the
        // section's content when the output is parsed again; it is kept
        // because the captured sample output was produced with it.
        pw.print("<![CDATA[ ");
        pw.print( tx.getNodeValue() );
        pw.print("]]>");
    }


    /**
     * Writes the document type declaration (if any) and the document element
     * (if any). Other top-level nodes of the document are not written.
     *
     * @param doc the document to write
     */
    public void printDocument(Document doc) {
        // Both getters may return null (no DOCTYPE, or an empty document);
        // printNode ignores null.
        printNode( doc.getDoctype() );
        printNode( doc.getDocumentElement() );
    }


    /**
     * Writes a processing instruction as {@code <?target data?>}.
     *
     * @param pi the processing instruction node to write
     */
    public void printPI(Node pi) {
        pw.print("<?" + pi.getNodeName() );
        pw.print(" " + pi.getNodeValue() + "?>" );
    }


    /** Flushes the underlying writer. */
    public void flush() {
        pw.flush();
    }


    /** Closes the underlying writer. */
    public void close() {
        pw.close();
    }


    /**
     * Replaces markup characters in character data by entity references.
     *
     * @param s           the raw data; {@code null} yields an empty string
     * @param inAttribute whether the data is written inside a double-quoted
     *                    attribute value, in which case {@code "} is escaped too
     * @return the escaped data
     */
    private static String escape(String s, boolean inAttribute) {
        if ( s == null ) {
            return "";
        }
        StringBuffer sb = new StringBuffer( s.length() + 16 );
        for ( int i = 0; i < s.length(); i++ ) {
            char c = s.charAt(i);
            switch ( c ) {
            case '&':
                sb.append("&amp;");
                break;
            case '<':
                sb.append("&lt;");
                break;
            case '>':
                sb.append("&gt;");
                break;
            case '"':
                if ( inAttribute ) {
                    sb.append("&quot;");
                } else {
                    sb.append(c);
                }
                break;
            default:
                sb.append(c);
            }
        }
        return sb.toString();
    }
}

datei.xhtml:

<?xml version="1.0" encoding="iso-8859-1" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Titel</title>
<?php 
  $a=9; 
  function yyy(a) {
    if ( a > 2 ) { 
       print("<p>nonsense</p>"); 
    }
  }
?>
<!-- funktions definitionen -->
<script type="text/javascript" language="JavaScript">
<![CDATA[ 
  function xxx(a) {
    if ( a > 2 ) { 
       document.write("<p>nonsense</p>"); 
    }
  }
]]>
</script>
</head>
<body bgcolor="white">
<h1>Überschrift A</h1>
<p align="center">Paragraph mit <em>Hervorhebung</em>
und einem <br /> Zeilenumbruch.
</p>
<ul>
<li>Listen Element</li>
<li>Entity Element &abc; </li>
</ul>
<h1>Überschrift B</h1>
<p>Text eines zweiten Paragraphen mit 
<strong>Hervorhebung</strong> 
ohne Zeilenumbruch.
</p>
<h1 count="10">Überschrift C</h1>
<p>Text eines dritten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
<h1>Überschrift D</h1>
<p>Text eines vierten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
</body>
</html>

Ausgabe:

Warning: validation was turned on but an org.xml.sax.ErrorHandler was not
set, which is probably not what is desired.  Parser will use a default
ErrorHandler to print the first 10 errors.  Please call
the 'setErrorHandler' method to fix this.
Error: URI=datei.xhtml Line=41: Attribute "count" must be declared for element type "h1".
doc(org.apache.xerces.dom.DeferredDocumentImpl) = [#document: null]
document:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd" >
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Titel</title>
<?php $a=9; 
  function yyy(a) {
    if ( a > 2 ) { 
       print("<p>nonsense</p>"); 
    }
  }
?>
<!-- funktions definitionen -->
<script language="JavaScript" type="text/javascript">
<![CDATA[  ]]>
</script>
</head>
<body bgcolor="white">
<h1>Überschrift A</h1>
<p align="center">Paragraph mit <em>Hervorhebung</em>
und einem <br clear="none" /> Zeilenumbruch.
</p>
<ul>
<li>Listen Element</li>
<li>Entity Element &abc; </li>
</ul>
<h1>Überschrift B</h1>
<p>Text eines zweiten Paragraphen mit 
<strong>Hervorhebung</strong> 
ohne Zeilenumbruch.
</p>
<h1 count="10">Überschrift C</h1>
<p>Text eines dritten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
<h1>Überschrift D</h1>
<p>Text eines vierten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
</body>
</html>

DOMUtil:

import org.w3c.dom.Document; 
import org.w3c.dom.Node; 
import org.w3c.dom.NodeList; 
import org.w3c.dom.Element; 

public class DOMUtil {

    private static int inc = 0;

    public static void addIdent(Node node, String prefix) {
	switch ( node.getNodeType() ) {
	case Node.ELEMENT_NODE: 
            addElementIdent( (Element)node, prefix );
	    break;
	case Node.DOCUMENT_NODE: 
            addElementIdent( ((Document)node).getDocumentElement(), prefix );
	    break;
	default: ;
	}
    }


    public static void addElementIdent(Element el, String prefix) {
        el.setAttribute("id", "#"+prefix+inc);
        el.setAttribute("name", "#"+prefix+inc++);
        NodeList nl = el.getChildNodes();
        if ( ( nl == null ) || ( nl.getLength() == 0 ) ) {
            return;
	}
	for (int i = 0; i < nl.getLength(); i++ ){
            Node c = nl.item(i);
            addIdent(c, prefix);
	}
    }
}

Anwendung:

// ... as above: this fragment continues a main method in which doc, sc and
// dw have already been set up.
        // Tag the element tree of the parsed document with generated id/name
        // attributes using the prefix "hk".
        DOMUtil.addIdent(doc,"hk"); 

	sc.println("document:\n");
        // Serialize the modified tree and push it out before the final
        // line break is written through sc.
        dw.printNode(doc);
        dw.flush();
	sc.println();

Ausgabe:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd" >
<html id="#hk0" name="#hk0" xmlns="http://www.w3.org/1999/xhtml">
<head id="#hk1" name="#hk1">
<title id="#hk2" name="#hk2">Titel</title>
<?php $a=9; 
  function yyy(a) {
    if ( a > 2 ) { 
       print("<p>nonsense</p>"); 
    }
  }
?>
<!-- funktions definitionen -->
<script id="#hk3" language="JavaScript" name="#hk3" type="text/javascript">
<![CDATA[  ]]>
</script>
</head>
<body bgcolor="white" id="#hk4" name="#hk4">
<h1 id="#hk5" name="#hk5">Überschrift A</h1>
<p align="center" id="#hk6" name="#hk6">Paragraph mit <em id="#hk7" name="#hk7">Hervorhebung</em>
und einem <br clear="none" id="#hk8" name="#hk8" /> Zeilenumbruch.
</p>
<ul id="#hk9" name="#hk9">
<li id="#hk10" name="#hk10">Listen Element</li>
<li id="#hk11" name="#hk11">Entity Element &abc; </li>
</ul>
<h1 id="#hk12" name="#hk12">Überschrift B</h1>
<p id="#hk13" name="#hk13">Text eines zweiten Paragraphen mit 
<strong id="#hk14" name="#hk14">Hervorhebung</strong> 
ohne Zeilenumbruch.
</p>
<h1 count="10" id="#hk15" name="#hk15">Überschrift C</h1>
<p id="#hk16" name="#hk16">Text eines dritten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
<h1 id="#hk17" name="#hk17">Überschrift D</h1>
<p id="#hk18" name="#hk18">Text eines vierten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
</body>
</html>

SAX Parser

SAXParserFactory

javax.xml.parsers.SAXParserFactory dient der Erzeugung eines SAX Parsers javax.xml.parsers.SAXParser.

  protected SAXParserFactory()
  public static SAXParserFactory newInstance()

  public SAXParser newSAXParser() 

  public boolean isValidating()
  public void setValidating(boolean)

  public boolean getFeature(String name)
  public void setFeature(String name, boolean value) 

  public boolean isNamespaceAware()
  public void setNamespaceAware(boolean) 

SAXParser

javax.xml.parsers.SAXParser dient dem Parsen, d.h. dem Einlesen und Verarbeiten der gefundenen XML Konstrukte.

  protected SAXParser()

  public void parse(File f, DefaultHandler dh) 
  public void parse(InputStream is, DefaultHandler dh) 
  public void parse(InputSource is, DefaultHandler dh) 
  public void parse(String uri, DefaultHandler dh) 

  public boolean isValidating()

  public abstract void setProperty(String name, Object value) 
  public abstract Object getProperty(String name) 

SAXLeser:

import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Command-line demo: parses an XML file with a validating, namespace-aware
 * SAX parser and echoes the reported events through {@link SAXSchreiber}.
 */
public class SAXLeser {

    /**
     * Parses {@code datei.xhtml} (resolved by the parser as a URI) and writes
     * the result to a {@code Screen}.
     *
     * <p>If the parser cannot be created, the stack trace is printed and the
     * method returns; parse failures are reported by a stack trace as well.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        SAXParserFactory pfac = SAXParserFactory.newInstance();
        SAXParser parser = null;

        try {
            pfac.setValidating(true);
            pfac.setNamespaceAware(true);
            parser = pfac.newSAXParser();
        }
        catch (ParserConfigurationException e) {
            e.printStackTrace();
            return; // parser is still null; parse() below would fail
        }
        catch (SAXException e) {
            e.printStackTrace();
            return; // parser is still null; parse() below would fail
        }

        Screen sc = new Screen();
        String uri = "datei.xhtml";
        DefaultHandler dh = new SAXSchreiber(sc);

        try {
            parser.parse(uri,dh);
        }
        catch (SAXException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }
}

DefaultHandler aus org.xml.sax.helpers
implementiert die Interfaces ContentHandler, DTDHandler, EntityResolver und ErrorHandler:

  public void startDocument() 
  public void endDocument() 

  public void startElement(String nameuri, String local, String quali, 
                           Attributes attrs) 
  public void endElement(String nameuri, String local, String quali) 

  public void characters(char ch[], int start, int length) 
  public void ignorableWhitespace(char ch[], int start, int length) 

  public void processingInstruction(String target, String data) 

  public void warning(SAXParseException ex) 
  public void error(SAXParseException ex) 
  public void fatalError(SAXParseException ex) 

  public InputSource resolveEntity(String publicId,
                                   String systemId) 

  public void notationDecl(String name, String publicId,
                           String systemId) 

SAXSchreiber:

import java.io.IOException;
import java.io.PrintWriter;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that writes the reported events back out as XML text.
 *
 * <p>Elements, character data and processing instructions are echoed;
 * parser warnings and errors as well as entity resolution requests are
 * written as XML comments. Character data and attribute values are written
 * as delivered by the parser, without escaping.
 */
public class SAXSchreiber extends DefaultHandler {

    /** Destination of all output. */
    PrintWriter pw = null;

    /**
     * Creates a handler writing to a new {@code Screen}. {@code Screen} is a
     * project class not shown here; it is used as a {@code PrintWriter}.
     */
    public SAXSchreiber() {
        pw = new Screen();
    }

    /**
     * Creates a handler writing to the given writer.
     *
     * @param p destination of the output
     */
    public SAXSchreiber(PrintWriter p) {
        pw = p;
    }


    /** Writes a fixed XML declaration (version 1.0, encoding iso-8859-1). */
    public void startDocument() {
        pw.println("<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>");
        pw.flush();
    }

    /** Terminates the output with a line break. */
    public void endDocument() {
        pw.println();
        pw.flush();
    }


    /**
     * Writes a start tag with all attributes. The element is written under
     * its local name, attributes under their qualified names.
     *
     * @param nameuri namespace URI of the element (not written)
     * @param local   local name of the element
     * @param quali   qualified name, used when no local name is available
     * @param attrs   attributes of the element; may be {@code null}
     */
    public void startElement(String nameuri, String local, String quali,
                             Attributes attrs) {
        pw.print('<');
        pw.print(elementName(local, quali));
        if (attrs != null) {
            int len = attrs.getLength();
            for (int i = 0; i < len; i++) {
                pw.print(' ');
                pw.print(attrs.getQName(i));
                pw.print("=\"");
                pw.print(attrs.getValue(i));
                pw.print('"');
            }
        }
        pw.print('>');
        pw.flush();
    }

    /**
     * Writes an end tag, using the same name selection as
     * {@link #startElement}.
     *
     * @param nameuri namespace URI of the element (not written)
     * @param local   local name of the element
     * @param quali   qualified name, used when no local name is available
     */
    public void endElement(String nameuri, String local, String quali) {
        pw.print("</");
        pw.print(elementName(local, quali));
        pw.print('>');
        pw.flush();
    }

    /**
     * Writes a chunk of character data unchanged.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character
     * @param length number of characters
     */
    public void characters(char ch[], int start, int length) {
        pw.print( new String(ch, start, length) );
        pw.flush();
    }

    /** Treats ignorable whitespace like ordinary character data. */
    public void ignorableWhitespace(char ch[], int start, int length) {
        characters(ch, start, length);
    }


    /**
     * Writes a processing instruction; the separating blank and the data are
     * omitted when there is no data.
     *
     * @param target target of the instruction
     * @param data   data of the instruction; may be {@code null} or empty
     */
    public void processingInstruction(String target, String data) {
        pw.print("<?");
        pw.print(target);
        if (data != null && data.length() > 0) {
            pw.print(' ');
            pw.print(data);
        }
        pw.print("?>");
        pw.flush();
    }

    /** Writes a parser warning as an XML comment. */
    public void warning(SAXParseException ex) {
        report("Warning", ex);
    }

    /** Writes a recoverable parser error as an XML comment. */
    public void error(SAXParseException ex) {
        report("Error", ex);
    }

    /** Writes a fatal parser error as an XML comment; nothing is thrown here. */
    public void fatalError(SAXParseException ex) {
        report("Fatal Error", ex);
    }


    /**
     * Logs the request as an XML comment and, when one of the identifiers
     * looks like a DTD (contains "DTD" or "dtd"), writes a DOCTYPE line for
     * it. The element name is not known at this point and is written as
     * {@code ????}.
     *
     * @param publicId public identifier of the entity; may be {@code null}
     * @param systemId system identifier of the entity; may be {@code null}
     * @return always {@code null}, leaving the resolution to the parser
     */
    public InputSource resolveEntity(String publicId,
                                     String systemId) {
        pw.println("\n<!-- [resolveEntity] publicId: "+
                   publicId + ", systemId: " +
                   systemId + " -->" );
        // Each identifier is tested on its own: either of them may be null.
        boolean pid = mentionsDtd(publicId);
        boolean sid = mentionsDtd(systemId);
        if (pid) {
            pw.print("<!DOCTYPE ???? " );
            pw.print("PUBLIC \"" + publicId + "\"");
            if (systemId != null) {
                pw.print(" \"" + systemId + "\"");
            }
            // Always terminate the declaration, also without a system id.
            pw.println(" >");
        } else if (sid) {
            pw.print("<!DOCTYPE ???? " );
            pw.print("SYSTEM \"" + systemId + "\"");
            pw.println(" >");
        }
        return null;
    }

    /**
     * Writes a notation declaration as a plain text line.
     *
     * @param name     name of the notation
     * @param publicId public identifier; may be {@code null}
     * @param systemId system identifier; may be {@code null}
     */
    public void notationDecl(String name, String publicId,
                                     String systemId) {
        pw.println("\n[notationDeclaration] name: " + name + " publicId: "+
                   publicId + ", systemId: " +
                   systemId );
    }


    /** Returns the local name, or the qualified name when the local name is missing or empty. */
    private static String elementName(String local, String quali) {
        if (local != null && local.length() > 0) {
            return local;
        }
        return quali;
    }

    /** Tells whether an identifier is present and contains "DTD" or "dtd". */
    private static boolean mentionsDtd(String id) {
        return id != null
            && ( id.indexOf("DTD") >= 0 || id.indexOf("dtd") >= 0 );
    }

    /** Writes one parser message as an XML comment with its location and flushes. */
    private void report(String level, SAXParseException ex) {
        pw.println("\n<!-- [" + level + "] "+
                   ex.getSystemId()+": line "+
                   ex.getLineNumber()+", column "+
                   ex.getColumnNumber()+":\n"+
                   ex.getMessage() + " -->");
        pw.flush();
    }
}

Attributes aus org.xml.sax,
AttributesImpl aus org.xml.sax.helpers:

interface Attributes
  int getLength() 

  int getIndex(String qName) 
  int getIndex(String uri, String localName) 

  String getQName(int index) 
  String getLocalName(int index) 
  String getURI(int index) 

  String getValue(int index) 
  String getValue(String qName) 
  String getValue(String uri, String localName) 

  String getType(int index) 
  String getType(String qName) 
  String getType(String uri, String localName) 
class AttributesImpl, zusätzlich zu den Methoden von Attributes:
  AttributesImpl() 
  AttributesImpl(Attributes atts) 

  void clear()
  void removeAttribute(int index) 

  void addAttribute(String uri, String localName, String qName, 
                    String type, String value) 
  void addAttribute(int index,
                    String uri, String localName, String qName, 
                    String type, String value) 

  void setAttributes(Attributes atts) 

  void setLocalName(int index, String localName) 
  void setQName(int index, String qName) 
  void setType(int index, String type) 
  void setURI(int index, String uri) 
  void setValue(int index, String value) 

SAXUtil: Nummerieren der h1-Überschriften

import java.io.IOException;
import java.io.PrintWriter;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Variant of {@link SAXSchreiber} that numbers {@code h1} headings.
 *
 * <p>Every {@code h1} start tag is written with a {@code count} attribute,
 * and the heading text is prefixed with {@code "Kapitel <n>. "}. A
 * {@code count} attribute already present in the input sets the current
 * number; otherwise the number is the previous one plus one.
 */
public class SAXUtil extends SAXSchreiber {

    /** Number of the most recent heading; 0 before the first one. */
    int chapter = 0;

    /** True while the prefix of the current heading still has to be written. */
    boolean insertCounter = false;

    /** Creates a handler writing to the default writer of {@link SAXSchreiber}. */
    public SAXUtil() {
        super();
    }

    /**
     * Creates a handler writing to the given writer.
     *
     * @param p destination of the output
     */
    public SAXUtil(PrintWriter p) {
        // Hand the writer to the superclass instead of letting its no-arg
        // constructor create a writer that is replaced right away.
        super(p);
    }


    /** Restarts the numbering and writes the XML declaration. */
    public void startDocument() {
        chapter = 0;
        insertCounter = false;
        super.startDocument();
    }


    /**
     * Writes a start tag; for {@code h1} the tag gets a {@code count}
     * attribute and the chapter prefix is scheduled for output.
     *
     * @param nameuri namespace URI of the element
     * @param local   local name of the element
     * @param quali   qualified name of the element
     * @param attrs   attributes of the element; may be {@code null}
     */
    public void startElement(String nameuri, String local, String quali,
                             Attributes attrs) {
        // Accept the heading under either name: which of the two a parser
        // supplies depends on its namespace configuration.
        if ( !"h1".equals(local) && !"h1".equals(quali) ) {
            super.startElement(nameuri,local,quali,attrs);
            pw.flush();
            return;
        }

        Attributes at = attrs;
        String cVal = ( attrs != null ) ? attrs.getValue("count") : null;
        boolean given = false;
        if ( cVal != null ) {
            try {
                chapter = Integer.parseInt(cVal);
                given = true;
            } catch (NumberFormatException e) {
                // Not a number: number the heading like one without a
                // count attribute instead of aborting the parse.
                given = false;
            }
        }
        if ( !given ) {
            cVal = "" + (++chapter);
            AttributesImpl ai = ( attrs != null ) ? new AttributesImpl(attrs)
                                                  : new AttributesImpl();
            int idx = ai.getIndex("count");
            if ( idx >= 0 ) {
                ai.setValue(idx, cVal);   // replace the unusable value
            } else {
                ai.addAttribute("","count","count","CDATA",cVal);
            }
            at = ai;
        }
        insertCounter = true;
        super.startElement(nameuri,local,quali,at);
    }


    /**
     * Writes an end tag. A chapter prefix that is still pending, because no
     * character data has been reported since the heading started, is written
     * first so that it cannot end up in front of unrelated text later on.
     *
     * @param nameuri namespace URI of the element
     * @param local   local name of the element
     * @param quali   qualified name of the element
     */
    public void endElement(String nameuri, String local, String quali) {
        if ( insertCounter ) {
            pw.print("Kapitel " + chapter + ". ");
            insertCounter = false;
        }
        super.endElement(nameuri,local,quali);
    }


    /**
     * Writes character data, preceded by the chapter prefix when this is the
     * first chunk after an {@code h1} start tag.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character
     * @param length number of characters
     */
    public void characters(char ch[], int start, int length) {
        String s = new String(ch, start, length);
        if ( insertCounter ) {
            s = "Kapitel " + chapter + ". " + s;
            insertCounter = false;
        }
        pw.print( s );
        pw.flush();
    }


    /**
     * Writes nothing for entity resolution requests.
     *
     * @return always {@code null}, leaving the resolution to the parser
     */
    public InputSource resolveEntity(String publicId,
                                     String systemId) {
        return null;
    }


    /** Writes nothing for notation declarations. */
    public void notationDecl(String name, String publicId,
                                     String systemId) {
    }
}

Ausgabe für 'datei.xhtml' von oben:

<?xml version="1.0" encoding="iso-8859-1"?>

<!-- [resolveEntity] publicId: -//W3C//DTD XHTML 1.0 Transitional//EN, systemId: DTD/xhtml1-transitional.dtd -->
<!DOCTYPE ???? PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd" >

<!-- [resolveEntity] publicId: -//W3C//ENTITIES Latin 1 for XHTML//EN, systemId: xhtml-lat1.ent -->

<!-- [resolveEntity] publicId: -//W3C//ENTITIES Symbols for XHTML//EN, systemId: xhtml-symbol.ent -->

<!-- [resolveEntity] publicId: -//W3C//ENTITIES Special for XHTML//EN, systemId: xhtml-special.ent -->
<html>
<head>
<title>Titel</title>
<?php $a=9; 
  function yyy(a) {
    if ( a > 2 ) { 
       print("<p>nonsense</p>"); 
    }
  }
?>
<script type="text/javascript" language="JavaScript">
 
  function xxx(a) {
    if ( a > 2 ) { 
       document.write("<p>nonsense</p>"); 
    }
  }

</script>
</head>
<body bgcolor="white">
<h1>Überschrift A</h1>
<p align="center">Paragraph mit <em>Hervorhebung</em>
und einem <br clear="none"></br> Zeilenumbruch.
</p>
<ul>
<li>Listen Element</li>
<li>Entity Element &abc; </li>
</ul>
<h1>Überschrift B</h1>
<p>Text eines zweiten Paragraphen mit 
<strong>Hervorhebung</strong> 
ohne Zeilenumbruch.
</p>

<!-- [Error] datei.xhtml: line 41, column 16:
Attribute "count" must be declared for element type "h1". -->
<h1 count="10">Überschrift C</h1>
<p>Text eines dritten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
<h1>Überschrift D</h1>
<p>Text eines vierten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
</body>
</html>

SAXNummer:

import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Command-line demo: parses an XML file with a namespace-aware SAX parser
 * and writes it back out with numbered headings via {@link SAXUtil}.
 */
public class SAXNummer {

    /**
     * Parses {@code datei.xhtml} (resolved by the parser as a URI) and writes
     * the result to a {@code Screen}.
     *
     * <p>If the parser cannot be created, the stack trace is printed and the
     * method returns; parse failures are reported by a stack trace as well.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        SAXParserFactory pfac = SAXParserFactory.newInstance();
        SAXParser parser = null;

        try {
            // Validation is deliberately left switched off in this demo:
            //	   pfac.setValidating(true);
            pfac.setNamespaceAware(true);
            parser = pfac.newSAXParser();
        }
        catch (ParserConfigurationException e) {
            e.printStackTrace();
            return; // parser is still null; parse() below would fail
        }
        catch (SAXException e) {
            e.printStackTrace();
            return; // parser is still null; parse() below would fail
        }

        Screen sc = new Screen();
        String uri = "datei.xhtml";
        DefaultHandler dh = new SAXUtil(sc);

        try {
            parser.parse(uri,dh);
        }
        catch (SAXException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Ausgabe für 'datei.xhtml' von oben:

<?xml version="1.0" encoding="iso-8859-1"?>
<html>
<head>
<title>Titel</title>
<?php $a=9; 
  function yyy(a) {
    if ( a > 2 ) { 
       print("<p>nonsense</p>"); 
    }
  }
?>

<script type="text/javascript" language="JavaScript">
 
  function xxx(a) {
    if ( a > 2 ) { 
       document.write("<p>nonsense</p>"); 
    }
  }

</script>
</head>
<body bgcolor="white">
<h1 count="1">Kapitel 1. Überschrift A</h1>
<p align="center">Paragraph mit <em>Hervorhebung</em>
und einem <br clear="none"></br> Zeilenumbruch.
</p>
<ul>
<li>Listen Element</li>
<li>Entity Element &abc; </li>
</ul>
<h1 count="2">Kapitel 2. Überschrift B</h1>
<p>Text eines zweiten Paragraphen mit 
<strong>Hervorhebung</strong> 
ohne Zeilenumbruch.
</p>
<h1 count="10">Kapitel 10. Überschrift C</h1>
<p>Text eines dritten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
<h1 count="11">Kapitel 11. Überschrift D</h1>
<p>Text eines vierten Paragraphen ohne 
Hervorhebungen und ohne Zeilenumbruch.
</p>
</body>
</html>

© Universität Mannheim, Rechenzentrum, 1998-2002.

Heinz Kredel

Last modified: Mon Feb 3 23:48:33 CET 2003