php処理xmlドキュメントの解析関数

67185 ワード


  
  
php xml , xml 。 。 xml , xml 。 xml 。

, ,

<?php
// Path of the XML document that the script below parses and pretty-prints.
// (The literal previously carried stray leading/trailing spaces, which made
// it name a file that does not exist.)
$file = "xmltest.xml";
//
/**
 * Decide whether PHP code embedded in an XML file may be executed.
 *
 * A file is trusted only when its name has no "scheme://" prefix (i.e. it is
 * a plain local path), it exists, and it is owned by the same user that owns
 * the running script (getmyuid()).
 *
 * @param string $file Path or URL of the XML document.
 * @return bool True when the file is local and owned by the script owner.
 */
function trustedFile($file) {
    // Anything that looks like a URL / stream wrapper is never trusted.
    // preg_match() with the "i" flag replaces eregi(), removed in PHP 7.
    if (preg_match('#^[a-z]+://#i', (string) $file)) {
        return false;
    }

    // fileowner() warns and returns false for a missing path; bail out first
    // so that a false result can never be loosely equal to uid 0.
    if (!file_exists($file)) {
        return false;
    }

    return fileowner($file) === getmyuid();
}
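// Start-tag handler: prints the element name and its attributes.
// $attribs is an array of the element's attributes (name => value).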

/**
 * Start-tag handler: prints the opening tag as colourised HTML.
 *
 * Output has the form &lt;NAME attr="value" ...&gt; with the element name,
 * attribute names and attribute values each wrapped in a <font> tag.
 *
 * @param mixed  $parser  The parser invoking the handler (unused).
 * @param string $name    Element name.
 * @param array  $attribs Attributes of the element as name => value.
 * @return void
 */
function startElement($parser, $name, $attribs = array()) {
    print "&lt;<font color=\"#0000cc\">$name</font>";

    // foreach replaces the list()/each() idiom; each() was removed in PHP 8.
    foreach ($attribs as $k => $v) {
        // The XML parser hands over attribute values with entities already
        // decoded, so they must be re-escaped before being embedded in HTML.
        $attrName  = htmlspecialchars($k);
        $attrValue = htmlspecialchars($v);
        print " <font color=\"#009900\">$attrName</font>=\"<font color=\"#990000\">$attrValue</font>\"";
    }

    print "&gt;";
}
//
/**
 * End-tag handler: prints the closing tag as colourised HTML.
 *
 * @param mixed  $parser The parser invoking the handler (unused).
 * @param string $name   Element name.
 * @return void
 */
function endElement($parser, $name) {
    // The string literal was previously split by a stray space after the
    // backslash, which left the statement syntactically invalid.
    print "&lt;/<font color=\"#0000cc\">$name</font>&gt;";
}
//
/**
 * Character-data handler: prints text content in bold.
 *
 * @param mixed  $parser The parser invoking the handler (unused).
 * @param string $data   Text found between tags.
 * @return void
 */
function characterData($parser, $data) {
    // The text arrives with entities decoded (e.g. "&lt;" is already "<"),
    // so escape it again before embedding it in the HTML output.
    print "<b>" . htmlspecialchars($data) . "</b>";
}
// (PI)
/**
 * Processing-instruction (PI) handler.
 *
 * Only PIs whose target is "php" (case-insensitive) are handled; every other
 * target is ignored. For a "php" PI the code is executed when the document
 * being parsed is trusted (see trustedFile()), otherwise it is displayed.
 *
 * The document path is looked up in the global $parser_file map, which
 * new_xml_parser() fills in.
 *
 * @param mixed  $parser The parser invoking the handler.
 * @param string $target PI target, e.g. "php" for <?php ... ?>.
 * @param string $data   PI content.
 * @return void
 */
function PIHandler($parser, $target, $data) {
    global $parser_file;

    if (strtolower($target) !== "php") {
        return;
    }

    // Parsers are objects on PHP 8 and cannot be used as array keys, so key
    // the map by object id there. On older versions the parser is a resource
    // whose integer id is what PHP used as the array key anyway.
    $key = is_object($parser) ? spl_object_id($parser) : (int) $parser;
    $file = isset($parser_file[$key]) ? $parser_file[$key] : null;

    // If the parsed document is "trusted", we say it is safe to execute PHP
    // code inside it. If not (or if the source file is unknown), display the
    // code instead.
    if ($file !== null && trustedFile($file)) {
        // SECURITY: eval() runs arbitrary code taken from the XML document.
        // The only safeguard is the ownership check in trustedFile(); review
        // whether executing embedded code is really required.
        eval($data);
    } else {
        printf("Untrusted PHP code: <i>%s</i>", htmlspecialchars($data));
    }
}
//
/**
 * Default handler: prints everything no other handler claims.
 *
 * Data that starts with "&" and ends with ";" (an entity reference) is
 * printed in purple; anything else is printed in a smaller font. The data is
 * HTML-escaped in both cases.
 *
 * @param mixed  $parser The parser invoking the handler (unused).
 * @param string $data   Raw, unhandled markup.
 * @return void
 */
function defaultHandler($parser, $data) {
    // Compare against the single characters: the previous literals were
    // padded with spaces and could never equal a one-character substr().
    $isEntityRef = substr($data, 0, 1) === "&" && substr($data, -1, 1) === ";";

    if ($isEntityRef) {
        printf('<font color="#aa00aa">%s</font>', htmlspecialchars($data));
    } else {
        printf('<font size="-1">%s</font>', htmlspecialchars($data));
    }
}
//
/**
 * External-entity handler: parses the referenced document with a new parser.
 *
 * @param mixed  $parser          The parser that met the entity reference.
 * @param string $openEntityNames Names of the entities currently open.
 * @param string $base            Base for resolving $systemId (unused).
 * @param string $systemId        System identifier (path/URL) of the entity.
 * @param string $publicId        Public identifier of the entity (unused).
 * @return bool True when the entity was opened and parsed without error;
 *              false when there is no system id, the entity cannot be
 *              opened, or parsing it fails.
 */
function externalEntityRefHandler($parser, $openEntityNames, $base, $systemId, $publicId) {
    if (!$systemId) {
        return false;
    }

    $opened = new_xml_parser($systemId);
    if (!$opened) {
        printf("Could not open entity %s at %s\n", $openEntityNames, $systemId);
        return false;
    }
    // Use separate names so the caller's $parser argument is not clobbered.
    list($entityParser, $fp) = $opened;

    $ok = true;
    // Compare explicitly: a chunk consisting of "0" is falsy and would
    // otherwise end the loop early.
    while (($data = fread($fp, 4096)) !== false && $data !== '') {
        if (!xml_parse($entityParser, $data, feof($fp))) {
            printf(
                "XML error: %s at line %d while parsing entity %s\n",
                xml_error_string(xml_get_error_code($entityParser)),
                xml_get_current_line_number($entityParser),
                $openEntityNames
            );
            $ok = false;
            break;
        }
    }

    // Release the file handle on every path (it was previously leaked).
    fclose($fp);
    // Parsers are objects from PHP 8.0 on and are freed automatically;
    // only resource-based parsers need an explicit free.
    if (is_resource($entityParser)) {
        xml_parser_free($entityParser);
    }

    return $ok;
}
// xml 。
/**
 * Open an XML file and create a parser wired to this script's handlers.
 *
 * The file path is also recorded in the global $parser_file map, keyed by
 * the parser, so that PIHandler() can find out which document a parser is
 * reading.
 *
 * @param string $file Path (or URL) of the XML document.
 * @return array|false array($parser, $fileHandle) on success, false when the
 *                     file cannot be opened.
 */
function new_xml_parser($file) {
    global $parser_file;

    // Open the input first so a failed open does not leave an unused parser
    // behind. Errors are suppressed because failure is reported via false.
    if (!($fp = @fopen($file, "r"))) {
        return false;
    }

    $xml_parser = xml_parser_create();
    // Case folding on: element and attribute names are reported upper-cased.
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_processing_instruction_handler($xml_parser, "PIHandler");
    xml_set_default_handler($xml_parser, "defaultHandler");
    xml_set_external_entity_ref_handler($xml_parser, "externalEntityRefHandler");

    if (!is_array($parser_file)) {
        settype($parser_file, "array");
    }
    // Parsers are objects on PHP 8 and cannot be used as array keys, so key
    // the map by object id there. On older versions the parser is a resource
    // whose integer id is what PHP used as the array key anyway.
    $key = is_object($xml_parser) ? spl_object_id($xml_parser) : (int) $xml_parser;
    $parser_file[$key] = $file;

    return array($xml_parser, $fp);
}
// Main script: parse $file in 4 KiB chunks and print it as colourised HTML.
$opened = new_xml_parser($file);
if (!$opened) {
    die("could not open XML input");
}
list($xml_parser, $fp) = $opened;

print "<pre>";
// Compare explicitly: a chunk consisting of "0" is falsy and would otherwise
// end the loop early.
while (($data = fread($fp, 4096)) !== false && $data !== '') {
    // The third argument tells the parser whether this is the last chunk.
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf(
            "XML error: %s at line %d\n",
            xml_error_string(xml_get_error_code($xml_parser)),
            xml_get_current_line_number($xml_parser)
        ));
    }
}
fclose($fp);
print "</pre>";
print "parse complete\n";
// Parsers are objects from PHP 8.0 on and are freed automatically; only
// resource-based parsers need an explicit free.
if (is_resource($xml_parser)) {
    xml_parser_free($xml_parser);
}
?>

xmltest.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- xml , -->
<chapter>
<TITLE>Title</TITLE>
<para>
<informaltable>
<tgroup cols="3">
<tbody>
<row><entry>a1</entry><entry morerows="1">b1</entry><entry>c1</entry></row>
<row><entry>a2</entry><entry>c2</entry></row>
<row><entry>a3</entry><entry>b3</entry><entry>c3</entry></row>
</tbody>
</tgroup>
</informaltable>
</para>
<section id="about">
<title>About this Document</title>
<para>
<!-- this is a comment -->
<?php print 'Hi! This is PHP version ' . phpversion(); ?>
</para>
</section>
</chapter>


xml php 。
<?php
/**
 * Plain record describing one amino acid.
 *
 * The constructor copies every key of the given array onto the object, so
 * keys other than the four declared properties become dynamic properties.
 */
#[\AllowDynamicProperties]
class AminoAcid {
    public $name;   // aa name
    public $symbol; // three letter symbol
    public $code;   // one letter code
    public $type;   // hydrophobic, charged or neutral

    /**
     * A method named after the class is no longer a constructor on PHP 8,
     * which left every property unset; __construct() works on all versions.
     *
     * @param array $aa Property values keyed by property name.
     */
    public function __construct($aa) {
        if (!is_array($aa)) {
            return;
        }
        foreach ($aa as $k => $v) {
            $this->$k = $v;
        }
    }
}
/**
 * Read the XML database of amino acids.
 *
 * The file is parsed with xml_parse_into_struct(); every <molecule> element
 * is turned into an object via parseMol().
 *
 * @param string $filename Path of the XML file.
 * @return array array($molecules, $values): the list of objects built by
 *               parseMol() and the raw value array produced by the parser.
 *               Both are empty when the file cannot be read.
 */
function readDatabase($filename) {
    $data = file_get_contents($filename);
    if ($data === false) {
        return array(array(), array());
    }

    $values = array();
    $tags = array();
    $parser = xml_parser_create();
    // Keep tag names in their original case and skip whitespace-only values.
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
    // $values receives one entry per parsed node; $tags maps each tag name
    // to the indexes in $values where that tag occurs.
    xml_parse_into_struct($parser, $data, $values, $tags);
    // Parsers are objects from PHP 8.0 on and are freed automatically; only
    // resource-based parsers need an explicit free.
    if (is_resource($parser)) {
        xml_parser_free($parser);
    }

    // Always an array, even when the document has no <molecule> element.
    $tdb = array();
    $molranges = isset($tags["molecule"]) ? $tags["molecule"] : array();
    $count = count($molranges);

    // Each contiguous pair of entries is the lower and upper index of one
    // molecule definition; the bound also skips a trailing unpaired entry.
    for ($i = 0; $i + 1 < $count; $i += 2) {
        $offset = $molranges[$i] + 1;
        $len = $molranges[$i + 1] - $offset;
        $tdb[] = parseMol(array_slice($values, $offset, $len));
    }

    return array($tdb, $values);
}
/**
 * Build an AminoAcid from a slice of xml_parse_into_struct() values.
 *
 * Each entry's "tag" becomes a property name and its "value" the property
 * value; entries without a "value" yield null.
 *
 * @param array $mvalues List of value entries (arrays with a "tag" key).
 * @return AminoAcid
 */
function parseMol($mvalues) {
    // Start from an empty map so an empty slice still yields a valid object.
    $mol = array();

    foreach ($mvalues as $entry) {
        if (!isset($entry["tag"])) {
            continue;
        }
        // Open/close entries carry no "value" key.
        $mol[$entry["tag"]] = isset($entry["value"]) ? $entry["value"] : null;
    }

    return new AminoAcid($mol);
}
// Main script: load the database and dump the resulting objects.
$db = readDatabase("moldb.xml");
echo "** Database of AminoAcid objects:\n";
print_r($db[0]);

// $db[1] is the raw value array for the whole document, so this folds every
// tag in the file into a single object.
$s = parseMol($db[1]);
?>

moldb.xml
<?xml version="1.0" encoding="UTF-8"?>
<moldb>
<molecule>
<name>Alanine</name>
<symbol>ala</symbol>
<code>A</code>
<type>hydrophobic</type>
</molecule>
<molecule>
<name>Lysine</name>
<symbol>lys</symbol>
<code>K</code>
<type>charged</type>
</molecule>
</moldb>