| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 | 
							- #!/usr/bin/php
 
- <?php
 
- chdir(dirname(__FILE__));
 
- require_once 'common.php';
 
- assertCli();
 
- /**
 
-  * @file
 
-  * Parses *.ent files into an entity lookup table, and then serializes and
 
-  * writes the whole kaboodle to a file. The resulting file is cached so
 
-  * that this script does not need to be run. This script should rarely,
 
-  * if ever, be run, since HTML's entities are fairly immutable.
 
-  */
 
- // here's where the entity files are located, assuming working directory
 
- // is the same as the location of this PHP file. Needs trailing slash.
 
- $entity_dir = '../docs/entities/';
 
- // defines the output file for the serialized content.
 
- $output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
 
/**
 * Encodes a Unicode code point as a UTF-8 byte string.
 *
 * Originally courtesy of a PHP manual comment; extended to emit the
 * four-byte form so that code points at or above U+10000 no longer
 * overflow the three-byte lead byte and produce invalid UTF-8.
 *
 * No validation is performed: surrogates (U+D800..U+DFFF) and values
 * outside 0..0x10FFFF are encoded mechanically, exactly as handed in.
 *
 * @param int|string $dec Code point as an integer or a decimal numeric
 *                        string (e.g. a regex capture).
 * @return string The UTF-8 encoding of the code point (1 to 4 bytes).
 */
function unichr($dec)
{
    // Regex captures arrive as strings; normalize once so the bit
    // operations below always work on an integer.
    $dec = (int) $dec;

    if ($dec < 0x80) {
        // U+0000..U+007F: plain ASCII, one byte.
        return chr($dec);
    }

    if ($dec < 0x800) {
        // U+0080..U+07FF: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($dec >> 6))
            . chr(0x80 | ($dec & 0x3F));
    }

    if ($dec < 0x10000) {
        // U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($dec >> 12))
            . chr(0x80 | (($dec >> 6) & 0x3F))
            . chr(0x80 | ($dec & 0x3F));
    }

    // U+10000 and above: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($dec >> 18))
        . chr(0x80 | (($dec >> 12) & 0x3F))
        . chr(0x80 | (($dec >> 6) & 0x3F))
        . chr(0x80 | ($dec & 0x3F));
}
 
// ---------------------------------------------------------------------
// Main script: collect the *.ent files, parse them into a lookup table,
// and write the serialized table to $output_file.
// ---------------------------------------------------------------------

if (!is_dir($entity_dir)) {
    exit("Fatal Error: Can't find entity directory.\n");
}

// Never overwrite an existing cache file; it has to be removed by hand.
if (file_exists($output_file)) {
    exit("Fatal Error: output file already exists.\n");
}

// scandir() returns the names sorted alphabetically, so the files are
// processed in the same order on every filesystem and the serialized
// output is reproducible. The warning is suppressed because a failure
// is reported through the message below instead.
$dir_entries = @scandir($entity_dir);
if ($dir_entries === false) {
    exit("Fatal Error: Cannot read entity directory.\n");
}

$entity_files = array();
foreach ($dir_entries as $file) {
    // Skip '.', '..' and any other dot-prefixed entry.
    if (strpos($file, '.') === 0) {
        continue;
    }
    // Only files whose final extension is exactly 'ent' are parsed.
    if (pathinfo($file, PATHINFO_EXTENSION) !== 'ent') {
        continue;
    }
    $entity_files[] = $file;
}

if (!$entity_files) {
    exit("Fatal Error: No entity files to parse.\n");
}

$entity_table = array();

// Matches declarations such as <!ENTITY nbsp "&#160;"> as well as the
// double-escaped form <!ENTITY lt "&#38;#60;">. Capture 1 is the entity
// name, capture 2 the decimal code point.
$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/';

foreach ($entity_files as $file) {
    $contents = file_get_contents($entity_dir . $file);
    if ($contents === false) {
        // Without this check an unreadable file would be skipped silently
        // and an incomplete table would be written.
        exit("Fatal Error: Cannot read entity file $file.\n");
    }

    $matches = array();
    preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);

    foreach ($matches as $match) {
        // A name that appears more than once keeps its last definition.
        $entity_table[$match[1]] = unichr($match[2]);
    }
}

$output = serialize($entity_table);

// Report a failed or short write instead of announcing success.
if (file_put_contents($output_file, $output) !== strlen($output)) {
    exit("Fatal Error: Cannot write output file.\n");
}

echo "Completed successfully.";

// vim: et sw=4 sts=4
 
 
  |