کد PHP:
<?php
// ---------------------------------------------------------------------------
// Configuration - adjust the values below before running the script.
// ---------------------------------------------------------------------------

// Public base URL of the site. It must end with a trailing '/'.
$url = 'http://localhost/mycms/';

// Location of the site's root folder; the script chdir()s into this path.
$root_dir = '../mycms';

// Only files carrying one of these extensions are listed in the sitemap.
$extensions = array('htm', 'html', 'php');

// Exclusion rules.
// Any file or folder whose name contains one of these fragments is skipped.
$always_ignore = array('.inc', 'admin', 'image');

// Files with exactly these names are never linked in the sitemap.
$ignore_files = array('404.php', 'error.php', 'config.php', 'include.inc');

// Folders with exactly these names are never entered.
$ignore_dirs = array(
    '.svn', 'admin', 'css', 'cvs', 'images', 'inc', 'includes',
    'js', 'lib', 'stats', 'styles', 'system', 'uploads',
);

// ---------------------------------------------------------------------------
// End of configuration - nothing below this line needs editing.
// ---------------------------------------------------------------------------
// This function extracts pages
/**
 * Recursively collects the URLs of all sitemap-worthy pages below a directory.
 *
 * Reads the global configuration ($url, $extensions, $always_ignore,
 * $ignore_files, $ignore_dirs, $root_dir). The function chdir()s into
 * $currentDir and leaves the working directory there when it returns, so a
 * caller that cares about its own working directory must restore it.
 *
 * @param string      $currentDir Directory to scan, relative to the current
 *                                working directory (or absolute).
 * @param string|null $urlPath    URL path (with trailing '/') that corresponds
 *                                to $currentDir, relative to $url. Leave it
 *                                null for a top-level call: the prefix is then
 *                                '' when $currentDir is $root_dir and
 *                                "$currentDir/" otherwise.
 *
 * @return array List of absolute page URLs (strings).
 */
function getPages($currentDir, $urlPath = null) {
    global $url, $extensions, $always_ignore, $ignore_files, $ignore_dirs, $root_dir;

    $pages = array();

    if ($urlPath === null) {
        $urlPath = ($currentDir !== $root_dir) ? $currentDir.'/' : '';
    }

    // If the directory cannot be entered, stop here. Carrying on would scan
    // the parent again and recurse into the same entry forever.
    if (!chdir($currentDir)) {
        return $pages;
    }

    // Absolute location of this directory, used to come back after each
    // recursive call. chdir('..') is not reliable after entering a symlink.
    $here = getcwd();

    if (!empty($extensions)) {
        $files = glob('{*.'.implode(',*.', $extensions).'}', GLOB_BRACE);
        if ($files === false) {
            $files = array();
        }
        foreach ($files as $file) {
            // A directory may be named like a page (e.g. "docs.html"); skip it.
            if (is_file($file) && !getPagesIsIgnored($file, $ignore_files, $always_ignore)) {
                $pages[] = $url.$urlPath.$file;
            }
        }
    }

    // '*' already matches names containing dots; the former '{*,*.*}' pattern
    // returned such directories twice and duplicated their pages.
    $dirs = glob('*', GLOB_ONLYDIR);
    if ($dirs === false) {
        $dirs = array();
    }
    foreach ($dirs as $dir) {
        if (getPagesIsIgnored($dir, $ignore_dirs, $always_ignore)) {
            continue;
        }

        // Pass the accumulated URL path down so deeply nested pages keep
        // every parent folder in their URL.
        $pages = array_merge($pages, getPages($dir, $urlPath.$dir.'/'));

        // The recursive call left us inside $dir; return to this directory.
        if ($here !== false) {
            chdir($here);
        } else {
            chdir('..');
        }
    }

    return $pages;
}

/**
 * Tells whether a file or folder name is excluded from the sitemap.
 *
 * @param string $name       Bare file or folder name.
 * @param array  $exactNames Names that are excluded when they match exactly.
 * @param array  $fragments  Substrings that exclude any name containing them.
 *
 * @return bool True when the name must be skipped.
 */
function getPagesIsIgnored($name, $exactNames, $fragments) {
    if (in_array($name, $exactNames, true)) {
        return true;
    }
    foreach ($fragments as $fragment) {
        // An empty fragment would match every name, so it is disregarded.
        if ($fragment !== '' && strpos($name, $fragment) !== false) {
            return true;
        }
    }
    return false;
}
/**
 * Builds the XML sitemap for every page found under the global $root_dir.
 *
 * Pages are collected with getPages(); the working directory in effect before
 * the call is restored afterwards because getPages() changes it.
 *
 * @return string The complete sitemap document (sitemaps.org 0.9 format).
 */
function generateSiteMap() {
    global $root_dir;

    $startDir = getcwd();
    $all_pages = getPages($root_dir);
    if ($startDir !== false) {
        chdir($startDir);
    }

    $output = '<?xml version="1.0" encoding="UTF-8"?>'.PHP_EOL;
    $output .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">'.PHP_EOL;

    foreach ($all_pages as $link) {
        // Link to the folder itself rather than to its index file. The
        // look-behind keeps names such as "reindex.php" intact.
        $link = preg_replace('#(?<=/)index\.\w{3,4}$#', '', $link);

        // htmlspecialchars() only produces &amp; &lt; &gt; &quot; and &#039;,
        // all of which are valid in XML. htmlentities() would also emit
        // HTML-only named entities (e.g. &eacute;) that break the document.
        $output .= ' <url>'.PHP_EOL;
        $output .= ' <loc>'.htmlspecialchars($link, ENT_QUOTES, 'UTF-8').'</loc>'.PHP_EOL;
        $output .= ' </url>'.PHP_EOL;
    }

    $output .= '</urlset>'.PHP_EOL;

    return $output;
}
// Remember the starting directory, normalised to forward slashes, so it can
// be re-entered once the sitemap has been produced.
$currentDir = str_replace('\\', '/', getcwd());

// Announce the XML payload, then emit the generated sitemap.
header('Content-Type: text/xml');
print generateSiteMap();

// Return to the directory the script was started from.
chdir($currentDir);
?>