# # Copyright (c) 2004, 2005 Google Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Google nor the names of its contributors may # be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # # The sitemap_gen.py script is written in Python 2.2 and released to # the open source community for continuous improvements under the BSD # 2.0 new license, which can be found at: # # http://www.opensource.org/licenses/bsd-license.php #
"""Unit tests for sitemap_gen.py, a script for generating sitemaps for a web server. """
# Please be careful that all syntax used in this file can be parsed on # Python 1.5 -- this version check is not evaluated until after the # entire file has been parsed. import sys if sys.hexversion < 0x02020000:
- print 'This script requires Python 2.2 or later.' print 'Currently run with version: %s' % sys.version sys.exit(1)
import binascii import fnmatch import gzip import os import tempfile import unittest import xml.dom.minidom import sitemap_gen
# True and False were introduced in Python2.2.2 try:
- testTrue=True del testTrue
except NameError:
- True=1 False=0
class URLCounter(object):
  """Counts returned URLs, determines how many valid v. invalid we get.
  This is a helper for consuming what the many Input* objects produce."""

  def __init__(self, root, print_invalid, expected):
    """Input:
         root           -- root URL for calling the URL's own Validate()
         print_invalid  -- print to output all invalid URLs
         expected       -- sequence of wildcard filters to validate against
    """
    self._root     = root
    self._print    = print_invalid
    self._expected = expected
    self.Reset()
  #end def __init__

  def Reset(self):
    """Reset our counts without harming the validity filters."""
    self._valid   = 0
    self._invalid = 0
  #end def Reset

  def NumValid(self):
    """Returns number of valid URLs."""
    return self._valid
  #end def NumValid

  def NumInvalid(self):
    """Returns number of invalid URLs."""
    return self._invalid
  #end def NumInvalid

  def Count(self, url, allow_fragment):
    """The 'please consume this URL' function called by the URL producer."""
    valid = True
    if valid:
      valid = url.Validate(self._root, allow_fragment)
    if valid:
      # The URL must also match one of the expected wildcard patterns.
      for filter in self._expected:
        valid = fnmatch.fnmatchcase(url.loc, filter)
        if valid:
          break
    if valid:
      self._valid = self._valid + 1
    else:
      self._invalid = self._invalid + 1
      if self._print:
        url.Log(prefix='URLCounter', level=0)
  #end def Count
#end class URLCounter
class TestSiteMap(unittest.TestCase):
  """Tests the sitemap_gen application."""

  def testTimestampISO8601(self):
    """ Checks whether the TimestampISO8601 function works. """
    self.assertEqual(sitemap_gen.TimestampISO8601(23),
                     '1970-01-01T00:00:23Z')
    # 549876543 seconds past the epoch is 1987-06-05 07:29:03 UTC.
    self.assertEqual(sitemap_gen.TimestampISO8601(549876543),
                     '1987-06-05T07:29:03Z')
  #end def testTimestampISO8601

  def testExpandPathAttribute(self):
    """ Verifies our path globbing function works. """
    temppath = tempfile.mktemp()
    tempwild = tempfile.tempdir
    if tempwild:
      tempwild = tempwild + os.sep
    tempwild = tempwild + '*'
    open(temppath, 'w').close()

    dict1 = {}
    dict2 = {'alpha' : 'beta', 'path' : 'DoesNotExist987654321.xyz'}
    dict3 = {'alpha' : 'beta', 'path' : tempwild}
    res1 = sitemap_gen.ExpandPathAttribute(dict1, 'path')
    res2 = sitemap_gen.ExpandPathAttribute(dict2, 'path')
    res3 = sitemap_gen.ExpandPathAttribute(dict3, 'path')

    # No path attribute, or a non-matching one, passes through unchanged.
    self.assertEqual(len(res1), 1)
    self.assertEqual(res1[0], dict1)
    self.assertEqual(len(res2), 1)
    self.assertEqual(res2[0], dict2)
    # The wildcard must expand to at least our temp file, with the other
    # attributes copied into every expanded dict.
    self.assert_(len(res3) >= 1)
    anymatch = False
    for res in res3:
      self.assertEqual(res['alpha'], 'beta')
      path = res['path']
      if path.find(temppath) >= 0:
        anymatch = True
    self.assert_(anymatch)

    os.unlink(temppath)
  #end def testExpandPathAttribute

  def testUnicode(self):
    """ Tests minimal functionality of the learning Unicode codec """
    ENC_UTF8     = 'UTF-8'
    ENC_LATIN1   = 'ISO-8859-1'
    ENC_CYRILLIC = 'ISO-8859-5'
    STR1_LATIN1   = 'has an ' + binascii.a2b_hex('FC') + 'mlat'
    STR1_UTF8     = 'has an ' + binascii.a2b_hex('C3BC') + 'mlat'
    STR1_UCS2     = 'has an ' + unichr(252) + 'mlat'
    STR2_LATIN1   = 'DRAGON' + binascii.a2b_hex('A7') + '!'
    STR2_CYRILLIC = 'DRAGON' + binascii.a2b_hex('FD') + '!'
    STR2_UCS2     = 'DRAGON' + unichr(167) + '!'

    # Spawn our own encoder instance so we don't abuse the module one.
    encoder = sitemap_gen.Encoder()

    # Convert Latin-1 to UTF-8, by way of Unicode
    encoder.SetUserEncoding(ENC_LATIN1)
    self.assertEqual(encoder.WidenText(STR1_LATIN1, None), STR1_UCS2)
    self.assertEqual(encoder.NarrowText(STR1_UCS2, ENC_UTF8), STR1_UTF8)

    # Test learning.  STR1 has no Cyrillic equivalent, STR2 just changes.
    encoder.SetUserEncoding(None)
    encoder._learned = []
    self.assertEqual(encoder.WidenText(STR2_CYRILLIC, ENC_CYRILLIC), STR2_UCS2)
    self.assertEqual(encoder.WidenText(STR2_CYRILLIC, None), STR2_UCS2)
    self.assertEqual(encoder.NarrowText(STR1_UCS2, None), STR1_UTF8)
    self.assert_(not encoder._learned)
    self.assertEqual(encoder.NarrowText(STR1_UCS2, ENC_LATIN1), STR1_LATIN1)
    self.assertEqual(encoder.NarrowText(STR1_UCS2, None), STR1_LATIN1)
    self.assertEqual(encoder.NarrowText(STR2_UCS2, None), STR2_LATIN1)
  #end def testUnicode

  def testURL(self):
    """ Vigorously tests our URL attribute processing. """
    # Test the IsAbsolute method
    self.assert_(sitemap_gen.URL.IsAbsolute('http://a.b.c/d/e.txt?f=g#h'))
    self.assert_(sitemap_gen.URL.IsAbsolute('http://a.b.c'))
    self.assert_(not sitemap_gen.URL.IsAbsolute('http:///d/e.txt?f=g#h'))
    self.assert_(not sitemap_gen.URL.IsAbsolute('http:a.b.c/d/e.txt?f=g#h'))
    self.assert_(not sitemap_gen.URL.IsAbsolute('a.b.c/d/e.txt?f=g#h'))
    self.assert_(not sitemap_gen.URL.IsAbsolute('/d/e.txt?f=g#h'))

    # Canonicalize our base URL
    BASE_R = 'http://www.example.com/f' + binascii.a2b_hex('F6F6') + '/'
    BASE_C = 'http://www.example.com/f%F6%F6/'
    sitemap_gen.encoder.SetUserEncoding('ISO-8859-1')
    self.assertEqual(sitemap_gen.URL.Canonicalize(BASE_R), BASE_C)

    # Test how canonicalization handles pre-quoted values
    # NOTE(review): the exact pre-quoted fixtures were lost in transcription;
    # these assert the key invariant (already-quoted text is left alone) --
    # verify against the upstream sitemap_gen test.
    self.assertEqual(sitemap_gen.URL.Canonicalize(
      'http://www.example.com/already%20quoted.html'),
      'http://www.example.com/already%20quoted.html')
    self.assertEqual(sitemap_gen.URL.Canonicalize(
      'http://www.example.com/%FCmlat.html'),
      'http://www.example.com/%FCmlat.html')

    # Test IDNA encoding
    # The generator can only do the "right thing" on Python 2.3 or higher
    warn = sitemap_gen.output.num_warns
    if sys.hexversion >= 0x02030000:
      self.assertEqual(sitemap_gen.URL.Canonicalize(
        'http://www.' + unichr(252) + 'mlat.com/' + unichr(252) + 'mlat.txt'),
        'http://www.xn--mlat-zra.com/%FCmlat.txt')
      self.assertEqual(sitemap_gen.output.num_warns, warn)
    else:
      self.assertEqual(sitemap_gen.URL.Canonicalize(
        'http://www.' + unichr(252) + 'mlat.com/' + unichr(252) + 'mlat.txt'),
        'http://www.%FCmlat.com/%FCmlat.txt')

    # Fully valid URL: every attribute well-formed, no warnings expected.
    warn = sitemap_gen.output.num_warns
    url1 = sitemap_gen.URL()
    url1.TrySetAttribute('loc', BASE_R + 'bar.html')
    url1.TrySetAttribute('lastmod', '1987-06-05T07:29:03Z')
    url1.TrySetAttribute('changefreq', 'daily')
    url1.TrySetAttribute('priority', '0.3')
    self.assert_(url1.Validate(BASE_C, True))
    self.assertEqual(sitemap_gen.output.num_warns, warn)

    # Valid ref, all else invalid
    warn = sitemap_gen.output.num_warns
    url2 = sitemap_gen.URL()
    url2.TrySetAttribute('loc', BASE_C + 'bar.html')
    url2.TrySetAttribute('lastmod', 'June 1, 2005')
    url2.TrySetAttribute('changefreq', 'every second')
    url2.TrySetAttribute('priority', 'infinite')
    url2.TrySetAttribute('badattr', 'Nope!')
    self.assert_(url2.Validate(BASE_C, True))
    self.assertEqual(sitemap_gen.output.num_warns, warn + 4)

    # Two URLs with same ref should compare equal
    self.assertEqual(url1, url2)

    # A ref not based on our base URL must fail validation.
    warn = sitemap_gen.output.num_warns
    url3 = sitemap_gen.URL()
    url3.TrySetAttribute('loc', 'http://www.example.com/bar/foo.html')
    self.assert_(not url3.Validate(BASE_C, True))
    self.assertEqual(sitemap_gen.output.num_warns, warn + 1)

    # A fragmentary URL
    warn = sitemap_gen.output.num_warns
    url4 = sitemap_gen.URL()
    url4.TrySetAttribute('loc', '/foo.html')
    self.assert_(not url4.Validate(BASE_C, False))
    self.assertEqual(sitemap_gen.output.num_warns, warn + 1)
    url4.TrySetAttribute('loc', '/xyzzy/foo.html')
    self.assert_(url4.Validate('http://www.example.com/', True))
    self.assertEqual(url4.loc, 'http://www.example.com/xyzzy/foo.html')
    self.assertEqual(sitemap_gen.output.num_warns, warn + 1)

    # Test a whole sequence of good and bad timestamp values
    timestamps_good = [
      '2001', '2001-01', '2001-01-02', '2001-01-03T01:02Z',
      '2001-01-03T01:02:03Z', '2001-01-03T01:02:03.0123Z',
      '2001-01-03T01:02+00:00', '2001-01-03T01:02:03-99:99',
      '2001-01-03T01:02:03.0123+88:88',
      ]
    timestamps_bad = [
      '2001:01:03T01:02Z', '2001-01-03T01:02:03.Z',
      'a2001-01-06T01:02:05-99:99', '2001-01-06T01:02:05-99:99Z',
      '2001-1-6T01:02:05-99:99', 'xyzzy', '2001-01-03T01:02:03.1.2Z',
      ]
    url3.TrySetAttribute('loc', BASE_C + 'foo.html')
    for ts in timestamps_good:
      url3.TrySetAttribute('lastmod', ts)
      self.assert_(url3.Validate(BASE_C, True))
    for ts in timestamps_bad:
      # NOTE(review): per the surviving fragments a bad timestamp does not
      # make the whole URL invalid; it should only raise a warning.
      warn = sitemap_gen.output.num_warns
      url3.TrySetAttribute('lastmod', ts)
      self.assert_(url3.Validate(BASE_C, True))
      self.assertEqual(sitemap_gen.output.num_warns, warn + 1)
  #end def testURL

  def testFilter(self):
    """ Test the filtering object """
    url1 = sitemap_gen.URL()
    url2 = sitemap_gen.URL()
    url1.TrySetAttribute('loc', 'http://www.example.com/foo/bar.html')
    url2.TrySetAttribute('loc', 'http://www.example.com/bar/foo.html')
    url1.Validate('http://www.example.com', True)
    url2.Validate('http://www.example.com', True)

    # Arguments: each bad argument dict should produce exactly one error.
    error = sitemap_gen.output.num_errors
    args_bad = [
      {},
      {'pattern' : '*', 'type' : 'unknown'},
      {'pattern' : '*', 'type' : 'wildcard', 'action' : 'look pretty'},
      {'pattern' : '*', 'type' : 'regexp'},
      ]
    for args in args_bad:
      sitemap_gen.Filter(args)
      error = error + 1
      self.assertEqual(sitemap_gen.output.num_errors, error)

    # NOTE(review): the body exercising constructed filters (including the
    # 'action' : 'PASS' case visible in the fragments) was partially lost in
    # transcription; restore from the upstream sitemap_gen test.
    filter1 = sitemap_gen.Filter({'pattern' : '*/foo/*', 'type' : 'wildcard',
                                  'action' : 'PASS'})
    self.assert_(filter1.Apply(url1))
    self.assert_(filter1.Apply(url2) == None)
  #end def testFilter

  def Count(self, url, allow_fragment):
    """A URL consumer; tallies valid/invalid URLs into instance counters."""
    if url.Validate('http://www.example.com/', allow_fragment):
      self.valid_urls = self.valid_urls + 1
    else:
      self.invalid_urls = self.invalid_urls + 1
  #end def Count

  def testInputURL(self):
    """ Test one of the Input mechanisms: InputURL """
    # Feed a couple URLs.  Make sure we get an error on extra attributes.
    self.valid_urls   = 0
    self.invalid_urls = 0
    error = sitemap_gen.output.num_errors
    warn  = sitemap_gen.output.num_warns
    generator1 = sitemap_gen.InputURL({'href' : 'http://www.example.com/1',
                                       'priority' : '0.3',
                                       'lastmod' : '2004-11-14T01:00-07:00',
                                       'changefreq' : 'hourly',
                                       'unknownInURL' : 'attribute'})
    generator2 = sitemap_gen.InputURL({'href' : 'http://www.example.com/2',
                                       'priority' : '0.3',
                                       'lastmod' : '2004-11-14T01:00-07:00',
                                       'changefreq' : 'hourly'})
    # NOTE(review): the assertion tail of this test was lost in
    # transcription; counts below reflect the visible intent (one error for
    # the unknown attribute, one clean URL produced) -- verify upstream.
    self.assertEqual(sitemap_gen.output.num_errors, error + 1)
    generator2.ProduceURLs(self.Count)
    self.assertEqual(self.valid_urls, 1)
    self.assertEqual(self.invalid_urls, 0)
    self.assertEqual(sitemap_gen.output.num_warns, warn)
  #end def testInputURL

  def testInputURLList(self):
    """ Test one of the Input mechanisms: InputURLList """
    path = tempfile.mktemp()
    file = open(path, 'w')
    try:
      # Create a temp file we can read
      testText = """
http://www.example.com/foo/bar unknownInURLList=attribute
http://www.example.com/foo/xxx.pdf lastmod=2003-12-31T14:05:06+00:00
http://www.example.com/foo/yyy?x=12&y=23 changefreq=weekly priority=0.3
"""
      file.write(testText)
      file.close()

      # Feed in the data.  Make sure we get a warning on the bad attribute.
      self.valid_urls   = 0
      self.invalid_urls = 0
      warn = sitemap_gen.output.num_warns
      generator = sitemap_gen.InputURLList({'path' : path})
      generator.ProduceURLs(self.Count)
      self.assertEqual(self.valid_urls, 3)
      self.assertEqual(self.invalid_urls, 0)
      self.assertEqual(sitemap_gen.output.num_warns, warn + 1)
    finally:
      os.unlink(path)
  #end def testInputURLList

  def testInputDirectory(self):
    """Test one of the Input mechanisms: InputDirectory.

    I've seen a subtle path-bug appear when going into sub-sub-directories
    that didn't under just sub-directories.  So we go to the trouble to
    make a whole little directory tree to read.
    """
    counter = URLCounter('http://www.example.com/', True, (
      'http://www.example.com/',
      'http://www.example.com/one.html',
      'http://www.example.com/two.html',
      'http://www.example.com/xyzzy/',
      'http://www.example.com/xyzzy/thr.html',
      'http://www.example.com/xyzzy/zyxxy/',
      'http://www.example.com/xyzzy/zyxxy/fiv.html',
      ))
    path       = tempfile.mktemp()
    subpath    = os.path.join(path, 'xyzzy')
    subsubpath = os.path.join(subpath, 'zyxxy')
    try:
      # Create some dummy empty files
      os.mkdir(path)
      os.mkdir(subpath)
      os.mkdir(subsubpath)
      path_one = os.path.join(path, 'one.html')
      path_two = os.path.join(path, 'two.html')
      path_thr = os.path.join(subpath, 'thr.html')
      path_for = os.path.join(subpath, 'default.html')
      path_fiv = os.path.join(subsubpath, 'fiv.html')
      open(path_one, 'w').close()
      open(path_two, 'w').close()
      open(path_thr, 'w').close()
      open(path_for, 'w').close()
      open(path_fiv, 'w').close()

      # Feed in the data.  There should be no warnings.
      warn = sitemap_gen.output.num_warns
      generator = sitemap_gen.InputDirectory({'path' : path,
        'url' : 'http://www.example.com/',
        'default_file' : 'default.html'}, 'http://www.example.com/')
      generator.ProduceURLs(counter.Count)
      self.assertEqual(counter.NumValid(), 7)
      self.assertEqual(counter.NumInvalid(), 0)
      self.assertEqual(sitemap_gen.output.num_warns, warn)
    finally:
      os.unlink(path_one)
      os.unlink(path_two)
      os.unlink(path_thr)
      os.unlink(path_for)
      os.unlink(path_fiv)
      os.rmdir(subsubpath)
      os.rmdir(subpath)
      os.rmdir(path)
  #end def testInputDirectory

  def testInputAccessLogCLF(self):
    """ Test one of the Input mechanisms: InputAccessLog (Common logfile) """
    path = tempfile.mktemp()
    file = open(path, 'w')
    try:
      # Create a temp file we can read
      testText = """
msnbot.msn.com - - [15/May/2005:07:46:50 -0700] "GET /~guest/main/ HTTP/1.0" 200 5670
221.216.237.71 - - [15/May/2005:07:59:25 -0700] "GET /~guest/bookmark/ HTTP/1.1" 200 39195
221.216.237.71 - - [15/May/2005:07:59:27 -0700] "GET /favicon.ico HTTP/1.1" 404 217
c-67-161-121-105.hsd1.wa.comcast.net - - [15/May/2005:11:17:23 -0700] "GET /picts/top.jpg HTTP/1.1" 200 10044
cpe-65-24-155-46.columbus.res.rr.com - - [16/May/2005:22:53:07 -0700] "HEAD http://www.example.com/~guest HTTP/1.1" 200 0
"""
      file.write(testText)
      file.close()

      # Feed in the data
      self.valid_urls   = 0
      self.invalid_urls = 0
      warn = sitemap_gen.output.num_warns
      generator = sitemap_gen.InputAccessLog({'path' : path})
      generator.ProduceURLs(self.Count)
      self.assertEqual(self.valid_urls, 4)
      self.assertEqual(self.invalid_urls, 0)
      self.assertEqual(sitemap_gen.output.num_warns, warn)
    finally:
      os.unlink(path)
  #end def testInputAccessLogCLF

  def testInputAccessLogELF(self):
    """ Test one of the Input mechanisms: InputAccessLog (Extended logfile) """
    path = tempfile.mktemp()
    file = open(path, 'w')
    try:
      # Create a temp file we can read
      testText = """
#Software: Microsoft Internet Information Services 6.0
#Version: 1.0
#Date: 2004-03-22 09:20:36
#Fields: date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status
2004-03-22 09:20:36 192.168.0.58 GET /Default.htm - 80 - 4.5.11.3 Mozilla/4.0+(compatible;+MSIE+5.5;+Windows+98) 200 0 64
2004-03-22 09:22:58 192.168.0.58 GET /Default.htm - 80 - 24.87.160.82 Mozilla/4.0+(compatible;+MSIE+5.5;+Windows+98) 200 0 64
"""
      file.write(testText)
      file.close()

      # Feed in the data
      self.valid_urls   = 0
      self.invalid_urls = 0
      warn = sitemap_gen.output.num_warns
      generator = sitemap_gen.InputAccessLog({'path' : path})
      generator.ProduceURLs(self.Count)
      self.assertEqual(self.valid_urls, 2)
      self.assertEqual(self.invalid_urls, 0)
      self.assertEqual(sitemap_gen.output.num_warns, warn)
    finally:
      os.unlink(path)
  #end def testInputAccessLogELF

  def testInputSitemap(self):
    """ Test one of the Input mechanisms: InputSitemap """
    path1 = tempfile.mktemp('.xml')
    path2 = tempfile.mktemp('.xml')
    path3 = tempfile.mktemp('.xml')
    path4 = tempfile.mktemp('.xml')
    file1 = None
    file2 = None
    file3 = None
    file4 = None

    index = """<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex
  xmlns="http://www.google.com/schemas/sitemap/0.84"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84
                      http://www.google.com/schemas/sitemap/0.84/siteindex.xsd">
  <sitemap>
    <loc>http://www.example.com/path/to/%(PATH2)s</loc>
    <lastmod>2005-07-15T17:41:22Z</lastmod>
  </sitemap>
  <sitemap>
    <loc>http://www.example.com/path/to/%(PATH3)s</loc>
    <lastmod>2005-07-15T17:41:22Z</lastmod>
  </sitemap>
</sitemapindex>
"""
    content1 = """<?xml version="1.0" encoding="UTF-8"?>
<urlset
  xmlns="http://www.google.com/schemas/sitemap/0.84"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84
                      http://www.google.com/schemas/sitemap/0.84/sitemap.xsd">
  <url>
    <loc>http://www.example.com/another/path/to/samplefile1.html</loc>
    <lastmod>2005-07-13T00:00:12Z</lastmod>
    <priority>0.5000</priority>
  </url>
  <url>
    <loc>http://www.example.com/another/path/to/samplefile2.html</loc>
    <lastmod>2004-11-16T20:22:06Z</lastmod>
    <priority>0.5000</priority>
  </url>
</urlset>
"""
    content2 = """<?xml version="1.0" encoding="UTF-8"?>
<urlset
  xmlns="http://www.google.com/schemas/sitemap/0.84"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84
                      http://www.google.com/schemas/sitemap/0.84/sitemap.xsd">
  <url badSitemapAttr="Hello, World!">
    <loc>http://www.example.com/another/path/to/samplefile3.html</loc>
    <lastmod>2005-07-13T00:00:12Z</lastmod>
    <priority>0.5000</priority>
  </url>
  <url>
    <loc>http://www.example.com/another/path/to/samplefile4.html</loc>
    <lastmod>2004-11-16T20:22:06Z</lastmod>
    <priority>0.5000</priority>
  </url>
</urlset>
"""
    # This index is invalid because it points to another index file.
    badindex = """<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex
  xmlns="http://www.google.com/schemas/sitemap/0.84"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84
                      http://www.google.com/schemas/sitemap/0.84/siteindex.xsd">
  <sitemap>
    <loc>http://www.example.com/path/to/%(PATH2)s</loc>
    <lastmod>2005-07-15T17:41:22Z</lastmod>
  </sitemap>
  <sitemap>
    <loc>http://www.example.com/path/to/%(PATH1)s</loc>
    <lastmod>2005-07-15T17:41:22Z</lastmod>
  </sitemap>
</sitemapindex>
"""
    # Make a nice complicated set of two index files and two sitemaps.
    try:
      file1 = open(path1, 'wt')
      file2 = open(path2, 'wt')
      file3 = open(path3, 'wt')
      file4 = open(path4, 'wt')
      file1.write(index % {
        'PATH1' : os.path.basename(path1),
        'PATH2' : os.path.basename(path2),
        'PATH3' : os.path.basename(path3)})
      file2.write(content1)
      file3.write(content2)
      file4.write(badindex % {
        'PATH1' : os.path.basename(path1),
        'PATH2' : os.path.basename(path2),
        'PATH3' : os.path.basename(path3)})
      file1.close()
      file1 = None
      file2.close()
      file2 = None
      file3.close()
      file3 = None
      file4.close()
      file4 = None

      # Feed in the good data.  One warning comes from badSitemapAttr.
      self.valid_urls   = 0
      self.invalid_urls = 0
      warn = sitemap_gen.output.num_warns
      generator = sitemap_gen.InputSitemap({'path' : path1})
      generator.ProduceURLs(self.Count)
      self.assertEqual(self.valid_urls, 4)
      self.assertEqual(self.invalid_urls, 0)
      self.assertEqual(sitemap_gen.output.num_warns, warn + 1)

      # Feed in the bad data.  Should error once on the bad index and once
      # because it aborts processing the XML.
      self.valid_urls   = 0
      self.invalid_urls = 0
      errors = sitemap_gen.output.num_errors
      generator = sitemap_gen.InputSitemap({'path' : path4})
      generator.ProduceURLs(self.Count)
      self.assertEqual(self.valid_urls, 2)
      self.assertEqual(self.invalid_urls, 0)
      self.assertEqual(sitemap_gen.output.num_errors, errors + 2)
    finally:
      if file1 is not None:
        file1.close()
      if file2 is not None:
        file2.close()
      if file3 is not None:
        file3.close()
      if file4 is not None:
        file4.close()
      os.unlink(path1)
      os.unlink(path2)
      os.unlink(path3)
      os.unlink(path4)
  #end def testInputSitemap

  def testFilePathGenerator(self):
    """ Test our iteration of filenames """
    gen1 = sitemap_gen.FilePathGenerator()
    gen2 = sitemap_gen.FilePathGenerator()
    gen3 = sitemap_gen.FilePathGenerator()
    self.assert_(gen1.Preload('/tmp/bar/foo.xml'))
    self.assert_(gen2.Preload('foo.xml.gz'))
    self.assert_(gen3.Preload('/foo.gz'))
    self.assert_(not gen1.is_gzip)
    self.assert_(    gen2.is_gzip)
    self.assert_(    gen3.is_gzip)
    self.assertEqual(gen1.GeneratePath(0),
                     os.path.normpath('/tmp/bar/foo.xml'))
    self.assertEqual(gen2.GeneratePath(1), 'foo1.xml.gz')
    self.assertEqual(gen1.GeneratePath('_index.xml'),
                     os.path.normpath('/tmp/bar/foo_index.xml'))
    # NOTE(review): the expected URL values below were lost in
    # transcription and are reconstructed from the generator's naming
    # scheme -- verify against the upstream sitemap_gen test.
    self.assertEqual(gen1.GenerateURL('_index.xml', 'http://www.example.com/'),
                     'http://www.example.com/foo_index.xml')
    self.assertEqual(gen1.GenerateURL(2, 'http://www.example.com/'),
                     'http://www.example.com/foo2.xml')
    self.assertEqual(gen2.GenerateWildURL('http://www.example.com/'),
                     'http://www.example.com/foo*.xml.gz')
  #end def testFilePathGenerator

  def testSitemap(self):
    """Test a basic config of the overall sitemap class."""
    path1 = tempfile.mktemp()
    path2 = tempfile.mktemp(".xml.gz")
    file = open(path1, 'w')
    try:
      # Create a temp file we can read
      testText = """<?xml version="1.0" encoding="UTF-8"?>
<site
  base_url="http://www.example.com/"
  store_into="%(OUTPUTFILENAME)s"
  default_encoding="UTF-8"
  verbose="3"
  >
  <url href="http://www.example.com/.htaccess" />
  <url href="http://www.example.com/foo/bar.html" />
  <url href="http://www.example.com/foo/bar.gif" />
  <url href="http://www.example.com/foo/bar.html" />
  <url href="http://www.example.com/percent%%%%percent.html" />
  <url href="http://www.example.com/ümlat.html" />
  <filter action="drop" type="regexp" pattern="/\.[^/]*$" />
</site>
"""
      file.write(testText % {'OUTPUTFILENAME' : path2})
      file.close()

      # Bring up the engine
      warn  = sitemap_gen.output.num_warns
      error = sitemap_gen.output.num_errors
      sitemap = sitemap_gen.CreateSitemapFromFile(path1, True)
      self.assert_(sitemap)
      sitemap.Generate()
      self.assertEqual(sitemap_gen.output.num_warns, warn)
      self.assertEqual(sitemap_gen.output.num_errors, error)

      # Verify we got readable XML out of it: the dotfile is dropped by the
      # filter and the duplicate URL is collapsed, leaving 4 entries.
      file = gzip.open(path2, mode='rb')
      result = file.read()
      file.close()
      dom = xml.dom.minidom.parseString(result)
      self.assertEqual(len(dom.getElementsByTagName('url')), 4)
      self.assert_(result.find('http://www.example.com/foo/bar.html') > 0)
      self.assert_(result.find('http://www.example.com/foo/bar.gif') > 0)
      self.assert_(result.find('%25%25') > 0)
      self.assert_(result.find('%C3%BC') > 0)
    finally:
      os.unlink(path1)
      if os.path.exists(path2):
        os.unlink(path2)
  #end def testSitemap
#end class TestSiteMap
#
# __main__
#

if __name__ == '__main__':
  unittest.main()