>>> from mechanize._rfc3986 import urlsplit, urljoin, remove_dot_segments

Some common cases

>>> urlsplit("http://example.com/spam/eggs/spam.html?apples=pears&a=b#foo")
('http', 'example.com', '/spam/eggs/spam.html', 'apples=pears&a=b', 'foo')
>>> urlsplit("http://example.com/spam.html#foo")
('http', 'example.com', '/spam.html', None, 'foo')
>>> urlsplit("ftp://example.com/foo.gif")
('ftp', 'example.com', '/foo.gif', None, None)
>>> urlsplit('ftp://joe:password@example.com:port')
('ftp', 'joe:password@example.com:port', '', None, None)
>>> urlsplit("mailto:jjl@pobox.com")
('mailto', None, 'jjl@pobox.com', None, None)

The five path productions

path-abempty:

>>> urlsplit("http://www.example.com")
('http', 'www.example.com', '', None, None)
>>> urlsplit("http://www.example.com/foo")
('http', 'www.example.com', '/foo', None, None)

path-absolute:

>>> urlsplit("a:/")
('a', None, '/', None, None)
>>> urlsplit("a:/b:/c/")
('a', None, '/b:/c/', None, None)

path-noscheme:

>>> urlsplit("a:b/:c/")
('a', None, 'b/:c/', None, None)

path-rootless:

>>> urlsplit("a:b:/c/")
('a', None, 'b:/c/', None, None)

path-empty:

>>> urlsplit("quack:")
('quack', None, '', None, None)

>>> remove_dot_segments("/a/b/c/./../../g")
'/a/g'
>>> remove_dot_segments("mid/content=5/../6")
'mid/6'
>>> remove_dot_segments("/b/c/.")
'/b/c/'
>>> remove_dot_segments("/b/c/./.")
'/b/c/'
>>> remove_dot_segments(".")
''
>>> remove_dot_segments("/.")
'/'
>>> remove_dot_segments("./")
''
>>> remove_dot_segments("/..")
'/'
>>> remove_dot_segments("/../")
'/'

Examples from RFC 3986 section 5.4

Normal Examples

>>> base = "http://a/b/c/d;p?q"
>>> def join(uri): return urljoin(base, uri)
>>> join("g:h")
'g:h'
>>> join("g")
'http://a/b/c/g'
>>> join("./g")
'http://a/b/c/g'
>>> join("g/")
'http://a/b/c/g/'
>>> join("/g")
'http://a/g'
>>> join("//g")
'http://g'
>>> join("?y")
'http://a/b/c/d;p?y'
>>> join("g?y")
'http://a/b/c/g?y'
>>> join("#s")
'http://a/b/c/d;p?q#s'
>>> join("g#s")
'http://a/b/c/g#s'
>>> join("g?y#s")
'http://a/b/c/g?y#s'
>>> join(";x")
'http://a/b/c/;x'
>>> join("g;x")
'http://a/b/c/g;x'
>>> join("g;x?y#s")
'http://a/b/c/g;x?y#s'
>>> join("")
'http://a/b/c/d;p?q'
>>> join(".")
'http://a/b/c/'
>>> join("./")
'http://a/b/c/'
>>> join("..")
'http://a/b/'
>>> join("../")
'http://a/b/'
>>> join("../g")
'http://a/b/g'
>>> join("../..")
'http://a/'
>>> join("../../")
'http://a/'
>>> join("../../g")
'http://a/g'

Abnormal Examples

>>> join("../../../g")
'http://a/g'
>>> join("../../../../g")
'http://a/g'
>>> join("/./g")
'http://a/g'
>>> join("/../g")
'http://a/g'
>>> join("g.")
'http://a/b/c/g.'
>>> join(".g")
'http://a/b/c/.g'
>>> join("g..")
'http://a/b/c/g..'
>>> join("..g")
'http://a/b/c/..g'
>>> join("./../g")
'http://a/b/g'
>>> join("./g/.")
'http://a/b/c/g/'
>>> join("g/./h")
'http://a/b/c/g/h'
>>> join("g/../h")
'http://a/b/c/h'
>>> join("g;x=1/./y")
'http://a/b/c/g;x=1/y'
>>> join("g;x=1/../y")
'http://a/b/c/y'
>>> join("g?y/./x")
'http://a/b/c/g?y/./x'
>>> join("g?y/../x")
'http://a/b/c/g?y/../x'
>>> join("g#s/./x")
'http://a/b/c/g#s/./x'
>>> join("g#s/../x")
'http://a/b/c/g#s/../x'
>>> join("http:g")
'http://a/b/c/g'

Additional urljoin tests, not taken from RFC:

>>> join("/..")
'http://a/'
>>> join("/../")
'http://a/'