00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_path.h 00019 * @brief A path manipulation library 00020 * 00021 * All incoming and outgoing paths are non-NULL and in UTF-8, unless 00022 * otherwise documented. 00023 * 00024 * No result path ever ends with a separator, no matter whether the 00025 * path is a file or directory, because we always canonicalize() it. 00026 * 00027 * Nearly all the @c svn_path_xxx functions expect paths passed into 00028 * them to be in canonical form as defined by the Subversion path 00029 * library itself. The only functions which do *not* have such 00030 * expectations are: 00031 * 00032 * - @c svn_path_canonicalize() 00033 * - @c svn_path_is_canonical() 00034 * - @c svn_path_internal_style() 00035 * 00036 * For the most part, we mean what most anyone would mean when talking 00037 * about canonical paths, but to be on the safe side, you must run 00038 * your paths through @c svn_path_canonicalize() before passing them to 00039 * other functions in this API. 00040 */ 00041 00042 #ifndef SVN_PATH_H 00043 #define SVN_PATH_H 00044 00045 00046 #include <apr_pools.h> 00047 #include <apr_tables.h> 00048 00049 #include "svn_string.h" 00050 #include "svn_error.h" 00051 00052 00053 #ifdef __cplusplus 00054 extern "C" { 00055 #endif /* __cplusplus */ 00056 00057 00058 00059 /** Convert @a path from the local style to the canonical internal style. */ 00060 const char *svn_path_internal_style(const char *path, apr_pool_t *pool); 00061 00062 /** Convert @a path from the canonical internal style to the local style. */ 00063 const char *svn_path_local_style(const char *path, apr_pool_t *pool); 00064 00065 00066 /** Join a base path (@a base) with a component (@a component), allocated in 00067 * @a pool. 00068 * 00069 * If either @a base or @a component is the empty path, then the other 00070 * argument will be copied and returned. If both are the empty path the 00071 * empty path is returned. 00072 * 00073 * If the @a component is an absolute path, then it is copied and returned. 00074 * Exactly one slash character ('/') is used to joined the components, 00075 * accounting for any trailing slash in @a base. 00076 * 00077 * Note that the contents of @a base are not examined, so it is possible to 00078 * use this function for constructing URLs, or for relative URLs or 00079 * repository paths. 00080 * 00081 * This function is NOT appropriate for native (local) file 00082 * paths. Only for "internal" canonicalized paths, since it uses '/' 00083 * for the separator. Further, an absolute path (for @a component) is 00084 * based on a leading '/' character. Thus, an "absolute URI" for the 00085 * @a component won't be detected. An absolute URI can only be used 00086 * for the base. 00087 */ 00088 char *svn_path_join(const char *base, 00089 const char *component, 00090 apr_pool_t *pool); 00091 00092 /** Join multiple components onto a @a base path, allocated in @a pool. The 00093 * components are terminated by a @c NULL. 00094 * 00095 * If any component is the empty string, it will be ignored. 00096 * 00097 * If any component is an absolute path, then it resets the base and 00098 * further components will be appended to it. 00099 * 00100 * See svn_path_join() for further notes about joining paths. 00101 */ 00102 char *svn_path_join_many(apr_pool_t *pool, const char *base, ...); 00103 00104 00105 /** Get the basename of the specified canonicalized @a path. The 00106 * basename is defined as the last component of the path (ignoring any 00107 * trailing slashes). If the @a path is root ("/"), then that is 00108 * returned. Otherwise, the returned value will have no slashes in 00109 * it. 00110 * 00111 * Example: svn_path_basename("/foo/bar") -> "bar" 00112 * 00113 * The returned basename will be allocated in @a pool. 00114 * 00115 * @note If an empty string is passed, then an empty string will be returned. 00116 */ 00117 char *svn_path_basename(const char *path, apr_pool_t *pool); 00118 00119 /** Get the dirname of the specified canonicalized @a path, defined as 00120 * the path with its basename removed. 00121 * 00122 * Get the dirname of the specified @a path, defined as the path with its 00123 * basename removed. If @a path is root ("/"), it is returned unchanged. 00124 * 00125 * The returned dirname will be allocated in @a pool. 00126 */ 00127 char *svn_path_dirname(const char *path, apr_pool_t *pool); 00128 00129 /** Split @a path into a root portion and an extension such that 00130 * the root + the extension = the original path, and where the 00131 * extension contains no period (.) characters. If not @c NULL, set 00132 * @a *path_root to the root portion. If not @c NULL, set 00133 * @a *path_ext to the extension (or "" if there is no extension 00134 * found). Allocate both @a *path_root and @a *path_ext in @a pool. 00135 * 00136 * @since New in 1.5. 00137 */ 00138 void svn_path_splitext(const char **path_root, const char **path_ext, 00139 const char *path, apr_pool_t *pool); 00140 00141 /** Return the number of components in the canonicalized @a path. 00142 * 00143 * @since New in 1.1. 00144 */ 00145 apr_size_t 00146 svn_path_component_count(const char *path); 00147 00148 /** Add a @a component (a NULL-terminated C-string) to the 00149 * canonicalized @a path. @a component is allowed to contain 00150 * directory separators. 00151 * 00152 * If @a path is non-empty, append the appropriate directory separator 00153 * character, and then @a component. If @a path is empty, simply set it to 00154 * @a component; don't add any separator character. 00155 * 00156 * If the result ends in a separator character, then remove the separator. 00157 */ 00158 void svn_path_add_component(svn_stringbuf_t *path, 00159 const char *component); 00160 00161 /** Remove one component off the end of the canonicalized @a path. */ 00162 void svn_path_remove_component(svn_stringbuf_t *path); 00163 00164 /** Remove @a n components off the end of the canonicalized @a path. 00165 * Equivalent to calling svn_path_remove_component() @a n times. 00166 * 00167 * @since New in 1.1. 00168 */ 00169 void svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n); 00170 00171 /** Divide the canonicalized @a path into @a *dirpath and @a 00172 * *base_name, allocated in @a pool. 00173 * 00174 * If @a dirpath or @a base_name is NULL, then don't set that one. 00175 * 00176 * Either @a dirpath or @a base_name may be @a path's own address, but they 00177 * may not both be the same address, or the results are undefined. 00178 * 00179 * If @a path has two or more components, the separator between @a dirpath 00180 * and @a base_name is not included in either of the new names. 00181 * 00182 * examples: 00183 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre> 00184 * - <pre>"/bar" ==> "/" and "bar"</pre> 00185 * - <pre>"/" ==> "/" and "/"</pre> 00186 * - <pre>"X:/" ==> "X:/" and "X:/"</pre> 00187 * - <pre>"bar" ==> "" and "bar"</pre> 00188 * - <pre>"" ==> "" and ""</pre> 00189 */ 00190 void svn_path_split(const char *path, 00191 const char **dirpath, 00192 const char **base_name, 00193 apr_pool_t *pool); 00194 00195 00196 /** Return non-zero iff @a path is empty ("") or represents the current 00197 * directory -- that is, if prepending it as a component to an existing 00198 * path would result in no meaningful change. 00199 */ 00200 int svn_path_is_empty(const char *path); 00201 00202 /** Return TRUE if @a directory is considered a root directory on the platform 00203 * at hand, amongst which '/' on all platforms or 'X:/', '\\\\?\\X:/', 00204 * '\\\\.\\..', '\\\\server\\share' on Windows. 00205 * 00206 * @since New in 1.5. 00207 */ 00208 svn_boolean_t svn_dirent_is_root(const char *dirent, apr_size_t len); 00209 00210 00211 /** Return a new path (or URL) like @a path, but transformed such that 00212 * some types of path specification redundancies are removed. 00213 * 00214 * This involves collapsing redundant "/./" elements, removing 00215 * multiple adjacent separator characters, removing trailing 00216 * separator characters, and possibly other semantically inoperative 00217 * transformations. 00218 * 00219 * The returned path may be statically allocated, equal to @a path, or 00220 * allocated from @a pool. 00221 */ 00222 const char *svn_path_canonicalize(const char *path, apr_pool_t *pool); 00223 00224 /** Return @c TRUE iff path is canonical. Use @a pool for temporary 00225 * allocations. 00226 * 00227 * @note The test for canonicalization is currently defined as 00228 * "looks exactly the same as @c svn_path_canonicalize() would make 00229 * it look". 00230 * 00231 * @since New in 1.5. 00232 */ 00233 svn_boolean_t svn_path_is_canonical(const char *path, apr_pool_t *pool); 00234 00235 00236 /** Return an integer greater than, equal to, or less than 0, according 00237 * as @a path1 is greater than, equal to, or less than @a path2. 00238 */ 00239 int svn_path_compare_paths(const char *path1, const char *path2); 00240 00241 00242 /** Return the longest common path shared by two canonicalized paths, 00243 * @a path1 and @a path2. If there's no common ancestor, return the 00244 * empty path. 00245 * 00246 * @a path1 and @a path2 may be URLs. In order for two URLs to have 00247 * a common ancestor, they must (a) have the same protocol (since two URLs 00248 * with the same path but different protocols may point at completely 00249 * different resources), and (b) share a common ancestor in their path 00250 * component, i.e. 'protocol://' is not a sufficient ancestor. 00251 */ 00252 char *svn_path_get_longest_ancestor(const char *path1, 00253 const char *path2, 00254 apr_pool_t *pool); 00255 00256 /** Convert @a relative canonicalized path to an absolute path and 00257 * return the results in @a *pabsolute, allocated in @a pool. 00258 * 00259 * @a relative may be a URL, in which case no attempt is made to convert it, 00260 * and a copy of the URL is returned. 00261 */ 00262 svn_error_t * 00263 svn_path_get_absolute(const char **pabsolute, 00264 const char *relative, 00265 apr_pool_t *pool); 00266 00267 /** Return the path part of the canonicalized @a path in @a 00268 * *pdirectory, and the file part in @a *pfile. If @a path is a 00269 * directory, set @a *pdirectory to @a path, and @a *pfile to the 00270 * empty string. If @a path does not exist it is treated as if it is 00271 * a file, since directories do not normally vanish. 00272 */ 00273 svn_error_t * 00274 svn_path_split_if_file(const char *path, 00275 const char **pdirectory, 00276 const char **pfile, 00277 apr_pool_t *pool); 00278 00279 /** Find the common prefix of the canonicalized paths in @a targets 00280 * (an array of <tt>const char *</tt>'s), and remove redundant paths if @a 00281 * remove_redundancies is TRUE. 00282 * 00283 * - Set @a *pcommon to the absolute path of the path or URL common to 00284 * all of the targets. If the targets have no common prefix, or 00285 * are a mix of URLs and local paths, set @a *pcommon to the 00286 * empty string. 00287 * 00288 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets 00289 * to an array of targets relative to @a *pcommon, and if 00290 * @a remove_redundancies is TRUE, omit any paths/URLs that are 00291 * descendants of another path/URL in @a targets. If *pcommon 00292 * is empty, @a *pcondensed_targets will contain full URLs and/or 00293 * absolute paths; redundancies can still be removed (from both URLs 00294 * and paths). If @a pcondensed_targets is NULL, leave it alone. 00295 * 00296 * Else if there is exactly one target, then 00297 * 00298 * - Set @a *pcommon to that target, and 00299 * 00300 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets 00301 * to an array containing zero elements. Else if 00302 * @a pcondensed_targets is NULL, leave it alone. 00303 * 00304 * If there are no items in @a targets, set @a *pcommon and (if 00305 * applicable) @a *pcondensed_targets to @c NULL. 00306 * 00307 * @note There is no guarantee that @a *pcommon is within a working 00308 * copy. */ 00309 svn_error_t * 00310 svn_path_condense_targets(const char **pcommon, 00311 apr_array_header_t **pcondensed_targets, 00312 const apr_array_header_t *targets, 00313 svn_boolean_t remove_redundancies, 00314 apr_pool_t *pool); 00315 00316 00317 /** Copy a list of canonicalized @a targets, one at a time, into @a 00318 * pcondensed_targets, omitting any targets that are found earlier in 00319 * the list, or whose ancestor is found earlier in the list. Ordering 00320 * of targets in the original list is preserved in the condensed list 00321 * of targets. Use @a pool for any allocations. 00322 * 00323 * How does this differ in functionality from svn_path_condense_targets()? 00324 * 00325 * Here's the short version: 00326 * 00327 * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-) 00328 * Order matters for updates because a multi-arg update is not 00329 * atomic, and CVS users are used to, when doing 'cvs up targetA 00330 * targetB' seeing targetA get updated, then targetB. I think the 00331 * idea is that if you're in a time-sensitive or flaky-network 00332 * situation, a user can say, "I really *need* to update 00333 * wc/A/D/G/tau, but I might as well update my whole working copy if 00334 * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if 00335 * something dies in the middles of the 'wc' update, at least the 00336 * user has 'tau' up-to-date. 00337 * 00338 * 2. Also, we have this notion of an anchor and a target for updates 00339 * (the anchor is where the update editor is rooted, the target is 00340 * the actual thing we want to update). I needed a function that 00341 * would NOT screw with my input paths so that I could tell the 00342 * difference between someone being in A/D and saying 'svn up G' and 00343 * being in A/D/G and saying 'svn up .' -- believe it or not, these 00344 * two things don't mean the same thing. svn_path_condense_targets() 00345 * plays with absolute paths (which is fine, so does 00346 * svn_path_remove_redundancies()), but the difference is that it 00347 * actually tweaks those targets to be relative to the "grandfather 00348 * path" common to all the targets. Updates don't require a 00349 * "grandfather path" at all, and even if it did, the whole 00350 * conversion to an absolute path drops the crucial difference 00351 * between saying "i'm in foo, update bar" and "i'm in foo/bar, 00352 * update '.'" 00353 */ 00354 svn_error_t * 00355 svn_path_remove_redundancies(apr_array_header_t **pcondensed_targets, 00356 const apr_array_header_t *targets, 00357 apr_pool_t *pool); 00358 00359 00360 /** Decompose the canonicalized @a path into an array of <tt>const 00361 * char *</tt> components, allocated in @a pool. If @a path is 00362 * absolute, the first component will be a lone dir separator (the 00363 * root directory). 00364 */ 00365 apr_array_header_t *svn_path_decompose(const char *path, 00366 apr_pool_t *pool); 00367 00368 /** Join an array of <tt>const char *</tt> components into a '/' 00369 * separated path, allocated in @a pool. The joined path is absolute if 00370 * the first component is a lone dir separator. 00371 * 00372 * Calling svn_path_compose() on the output of svn_path_decompose() 00373 * will return the exact same path. 00374 * 00375 * @since New in 1.5. 00376 */ 00377 const char *svn_path_compose(const apr_array_header_t *components, 00378 apr_pool_t *pool); 00379 00380 /** Test that @a name is a single path component, that is: 00381 * - not @c NULL or empty. 00382 * - not a `/'-separated directory path 00383 * - not empty or `..' 00384 */ 00385 svn_boolean_t svn_path_is_single_path_component(const char *name); 00386 00387 00388 /** 00389 * Test to see if a backpath, i.e. '..', is present in @a path. 00390 * If not, return @c FALSE. 00391 * If so, return @c TRUE. 00392 * 00393 * @since New in 1.1. 00394 */ 00395 svn_boolean_t svn_path_is_backpath_present(const char *path); 00396 00397 00398 /** Test if @a path2 is a child of @a path1. 00399 * If not, return @c NULL. 00400 * If so, return a copy of the remainder path, allocated in @a pool. 00401 * (The remainder is the component which, added to @a path1, yields 00402 * @a path2. The remainder does not begin with a dir separator.) 00403 * 00404 * Both paths must be in canonical form, and must either be absolute, 00405 * or contain no ".." components. 00406 * 00407 * If @a path2 is the same as @a path1, it is not considered a child, so the 00408 * result is @c NULL; an empty string is never returned. 00409 * 00410 * @note In 1.5 this function has been extended to allow a @c NULL @a pool 00411 * in which case a pointer into @a path2 will be returned to 00412 * identify the remainder path. 00413 * 00414 * ### todo: the ".." restriction is unfortunate, and would ideally 00415 * be lifted by making the implementation smarter. But this is not 00416 * trivial: if the path is "../foo", how do you know whether or not 00417 * the current directory is named "foo" in its parent? 00418 */ 00419 const char *svn_path_is_child(const char *path1, 00420 const char *path2, 00421 apr_pool_t *pool); 00422 00423 /** Return TRUE if @a path1 is an ancestor of @a path2 or the paths are equal 00424 * and FALSE otherwise. 00425 * 00426 * @since New in 1.3. 00427 */ 00428 svn_boolean_t 00429 svn_path_is_ancestor(const char *path1, const char *path2); 00430 00431 /** 00432 * Check whether @a path is a valid Subversion path. 00433 * 00434 * A valid Subversion pathname is a UTF-8 string without control 00435 * characters. "Valid" means Subversion can store the pathname in 00436 * a repository. There may be other, OS-specific, limitations on 00437 * what paths can be represented in a working copy. 00438 * 00439 * ASSUMPTION: @a path is a valid UTF-8 string. This function does 00440 * not check UTF-8 validity. 00441 * 00442 * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if 00443 * invalid. 00444 * 00445 * @since New in 1.2. 00446 */ 00447 svn_error_t *svn_path_check_valid(const char *path, apr_pool_t *pool); 00448 00449 00450 /** URI/URL stuff 00451 * 00452 * @defgroup svn_path_uri_stuff URI/URL conversion 00453 * @{ 00454 */ 00455 00456 /** Return TRUE iff @a path looks like a valid absolute URL. */ 00457 svn_boolean_t svn_path_is_url(const char *path); 00458 00459 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */ 00460 svn_boolean_t svn_path_is_uri_safe(const char *path); 00461 00462 /** Return a URI-encoded copy of @a path, allocated in @a pool. */ 00463 const char *svn_path_uri_encode(const char *path, apr_pool_t *pool); 00464 00465 /** Return a URI-decoded copy of @a path, allocated in @a pool. */ 00466 const char *svn_path_uri_decode(const char *path, apr_pool_t *pool); 00467 00468 /** Extend @a url by @a component, URI-encoding that @a component 00469 * before adding it to the @a url; return the new @a url, allocated in 00470 * @a pool. If @a component is @c NULL, just return a copy of @a url, 00471 * allocated in @a pool. 00472 * 00473 * @a component need not be a single path segment, but if it contains 00474 * multiple segments, they must be separated by '/'. If @a component 00475 * is already URI-encoded, just use <tt>svn_path_join (url, component, 00476 * pool)</tt> instead. 00477 * 00478 * @a url need not be a canonical path; it may have a trailing '/'. 00479 */ 00480 const char *svn_path_url_add_component(const char *url, 00481 const char *component, 00482 apr_pool_t *pool); 00483 00484 /** 00485 * Convert @a iri (Internationalized URI) to an URI. 00486 * The return value may be the same as @a iri if it was already 00487 * a URI. Else, allocate the return value in @a pool. 00488 * 00489 * @since New in 1.1. 00490 */ 00491 const char *svn_path_uri_from_iri(const char *iri, 00492 apr_pool_t *pool); 00493 00494 /** 00495 * URI-encode certain characters in @a uri that are not valid in an URI, but 00496 * doesn't have any special meaning in @a uri at their positions. If no 00497 * characters need escaping, just return @a uri. 00498 * 00499 * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `. 00500 * This may be extended in the future to do context-dependent escaping. 00501 * 00502 * @since New in 1.1. 00503 */ 00504 const char *svn_path_uri_autoescape(const char *uri, 00505 apr_pool_t *pool); 00506 00507 /** @} */ 00508 00509 /** Charset conversion stuff 00510 * 00511 * @defgroup svn_path_charset_stuff Charset conversion 00512 * @{ 00513 */ 00514 00515 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */ 00516 svn_error_t *svn_path_cstring_from_utf8(const char **path_apr, 00517 const char *path_utf8, 00518 apr_pool_t *pool); 00519 00520 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */ 00521 svn_error_t *svn_path_cstring_to_utf8(const char **path_utf8, 00522 const char *path_apr, 00523 apr_pool_t *pool); 00524 00525 00526 /** @} */ 00527 00528 #ifdef __cplusplus 00529 } 00530 #endif /* __cplusplus */ 00531 00532 00533 #endif /* SVN_PATH_H */