svn_path.h

Go to the documentation of this file.
00001 /**
00002  * @copyright
00003  * ====================================================================
00004  * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
00005  *
00006  * This software is licensed as described in the file COPYING, which
00007  * you should have received as part of this distribution.  The terms
00008  * are also available at http://subversion.tigris.org/license-1.html.
00009  * If newer versions of this license are posted there, you may use a
00010  * newer version instead, at your option.
00011  *
00012  * This software consists of voluntary contributions made by many
00013  * individuals.  For exact contribution history, see the revision
00014  * history and logs, available at http://subversion.tigris.org/.
00015  * ====================================================================
00016  * @endcopyright
00017  *
00018  * @file svn_path.h
00019  * @brief A path manipulation library
00020  *
00021  * All incoming and outgoing paths are non-null and in UTF-8, unless
00022  * otherwise documented.
00023  * 
00024  * No result path ever ends with a separator, no matter whether the
00025  * path is a file or directory, because we always canonicalize() it.
00026  *
00027  * All paths passed to the @c svn_path_xxx functions, with the exceptions of
00028  * the svn_path_canonicalize() and svn_path_internal_style() functions,
00029  * must be in canonical form.
00030  */
00031 
00032 #ifndef SVN_PATH_H
00033 #define SVN_PATH_H
00034 
00035 
00036 #include <apr_pools.h>
00037 #include <apr_tables.h>
00038 
00039 #include "svn_string.h"
00040 #include "svn_error.h"
00041 
00042 
00043 #ifdef __cplusplus
00044 extern "C" {
00045 #endif /* __cplusplus */
00046 
00047 
00048 
00049 /** Convert @a path from the local style to the canonical internal style. */
00050 const char *svn_path_internal_style(const char *path, apr_pool_t *pool);
00051 
00052 /** Convert @a path from the canonical internal style to the local style. */
00053 const char *svn_path_local_style(const char *path, apr_pool_t *pool);
00054 
00055 
00056 /** Join a base path (@a base) with a component (@a component), allocated in 
00057  * @a pool.
00058  *
00059  * If either @a base or @a component is the empty path, then the other 
00060  * argument will be copied and returned.  If both are the empty path the 
00061  * empty path is returned.
00062  *
00063  * If the @a component is an absolute path, then it is copied and returned.
00064  * Exactly one slash character ('/') is used to join the components,
00065  * accounting for any trailing slash in @a base.
00066  *
00067  * Note that the contents of @a base are not examined, so it is possible to
00068  * use this function for constructing URLs, or for relative URLs or
00069  * repository paths.
00070  *
00071  * This function is NOT appropriate for native (local) file
00072  * paths. Only for "internal" canonicalized paths, since it uses '/'
00073  * for the separator. Further, an absolute path (for @a component) is
00074  * based on a leading '/' character.  Thus, an "absolute URI" for the
00075  * @a component won't be detected. An absolute URI can only be used
00076  * for the base.
00077  */
00078 char *svn_path_join(const char *base,
00079                     const char *component,
00080                     apr_pool_t *pool);
00081 
00082 /** Join multiple components onto a @a base path, allocated in @a pool. The
00083  * components are terminated by a @c NULL.
00084  *
00085  * If any component is the empty string, it will be ignored.
00086  *
00087  * If any component is an absolute path, then it resets the base and
00088  * further components will be appended to it.
00089  *
00090  * See svn_path_join() for further notes about joining paths.
00091  */
00092 char *svn_path_join_many(apr_pool_t *pool, const char *base, ...);
00093 
00094 
00095 /** Get the basename of the specified canonicalized @a path.  The
00096  * basename is defined as the last component of the path (ignoring any
00097  * trailing slashes).  If the @a path is root ("/"), then that is
00098  * returned.  Otherwise, the returned value will have no slashes in
00099  * it.
00100  *
00101  * Example: svn_path_basename("/foo/bar") -> "bar"
00102  *
00103  * The returned basename will be allocated in @a pool.
00104  *
00105  * @note If an empty string is passed, then an empty string will be returned.
00106  */
00107 char *svn_path_basename(const char *path, apr_pool_t *pool);
00108 
00109 /** Get the dirname of the specified canonicalized @a path, defined as
00110  * the path with its basename removed.
00111  *
00112  * If @a path is root ("/"), it is returned unchanged, i.e. the dirname
00113  * of "/" is "/".
00114  *
00115  * The returned dirname will be allocated in @a pool.
00116  */
00117 char *svn_path_dirname(const char *path, apr_pool_t *pool);
00118 
00119 /** Return the number of components in the canonicalized @a path.
00120  *
00121  * @since New in 1.1.
00122 */
00123 apr_size_t
00124 svn_path_component_count(const char *path);
00125 
00126 /** Add a @a component (a null-terminated C-string) to the
00127  * canonicalized @a path.  @a component is allowed to contain
00128  * directory separators.
00129  *
00130  * If @a path is non-empty, append the appropriate directory separator
00131  * character, and then @a component.  If @a path is empty, simply set it to
00132  * @a component; don't add any separator character.
00133  *
00134  * If the result ends in a separator character, then remove the separator.
00135  */
00136 void svn_path_add_component(svn_stringbuf_t *path, 
00137                             const char *component);
00138 
00139 /** Remove one component off the end of the canonicalized @a path. */
00140 void svn_path_remove_component(svn_stringbuf_t *path);
00141 
00142 /** Remove @a n components off the end of the canonicalized @a path.
00143  * Equivalent to calling svn_path_remove_component() @a n times.
00144  *
00145  * @since New in 1.1.
00146  */
00147 void svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n);
00148 
00149 /** Divide the canonicalized @a path into @a *dirpath and @a
00150  * *base_name, allocated in @a pool.
00151  *
00152  * If @a dirpath or @a base_name is null, then don't set that one.
00153  *
00154  * Either @a dirpath or @a base_name may be @a path's own address, but they 
00155  * may not both be the same address, or the results are undefined.
00156  *
00157  * If @a path has two or more components, the separator between @a dirpath
00158  * and @a base_name is not included in either of the new names.
00159  *
00160  *   examples:
00161  *             - <pre>"/foo/bar/baz"  ==>  "/foo/bar" and "baz"</pre>
00162  *             - <pre>"/bar"          ==>  "/"  and "bar"</pre>
00163  *             - <pre>"/"             ==>  "/"  and "/"</pre>
00164  *             - <pre>"bar"           ==>  ""   and "bar"</pre>
00165  *             - <pre>""              ==>  ""   and ""</pre>
00166  */
00167 void svn_path_split(const char *path, 
00168                     const char **dirpath,
00169                     const char **base_name,
00170                     apr_pool_t *pool);
00171 
00172 
00173 /** Return non-zero iff @a path is empty ("") or represents the current
00174  * directory -- that is, if prepending it as a component to an existing
00175  * path would result in no meaningful change.
00176  */
00177 int svn_path_is_empty(const char *path);
00178 
00179 
00180 /** Return a new path (or URL) like @a path, but transformed such that
00181  * some types of path specification redundancies are removed.
00182  *
00183  * This involves collapsing redundant "/./" elements, removing
00184  * multiple adjacent separator characters, removing trailing
00185  * separator characters, and possibly other semantically inoperative
00186  * transformations.
00187  *
00188  * The returned path may be statically allocated, equal to @a path, or
00189  * allocated from @a pool.
00190  */
00191 const char *svn_path_canonicalize(const char *path, apr_pool_t *pool);
00192 
00193 
00194 /** Return an integer greater than, equal to, or less than 0, according
00195  * as @a path1 is greater than, equal to, or less than @a path2.
00196  */
00197 int svn_path_compare_paths(const char *path1, const char *path2);
00198 
00199 
00200 /** Return the longest common path shared by two canonicalized paths,
00201  * @a path1 and @a path2.  If there's no common ancestor, return the
00202  * empty path.
00203  *
00204  * @a path1 and @a path2 may be URLs.  In order for two URLs to have 
00205  * a common ancestor, they must (a) have the same protocol (since two URLs 
00206  * with the same path but different protocols may point at completely 
00207  * different resources), and (b) share a common ancestor in their path 
00208  * component, i.e. 'protocol://' is not a sufficient ancestor.
00209  */
00210 char *svn_path_get_longest_ancestor(const char *path1,
00211                                     const char *path2,
00212                                     apr_pool_t *pool);
00213 
00214 /** Convert @a relative canonicalized path to an absolute path and
00215  * return the results in @a *pabsolute, allocated in @a pool.
00216  *
00217  * @a relative may be a URL, in which case no attempt is made to convert it, 
00218  * and a copy of the URL is returned. 
00219  */
00220 svn_error_t *
00221 svn_path_get_absolute(const char **pabsolute,
00222                       const char *relative,
00223                       apr_pool_t *pool);
00224 
00225 /** Return the path part of the canonicalized @a path in @a
00226  * *pdirectory, and the file part in @a *pfile.  If @a path is a
00227  * directory, set @a *pdirectory to @a path, and @a *pfile to the
00228  * empty string.  If @a path does not exist it is treated as if it is
00229  * a file, since directories do not normally vanish.
00230  */
00231 svn_error_t *
00232 svn_path_split_if_file(const char *path,
00233                        const char **pdirectory, 
00234                        const char **pfile,
00235                        apr_pool_t *pool);
00236 
00237 /** Find the common prefix of the canonicalized paths in @a targets
00238  * (an array of @a const char *'s), and remove redundant paths if @a
00239  * remove_redundancies is true.
00240  *
00241  *   - Set @a *pcommon to the absolute path of the path or URL common to
00242  *     all of the targets.  If the targets have no common prefix, or
00243  *     are a mix of URLs and local paths, set @a *pcommon to the
00244  *     empty string.
00245  *
00246  *   - If @a pcondensed_targets is non-null, set @a *pcondensed_targets
00247  *     to an array of targets relative to @a *pcommon, and if 
00248  *     @a remove_redundancies is true, omit any paths/URLs that are
00249  *     descendants of another path/URL in @a targets.  If *pcommon
00250  *     is empty, @a *pcondensed_targets will contain full URLs and/or
00251  *     absolute paths; redundancies can still be removed (from both URLs 
00252  *     and paths).  If @a pcondensed_targets is null, leave it alone.  
00253  *
00254  * If, rather than several targets, there is exactly one target, then
00255  *
00256  *   - Set @a *pcommon to that target, and
00257  *
00258  *   - If @a pcondensed_targets is non-null, set @a *pcondensed_targets
00259  *     to an array containing zero elements.  Else if
00260  *     @a pcondensed_targets is null, leave it alone.
00261  *
00262  * If there are no items in @a targets, set @a *pcommon and (if
00263  * applicable) @a *pcondensed_targets to @c NULL.
00264  *
00265  * @note There is no guarantee that @a *pcommon is within a working
00266  * copy.  */
00267 svn_error_t *
00268 svn_path_condense_targets(const char **pcommon,
00269                           apr_array_header_t **pcondensed_targets,
00270                           const apr_array_header_t *targets,
00271                           svn_boolean_t remove_redundancies,
00272                           apr_pool_t *pool);
00273 
00274 
00275 /** Copy a list of canonicalized @a targets, one at a time, into @a
00276  * pcondensed_targets, omitting any targets that are found earlier in
00277  * the list, or whose ancestor is found earlier in the list.  Ordering
00278  * of targets in the original list is preserved in the condensed list
00279  * of targets.  Use @a pool for any allocations.
00280  *
00281  * How does this differ in functionality from svn_path_condense_targets()?
00282  *
00283  * Here's the short version:
00284  * 
00285  * 1.  Disclaimer: if you wish to debate the following, talk to Karl. :-)
00286  *     Order matters for updates because a multi-arg update is not
00287  *     atomic, and CVS users are used to, when doing 'cvs up targetA
00288  *     targetB' seeing targetA get updated, then targetB.  I think the
00289  *     idea is that if you're in a time-sensitive or flaky-network
00290  *     situation, a user can say, "I really *need* to update
00291  *     wc/A/D/G/tau, but I might as well update my whole working copy if
00292  *     I can."  So that user will do 'svn up wc/A/D/G/tau wc', and if
00293  *     something dies in the middle of the 'wc' update, at least the
00294  *     user has 'tau' up-to-date.
00295  * 
00296  * 2.  Also, we have this notion of an anchor and a target for updates
00297  *     (the anchor is where the update editor is rooted, the target is
00298  *     the actual thing we want to update).  I needed a function that
00299  *     would NOT screw with my input paths so that I could tell the
00300  *     difference between someone being in A/D and saying 'svn up G' and
00301  *     being in A/D/G and saying 'svn up .' -- believe it or not, these
00302  *     two things don't mean the same thing.  svn_path_condense_targets()
00303  *     plays with absolute paths (which is fine, so does
00304  *     svn_path_remove_redundancies()), but the difference is that it
00305  *     actually tweaks those targets to be relative to the "grandfather
00306  *     path" common to all the targets.  Updates don't require a
00307  *     "grandfather path" at all, and even if they did, the whole
00308  *     conversion to an absolute path drops the crucial difference
00309  *     between saying "i'm in foo, update bar" and "i'm in foo/bar,
00310  *     update '.'"
00311  */
00312 svn_error_t *
00313 svn_path_remove_redundancies(apr_array_header_t **pcondensed_targets,
00314                              const apr_array_header_t *targets,
00315                              apr_pool_t *pool);
00316 
00317 
00318 /** Decompose the canonicalized @a path into an array of <tt>const
00319  * char *</tt> components, allocated in @a pool.  If @a path is
00320  * absolute, the first component will be a lone dir separator (the
00321  * root directory).
00322  */
00323 apr_array_header_t *svn_path_decompose(const char *path,
00324                                        apr_pool_t *pool);
00325 
00326 
00327 /** Test that @a name is a single path component, that is:
00328  *   - not @c NULL or empty.
00329  *   - not a `/'-separated directory path
00330  *   - not empty or `..'  
00331  */
00332 svn_boolean_t svn_path_is_single_path_component(const char *name);
00333 
00334 
00335 /**
00336  * Test to see if a backpath, i.e. '..', is present in @a path.
00337  * If not, return @c FALSE.
00338  * If so, return @c TRUE.
00339  *
00340  * @since New in 1.1.
00341  */
00342 svn_boolean_t svn_path_is_backpath_present(const char *path);
00343 
00344 
00345 /** Test if @a path2 is a child of @a path1.
00346  * If not, return @c NULL.
00347  * If so, return a copy of the remainder path, allocated in @a pool.
00348  * (The remainder is the component which, added to @a path1, yields
00349  * @a path2.  The remainder does not begin with a dir separator.)  
00350  *
00351  * Both paths must be in canonical form, and must either be absolute,
00352  * or contain no ".." components.
00353  *
00354  * If @a path2 is the same as @a path1, it is not considered a child, so the
00355  * result is @c NULL; an empty string is never returned.
00356  *
00357  * ### todo: the ".." restriction is unfortunate, and would ideally
00358  * be lifted by making the implementation smarter.  But this is not
00359  * trivial: if the path is "../foo", how do you know whether or not
00360  * the current directory is named "foo" in its parent?
00361  */
00362 const char *svn_path_is_child(const char *path1,
00363                               const char *path2,
00364                               apr_pool_t *pool);
00365 
00366 /** Return true if @a path1 is an ancestor of @a path2 or the paths are equal
00367  * and false otherwise.
00368  *
00369  * @since New in 1.3.
00370  */
00371 svn_boolean_t
00372 svn_path_is_ancestor(const char *path1, const char *path2);
00373 
00374 /**
00375  * Check whether @a path is a valid Subversion path.
00376  *
00377  * A valid Subversion pathname is a UTF-8 string without control
00378  * characters.  "Valid" means Subversion can store the pathname in
00379  * a repository.  There may be other, OS-specific, limitations on
00380  * what paths can be represented in a working copy.
00381  *
00382  * ASSUMPTION: @a path is a valid UTF-8 string.  This function does
00383  * not check UTF-8 validity.
00384  *
00385  * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if
00386  * invalid.
00387  * 
00388  * @since New in 1.2.
00389  */
00390 svn_error_t *svn_path_check_valid(const char *path, apr_pool_t *pool);
00391 
00392 
00393 /** URI/URL stuff
00394  *
00395  * @defgroup svn_path_uri_stuff URI/URL stuff
00396  * @{
00397  */
00398 
00399 /** Return @c TRUE iff @a path looks like a valid URL, @c FALSE otherwise. */
00400 svn_boolean_t svn_path_is_url(const char *path);
00401 
00402 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */
00403 svn_boolean_t svn_path_is_uri_safe(const char *path);
00404 
00405 /** Return a URI-encoded copy of @a path, allocated in @a pool. */
00406 const char *svn_path_uri_encode(const char *path, apr_pool_t *pool);
00407 
00408 /** Return a URI-decoded copy of @a path, allocated in @a pool. */
00409 const char *svn_path_uri_decode(const char *path, apr_pool_t *pool);
00410 
00411 /** Extend @a url by a single @a component, URI-encoding that @a component
00412  * before adding it to the @a url.  Return the new @a url, allocated in
00413  * @a pool.  Notes: if @a component is @c NULL, just return a copy of @a url
00414  * allocated in @a pool; if @a component is already URI-encoded, calling
00415  * code should just use <tt>svn_path_join (url, component, pool)</tt>.  @a url
00416  * does not need to be a canonical path, it may have trailing '/'.
00417  */
00418 const char *svn_path_url_add_component(const char *url,
00419                                        const char *component,
00420                                        apr_pool_t *pool);
00421 
00422 /**
00423  * Convert @a iri (Internationalized URI) to a URI.
00424  * The return value may be the same as @a iri if it was already
00425  * a URI.  Else, allocate the return value in @a pool.
00426  *
00427  * @since New in 1.1.
00428  */
00429 const char *svn_path_uri_from_iri(const char *iri,
00430                                   apr_pool_t *pool);
00431 
00432 /**
00433  * URI-encode certain characters in @a uri that are not valid in a URI, but
00434  * don't have any special meaning in @a uri at their positions.  If no
00435  * characters need escaping, just return @a uri.
00436  *
00437  * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `.
00438  * This may be extended in the future to do context-dependent escaping.
00439  *
00440  * @since New in 1.1.
00441  */
00442 const char *svn_path_uri_autoescape(const char *uri,
00443                                     apr_pool_t *pool);
00444 
00445 /** @} */
00446 
00447 /** Charset conversion stuff
00448  *
00449  * @defgroup svn_path_charset_stuff Charset conversion stuff
00450  * @{
00451  */
00452 
00453 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */
00454 svn_error_t *svn_path_cstring_from_utf8(const char **path_apr,
00455                                         const char *path_utf8,
00456                                         apr_pool_t *pool);
00457 
00458 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */
00459 svn_error_t *svn_path_cstring_to_utf8(const char **path_utf8,
00460                                       const char *path_apr,
00461                                       apr_pool_t *pool);
00462 
00463 
00464 /** @} */
00465 
00466 #ifdef __cplusplus
00467 }
00468 #endif /* __cplusplus */
00469 
00470 
00471 #endif /* SVN_PATH_H */

Generated on Tue Oct 30 01:59:52 2007 for Subversion by  doxygen 1.5.2