Sil*_*lan 10 c realpath canonicalization
realpath 做我需要的,但只有在路径中的文件确实存在时才有效.
无论目录/文件是否实际存在,我都需要一个从字符串(例如../some/./directory/a/b/c/../d到some/directory/a/b/d)返回规范化路径的函数
基本上相当于 Windows 上的 PathCanonicalize.
这样的功能是否已经存在?
我认为没有任何标准库函数可用于此.
您可以使用 Apache httpd 源代码文件 server/util.c 中的 ap_getparents() 函数.我相信它完全符合您的要求:https://github.com/apache/httpd/blob/trunk/server/util.c#L500
/*
 * IS_SLASH(s): non-zero when the character s is a path separator.
 *
 * The argument is fully parenthesized so that expressions built from
 * lower-precedence operators (for example IS_SLASH(c & 0x7f)) are
 * classified correctly. On WIN32 the argument is evaluated twice, so it
 * must not have side effects.
 */
#ifdef WIN32
#define IS_SLASH(s) (((s) == '/') || ((s) == '\\'))
#else
#define IS_SLASH(s) ((s) == '/')
#endif

/*
 * Collapse "." and ".." segments of the NUL-terminated path `name`,
 * rewriting the string in place. The work is purely textual: nothing in
 * this function consults the file system.
 *
 * The write position never passes the read position, so the result is
 * never longer than the input and no extra storage is needed.
 *
 * Behaviour worth knowing before relying on it:
 *   - "xx/../" is removed together with the segment in front of it.
 *   - A leading "../" is dropped, and so is a leading "/../" -- including
 *     its slash, so "/../foo" becomes "foo".
 *   - A trailing "/." loses only the dot ("a/." -> "a/").
 *   - A trailing "xx/.." is removed but the slash before xx is kept
 *     ("a/b/.." -> "a/").
 *   - "." and ".." on their own become the empty string.
 *
 * `name` must be a non-NULL, writable string. Offsets are kept in `int`,
 * so strings longer than INT_MAX are not supported.
 */
void ap_getparents(char *name)
{
    char *next;
    int l, w, first_dot;

    /* Four passes, as per RFC 1808 */
    /* a) remove ./ path segments */
    /* Nothing before the first '.' can change, so locate it once. */
    for (next = name; *next && (*next != '.'); next++) {
    }

    l = w = first_dot = (int)(next - name);
    while (name[l] != '\0') {
        /* A "./" counts only when it starts a segment. */
        if (name[l] == '.' && IS_SLASH(name[l + 1])
            && (l == 0 || IS_SLASH(name[l - 1])))
            l += 2;
        else
            name[w++] = name[l++];
    }

    /* b) remove trailing . path, segment */
    if (w == 1 && name[0] == '.')
        w--;
    else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2]))
        w--;
    name[w] = '\0';

    /* c) remove all xx/../ segments. (including leading ../ and /../) */
    l = first_dot;

    while (name[l] != '\0') {
        if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2])
            && (l == 0 || IS_SLASH(name[l - 1]))) {
            int m = l + 3, n;   /* m: first character after the "../" */

            /* Step over the slash in front of ".." and walk back to the
             * start of the preceding segment (or to the string start). */
            l = l - 2;
            if (l >= 0) {
                while (l >= 0 && !IS_SLASH(name[l]))
                    l--;
                l++;
            }
            else
                l = 0;
            /* Slide the tail down over "xx/../"; scanning resumes at l. */
            n = l;
            while ((name[n] = name[m]))
                (++n, ++m);
        }
        else
            ++l;
    }

    /* d) remove trailing xx/.. segment. */
    /* Here l equals the length of the string left by pass c. */
    if (l == 2 && name[0] == '.' && name[1] == '.')
        name[0] = '\0';
    else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.'
             && IS_SLASH(name[l - 3])) {
        l = l - 4;
        if (l >= 0) {
            while (l >= 0 && !IS_SLASH(name[l]))
                l--;
            l++;
        }
        else
            l = 0;
        name[l] = '\0';
    }
}
Run Code Online (Sandbox Code Playgroud)
(这是假设您的项目中重复使用Apache Licensed代码是可以接受的.)
Python源代码中有针对多个平台的os.path.normpath实现.不幸的是,POSIX版本(位于Lib/posixpath.py中,Python 3为第318行,Python 2为第308行)是用Python编写的,但其通用逻辑可以很容易地用C重新实现(该函数非常紧凑),并且已经过多年的使用检验.
Python解释器和标准库源代码中还有其他平台normpath实现,因此便携式解决方案可以是这些的组合.
可能用C语言编写的其他系统/库也有相同的实现,因为normpath函数在安全意义上是至关重要的.
(并且拥有Python代码的主要优点是能够使用任何甚至随机的并行输入在C中测试您的函数 - 这种测试对于使函数安全非常重要)
根据您的问题陈述,以下内容完全符合您的要求.大部分代码来自评论中链接的path.c.为了符合您的问题陈述,添加了删除开头 ../ 的修改:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void pathCanonicalize (char *path);

/*
 * Command-line driver: canonicalizes the path given as the first argument
 * and prints both the original and the canonical form.
 *
 * Returns 0 on success, 1 when no path was supplied or when the working
 * copy could not be allocated.
 */
int main (int argc, char **argv)
{
    if (argc < 2) {
        /* argv[0] may legitimately be NULL when argc is 0. */
        const char *prog = (argc > 0 && argv[0]) ? argv[0] : "pathcanonical";

        fprintf (stderr, "error: insufficient input, usage: %s <path>\n",
                 prog);
        return 1;
    }

    /* pathCanonicalize rewrites its argument, so work on a private copy.
     * malloc + memcpy is used rather than strdup because strdup is a POSIX
     * extension that <string.h> does not declare in strict ISO C mode. */
    size_t size = strlen (argv[1]) + 1;
    char *fullpath = malloc (size);
    if (!fullpath) {
        fprintf (stderr, "error: virtual memory exhausted.\n");
        return 1;
    }
    memcpy (fullpath, argv[1], size);

    pathCanonicalize (fullpath);

    printf ("\n original : %s\n canonical: %s\n\n", argv[1], fullpath);

    free (fullpath);

    return 0;
}
/* True when c separates path components ('/' or '\\'). */
static int pathIsSeparator (char c)
{
    return c == '/' || c == '\\';
}

/*
 * Canonicalize `path` in place, purely textually (the file system is never
 * consulted, so the named files need not exist).
 *
 * Rules:
 *   - '\\' is treated as '/', and runs of separators collapse to one '/'.
 *   - "." components are dropped.
 *   - ".." removes the component before it; a ".." with nothing left to
 *     remove (at the start of a relative path, or directly under the root)
 *     is discarded, so "../some/dir" becomes "some/dir".
 *   - Only a component that is exactly "." or ".." is special: names such
 *     as "a.txt", ".hidden" or "a..b" are ordinary components and are
 *     copied unchanged.
 *   - A trailing separator in the input is preserved in the output.
 *   - An absolute path that reduces to nothing becomes "/"; a non-empty
 *     relative path that reduces to nothing becomes "." (or "./" when the
 *     input ended with a separator). The empty string stays empty.
 *
 * The result is never longer than the input, so no extra storage is
 * needed. A NULL `path` is ignored.
 */
void pathCanonicalize (char *path)
{
    size_t rd = 0;          /* read cursor */
    size_t wr = 0;          /* write cursor; never overtakes rd */
    size_t root;            /* lowest index a component may start at */
    int absolute;
    int trailing = 0;       /* was the last thing read a separator? */

    if (path == NULL || path[0] == '\0')
        return;

    absolute = pathIsSeparator (path[0]);
    if (absolute)
        path[wr++] = '/';
    root = wr;

    while (path[rd] != '\0') {
        size_t start;
        size_t len;

        if (pathIsSeparator (path[rd])) {
            trailing = 1;
            rd++;
            continue;
        }

        /* Delimit the next component: [start, rd). */
        start = rd;
        while (path[rd] != '\0' && !pathIsSeparator (path[rd]))
            rd++;
        len = rd - start;
        trailing = 0;

        if (len == 1 && path[start] == '.')
            continue;

        if (len == 2 && path[start] == '.' && path[start + 1] == '.') {
            /* Back up over the last component kept so far, together with
             * the '/' that joins it to its predecessor. Stops at `root`,
             * which makes a surplus ".." a no-op. */
            while (wr > root && path[--wr] != '/')
                ;
            continue;
        }

        /* Ordinary component: join with '/' unless it is the first one.
         * At least one separator was consumed before any non-first
         * component, so wr + 1 <= start and the '/' cannot clobber
         * unread input. */
        if (wr > root)
            path[wr++] = '/';
        memmove (path + wr, path + start, len);
        wr += len;
    }

    if (wr == root) {
        /* Nothing survived. An absolute path already holds its "/". */
        if (!absolute) {
            path[wr++] = '.';
            if (trailing)
                path[wr++] = '/';
        }
    } else if (trailing) {
        path[wr++] = '/';
    }

    path[wr] = '\0';
}
Run Code Online (Sandbox Code Playgroud)
使用/输出
$ ./bin/pathcanonical ../some/./directory/a/b/c/../d
original : ../some/./directory/a/b/c/../d
canonical: some/directory/a/b/d
Run Code Online (Sandbox Code Playgroud)
另一种尝试.这个的怪癖/特点:
源代码:
#include <stdlib.h>
#include <string.h>
/*
 * Write a canonical form of srcpath into dstpath (capacity sz bytes,
 * including the terminating NUL). The work is purely textual; nothing
 * here consults the file system.
 *
 * Only '/' is a separator. Runs of '/' collapse, "." components are
 * dropped, and ".." removes the component before it (a ".." with nothing
 * to remove is discarded). The result has no trailing slash. An absolute
 * path that reduces to nothing yields "/", a relative one yields ".".
 *
 * Returns 0 on success. Returns -1 when the result plus its NUL does not
 * fit in sz bytes or when scratch memory cannot be allocated; dstpath is
 * left untouched in that case.
 *
 * srcpath must be non-NULL. srcpath and dstpath may be the same buffer:
 * the result is assembled in scratch storage and copied out only at the
 * end.
 */
int
pathcanon(const char *srcpath, char *dstpath, size_t sz)
{
    const int relpath = (*srcpath != '/');
    const char *p = srcpath;
    size_t len = 0;         /* bytes assembled in buf so far */
    char *buf;

    /*
     * The assembled path is never longer than srcpath, except that an
     * empty srcpath still produces the one-character result ".".
     * Heap storage keeps stack use independent of the input length.
     */
    buf = malloc(strlen(srcpath) + 2);
    if (buf == NULL)
        return -1;

    for (;;) {
        size_t n;

        p += strspn(p, "/");            /* skip a run of separators */
        if (*p == '\0')
            break;
        n = strcspn(p, "/");            /* length of this component */

        if (n == 2 && p[0] == '.' && p[1] == '.') {
            /* Drop the last kept component and the '/' in front of it. */
            while (len > 0 && buf[--len] != '/')
                ;
        } else if (!(n == 1 && p[0] == '.')) {
            /* Every component except the first of a relative path is
             * preceded by a separator. */
            if (len > 0 || !relpath)
                buf[len++] = '/';
            memcpy(buf + len, p, n);
            len += n;
        }
        p += n;
    }

    if (len == 0)
        buf[len++] = relpath ? '.' : '/';

    /* Need room for len bytes plus the terminating NUL. */
    if (len >= sz) {
        free(buf);
        return -1;
    }

    memcpy(dstpath, buf, len);
    dstpath[len] = '\0';
    free(buf);
    return 0;
}
Run Code Online (Sandbox Code Playgroud)
编辑:之前遗漏了 s == dstpath 时的空间检查.正常的调用者提供的目标存储空间应该会多于0或1个字节,但世事难料.