PHP escapeshellarg with unicode/utf-8 support
By default, escapeshellarg() strips any multibyte (for example UTF-8) characters that are not valid in the current locale. In some cases you can solve this by setting the locale to a UTF-8 variant, but that might not always work.
Another way to do this is to write a custom escapeshellarg function:
/**
 * Quote a string for use as a single shell argument without dropping
 * multibyte (e.g. UTF-8) characters.
 *
 * On Windows the argument is wrapped in double quotes and every `"` and `%`
 * is replaced by a space, which is what the C implementation does (it does
 * not delete them). Elsewhere the argument is wrapped in single quotes and
 * each embedded single quote is rewritten as '\'' (close quote, escaped
 * quote, reopen quote).
 *
 * @param string $arg The raw argument.
 * @return string The quoted argument.
 */
function mb_escapeshellarg($arg)
{
    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
        // Replace with spaces rather than removing, to match the C source.
        return '"' . str_replace(array('"', '%'), array(' ', ' '), $arg) . '"';
    }

    return "'" . str_replace("'", "'\\''", $arg) . "'";
}
The code above is translated from the C source of PHP.
For reference, below is the C code for this function, taken from the file ext/standard/exec.c in the PHP source. Copyright is held by the respective authors according to the license.
/* {{{ php_escape_shell_arg
 *
 * Build a quoted copy of the NUL-terminated string `str` for use as a single
 * shell argument.
 *
 * - With PHP_WIN32 defined the result is wrapped in double quotes and every
 *   `"` or `%` byte is replaced by a space.
 * - Otherwise the result is wrapped in single quotes and every `'` byte is
 *   emitted as the four bytes '\'' (close quote, escaped quote, reopen).
 * - Bytes for which php_mblen() reports an error (negative result) are
 *   dropped; sequences it reports as longer than one byte are copied
 *   unchanged.
 *
 * Returns a NUL-terminated buffer obtained from safe_emalloc() (possibly
 * shrunk with erealloc()). NOTE(review): the caller presumably owns the
 * buffer and releases it with efree() - confirm against callers.
 */
PHPAPI char *php_escape_shell_arg(char *str)
{
	/* Lengths and indices are size_t so that strlen() is not truncated and
	 * the 4 * l + 3 estimate is not computed in signed int arithmetic. */
	size_t x, y = 0;
	size_t l = strlen(str);
	char *cmd;
	size_t estimate = (4 * l) + 3;

	TSRMLS_FETCH();

	/* worst case: every input byte expands to 4 bytes, plus two quotes
	 * and the terminating NUL */
	cmd = safe_emalloc(4, l, 3);

#ifdef PHP_WIN32
	cmd[y++] = '"';
#else
	cmd[y++] = '\'';
#endif

	for (x = 0; x < l; x++) {
		int mb_len = php_mblen(str + x, (l - x));

		/* skip non-valid multibyte characters */
		if (mb_len < 0) {
			continue;
		} else if (mb_len > 1) {
			/* copy the whole multibyte sequence verbatim; the loop's
			 * x++ accounts for the final byte of the sequence */
			memcpy(cmd + y, str + x, (size_t)mb_len);
			y += (size_t)mb_len;
			x += (size_t)mb_len - 1;
			continue;
		}

		switch (str[x]) {
#ifdef PHP_WIN32
		case '"':
		case '%':
			cmd[y++] = ' ';
			break;
#else
		case '\'':
			/* emit '\' here; the fall-through below appends the
			 * reopening quote, giving '\'' in total */
			cmd[y++] = '\'';
			cmd[y++] = '\\';
			cmd[y++] = '\'';
#endif
			/* fall-through */
		default:
			cmd[y++] = str[x];
		}
	}

#ifdef PHP_WIN32
	cmd[y++] = '"';
#else
	cmd[y++] = '\'';
#endif
	cmd[y] = '\0';

	if ((estimate - y) > 4096) {
		/* realloc if the estimate was way overkill
		 * Arbitrary cutoff point of 4096 */
		cmd = erealloc(cmd, y + 1);
	}

	return cmd;
}
/* }}} */