diff options
author | Mole Shang <[email protected]> | 2023-08-06 18:45:37 +0800 |
---|---|---|
committer | Mole Shang <[email protected]> | 2023-08-06 18:45:37 +0800 |
commit | bde03538d66c37d5690ab321173eb83ecfcaf2ff (patch) | |
tree | ae7bc81f0ca5ff8562afa61bbd7d0af94195b25a /src | |
parent | c21d660c0932c45bde08a92ffa2686fd472b1b9e (diff) | |
download | hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.tar.gz hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.tar.bz2 hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.zip |
process_url: strip illegal characters in filename
There are edge cases when a retrieved title contains illegal characters
(e.g. '/').
Replace each such character with a space to ensure a valid filename.
Reproducible case:
https://www.bilibili.com/video/av20827366/
(title="【2K/60fps】这可能是我做过最美的miku了【boomclap布料解算版】")
Diffstat (limited to 'src')
-rw-r--r-- | src/process_url.c | 8 | ||||
-rw-r--r-- | src/utils.c | 12 | ||||
-rw-r--r-- | src/utils.h | 2 |
3 files changed, 22 insertions, 0 deletions
diff --git a/src/process_url.c b/src/process_url.c index 4bfce8d..7a92013 100644 --- a/src/process_url.c +++ b/src/process_url.c @@ -29,6 +29,9 @@ extern Site_map site_map; Options options; static queue_t dl_queue; +const char illegal_char[] = {'/', '\\', '|', '<', '>', + ':', '"', '?', '*', '\0'}; + thrd_t tid[MAX_THREAD]; mtx_t mtx; cnd_t cnd; @@ -516,6 +519,11 @@ void add_url(const char *URL, const char *outdir, const char *fn, } else { filename = malloc(strlen(fn) + 1); strcpy(filename, fn); + for (unsigned char i = 0; illegal_char[i] != '\0'; i++) { + if (repchr(filename, illegal_char[i], ' ')) + DEBUG_PRINT("Found illegal character '%c' in filename, replacing ...\n", + illegal_char[i]); + } } // Pass our cache (outdir_g) to parse_url() diff --git a/src/utils.c b/src/utils.c index 1132a94..ee41735 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,4 +1,5 @@ #include <pcre2.h> +#include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -83,6 +84,17 @@ int regex_match(const char *subject, str_array_t patterns, return 0; } +int repchr(char *str, char t, char r) { + int c = 0; + for (size_t i = 0; str[i] != '\0'; i++) { + if (str[i] == t) { + str[i] = r; + c++; + } + } + return c; +} + generic_array_t create_array(size_t elem_size, size_t n) { generic_array_t array; array.data = n ? malloc(elem_size * n) : NULL; diff --git a/src/utils.h b/src/utils.h index e4140a6..79c58ca 100644 --- a/src/utils.h +++ b/src/utils.h @@ -28,6 +28,8 @@ typedef struct queue { int regex_match(const char *, str_array_t, str_array_t *); +int repchr(char *str, char t, char r); + generic_array_t create_array(size_t elem_size, size_t n); void free_array(generic_array_t *array); |