diff options
| author | Mole Shang <135e2@135e2.dev> | 2023-08-06 18:45:37 +0800 | 
|---|---|---|
| committer | Mole Shang <135e2@135e2.dev> | 2023-08-06 18:45:37 +0800 | 
| commit | bde03538d66c37d5690ab321173eb83ecfcaf2ff (patch) | |
| tree | ae7bc81f0ca5ff8562afa61bbd7d0af94195b25a | |
| parent | c21d660c0932c45bde08a92ffa2686fd472b1b9e (diff) | |
| download | hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.tar.gz hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.tar.bz2 hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.zip | |
process_url: strip illegal characters in filename
There are edge cases where a retrieved title contains illegal characters
(e.g. '/').
Replace each such character with a space to ensure a valid filename.
Reproducible case:
https://www.bilibili.com/video/av20827366/
(title="【2K/60fps】这可能是我做过最美的miku了【boomclap布料解算版】")
| -rw-r--r-- | src/process_url.c | 8 | ||||
| -rw-r--r-- | src/utils.c | 12 | ||||
| -rw-r--r-- | src/utils.h | 2 | 
3 files changed, 22 insertions, 0 deletions
| diff --git a/src/process_url.c b/src/process_url.c index 4bfce8d..7a92013 100644 --- a/src/process_url.c +++ b/src/process_url.c @@ -29,6 +29,9 @@ extern Site_map site_map;  Options options;  static queue_t dl_queue; +const char illegal_char[] = {'/', '\\', '|', '<', '>', +                             ':', '"',  '?', '*', '\0'}; +  thrd_t tid[MAX_THREAD];  mtx_t mtx;  cnd_t cnd; @@ -516,6 +519,11 @@ void add_url(const char *URL, const char *outdir, const char *fn,    } else {      filename = malloc(strlen(fn) + 1);      strcpy(filename, fn); +    for (unsigned char i = 0; illegal_char[i] != '\0'; i++) { +      if (repchr(filename, illegal_char[i], ' ')) +        DEBUG_PRINT("Found illegal character '%c' in filename, replacing ...\n", +                    illegal_char[i]); +    }    }    // Pass our cache (outdir_g) to parse_url() diff --git a/src/utils.c b/src/utils.c index 1132a94..ee41735 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,4 +1,5 @@  #include <pcre2.h> +#include <stddef.h>  #include <stdio.h>  #include <stdlib.h>  #include <string.h> @@ -83,6 +84,17 @@ int regex_match(const char *subject, str_array_t patterns,    return 0;  } +int repchr(char *str, char t, char r) { +  int c = 0; +  for (size_t i = 0; str[i] != '\0'; i++) { +    if (str[i] == t) { +      str[i] = r; +      c++; +    } +  } +  return c; +} +  generic_array_t create_array(size_t elem_size, size_t n) {    generic_array_t array;    array.data = n ? malloc(elem_size * n) : NULL; diff --git a/src/utils.h b/src/utils.h index e4140a6..79c58ca 100644 --- a/src/utils.h +++ b/src/utils.h @@ -28,6 +28,8 @@ typedef struct queue {  int regex_match(const char *, str_array_t, str_array_t *); +int repchr(char *str, char t, char r); +  generic_array_t create_array(size_t elem_size, size_t n);  void free_array(generic_array_t *array); | 
