summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMole Shang <[email protected]>2023-08-06 18:45:37 +0800
committerMole Shang <[email protected]>2023-08-06 18:45:37 +0800
commitbde03538d66c37d5690ab321173eb83ecfcaf2ff (patch)
treeae7bc81f0ca5ff8562afa61bbd7d0af94195b25a /src
parentc21d660c0932c45bde08a92ffa2686fd472b1b9e (diff)
downloadhinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.tar.gz
hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.tar.bz2
hinata-bde03538d66c37d5690ab321173eb83ecfcaf2ff.zip
process_url: strip illegal characters in filename
There are edge cases where a retrieved title contains illegal characters (e.g. '/'). Replace each of them with a space to ensure a valid filename. Reproducible case: https://www.bilibili.com/video/av20827366/ (title="【2K/60fps】这可能是我做过最美的miku了【boomclap布料解算版】")
Diffstat (limited to 'src')
-rw-r--r--src/process_url.c8
-rw-r--r--src/utils.c12
-rw-r--r--src/utils.h2
3 files changed, 22 insertions, 0 deletions
diff --git a/src/process_url.c b/src/process_url.c
index 4bfce8d..7a92013 100644
--- a/src/process_url.c
+++ b/src/process_url.c
@@ -29,6 +29,9 @@ extern Site_map site_map;
Options options;
static queue_t dl_queue;
+const char illegal_char[] = {'/', '\\', '|', '<', '>', // characters that add_url() replaces with ' ' in a filename
+ ':', '"', '?', '*', '\0'}; // the trailing '\0' is the sentinel that ends the replacement loop
+
thrd_t tid[MAX_THREAD];
mtx_t mtx;
cnd_t cnd;
@@ -516,6 +519,11 @@ void add_url(const char *URL, const char *outdir, const char *fn,
} else {
filename = malloc(strlen(fn) + 1);
strcpy(filename, fn);
+ for (unsigned char i = 0; illegal_char[i] != '\0'; i++) {
+ if (repchr(filename, illegal_char[i], ' '))
+ DEBUG_PRINT("Found illegal character '%c' in filename, replacing ...\n",
+ illegal_char[i]);
+ }
}
// Pass our cache (outdir_g) to parse_url()
diff --git a/src/utils.c b/src/utils.c
index 1132a94..ee41735 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,4 +1,5 @@
#include <pcre2.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -83,6 +84,17 @@ int regex_match(const char *subject, str_array_t patterns,
return 0;
}
+int repchr(char *str, char t, char r) { // Replace every occurrence of t in str with r, in place; returns how many were replaced.
+ int c = 0; // number of replacements made so far
+ for (size_t i = 0; str[i] != '\0'; i++) { // scan up to the NUL terminator; str is dereferenced unchecked, so it must not be NULL
+ if (str[i] == t) {
+ str[i] = r;
+ c++;
+ }
+ }
+ return c; // 0 means t did not occur in str
+}
+
generic_array_t create_array(size_t elem_size, size_t n) {
generic_array_t array;
array.data = n ? malloc(elem_size * n) : NULL;
diff --git a/src/utils.h b/src/utils.h
index e4140a6..79c58ca 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -28,6 +28,8 @@ typedef struct queue {
int regex_match(const char *, str_array_t, str_array_t *);
+int repchr(char *str, char t, char r); // replaces each t in str with r in place; returns the replacement count
+
generic_array_t create_array(size_t elem_size, size_t n);
void free_array(generic_array_t *array);