diff options
author | Mole Shang <[email protected]> | 2023-08-08 23:20:05 +0800 |
---|---|---|
committer | Mole Shang <[email protected]> | 2023-08-08 23:20:05 +0800 |
commit | 095851562f33cb9b57300673af4897680bc947d5 (patch) | |
tree | 09c3a0def7af88f4a4212adfc932fcb0b473fad1 | |
parent | e63cddca442dd4f8f7124e7827e347fe5046e416 (diff) | |
download | hinata-095851562f33cb9b57300673af4897680bc947d5.tar.gz hinata-095851562f33cb9b57300673af4897680bc947d5.tar.bz2 hinata-095851562f33cb9b57300673af4897680bc947d5.zip |
extractors: support haokan videos
-rw-r--r-- | src/extractors/extractor.c | 10 | ||||
-rw-r--r-- | src/extractors/extractor.h | 4 | ||||
-rw-r--r-- | src/extractors/haokan.c | 46 | ||||
-rw-r--r-- | src/extractors/haokan.h | 8 |
4 files changed, 64 insertions, 4 deletions
diff --git a/src/extractors/extractor.c b/src/extractors/extractor.c index 7f5e05c..ccc1ec6 100644 --- a/src/extractors/extractor.c +++ b/src/extractors/extractor.c @@ -1,10 +1,13 @@ #include <stdlib.h> +#include "../utils/utils.h" #include "bilibili.h" #include "extractor.h" -#include "../utils/utils.h" +#include "haokan.h" -Site_map site_map = {{{"www.bilibili.com", SITE_BILIBILI}}, 1}; +Site_map site_map = { + {{"www.bilibili.com", SITE_BILIBILI}, {"haokan.baidu.com", SITE_HAOKAN}}, + 2}; void options_cleanup(Options *options) { free_and_nullify(options->URL); @@ -19,6 +22,9 @@ int extract(void *v) { case SITE_BILIBILI: bilibili_extract(options); break; + case SITE_HAOKAN: + haokan_extract(options); + break; } options_cleanup(options); return 0; diff --git a/src/extractors/extractor.h b/src/extractors/extractor.h index d3ebeec..347662b 100644 --- a/src/extractors/extractor.h +++ b/src/extractors/extractor.h @@ -6,14 +6,14 @@ #include <stdbool.h> #include <stddef.h> -enum site { SITE_BILIBILI }; +enum site { SITE_BILIBILI, SITE_HAOKAN }; typedef enum site site_t; typedef struct site_map { struct { char domain[SHRT_MAX]; site_t site; - } pairs[1]; + } pairs[2]; unsigned char size; } Site_map; diff --git a/src/extractors/haokan.c b/src/extractors/haokan.c new file mode 100644 index 0000000..d2d0379 --- /dev/null +++ b/src/extractors/haokan.c @@ -0,0 +1,46 @@ +#include "haokan.h" +#include "../logger.h" +#include "../process_url.h" +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void haokan_extract(Options *options) { + char *resp, *title, *videoURL; + get(options->URL, &resp); + const char *patterns_str[2] = {"<div class='ssr-video-title\'>(.*?)</div>", + "\"playurl\":\"(http.+?)\""}; + const str_array_t patterns = {(char **)patterns_str, 2}; + str_array_t results = create_str_array(0); + int r = regex_match(resp, patterns, &results); + // Should match exactly two results in HTML, otherwise error out. + if (!r && results.n == 2) { + // for (unsigned short i = 0; i < results.n; i++) { + // DEBUG_PRINT("%s\n", results.str[i]); + // } + title = results.str[0]; + substitute_str(results.str[1], "\\\\/", "/", &videoURL); + DEBUG_PRINT("title: %s\n", title); + DEBUG_PRINT("videoURL: %s\n", videoURL); + + char *ct = NULL; + get_info(videoURL, NULL, &ct); + if (ct == NULL) { + goto end; + } + const char *ext = mimeType2ext(ct); + char *filename = malloc(strlen(title) + strlen(ct) + 2); + sprintf(filename, "%s.%s", title, ext); + + add_url(videoURL, NULL, filename, "https://haokan.baidu.com", NULL, NULL); + + end: + free_str_array(&results); + free_and_nullify(filename); + free_and_nullify(videoURL); + return; + } + LOG("haokan", "Download failed.\n"); + free_str_array(&results); +} diff --git a/src/extractors/haokan.h b/src/extractors/haokan.h new file mode 100644 index 0000000..f258a4d --- /dev/null +++ b/src/extractors/haokan.h @@ -0,0 +1,8 @@ +#ifndef HAOKAN_H_ +#define HAOKAN_H_ + +#include "extractor.h" + +void haokan_extract(struct options *); + +#endif |