#include #include #include #include #include #include #ifdef _WIN32 #include "c11threads.h" #else #include #endif #include "../logger.h" #include "../process_url.h" #include "../utils/ffmpeg.h" #include "bilibili.h" #include "extractor.h" static int get_multipagedata(char *pagedata, Multipage *multipage_struct, bool *is_page) { const char *patterns_str[1] = {"window.__INITIAL_STATE__=(.+?);\\(function"}; const str_array_t patterns = {(char **)patterns_str, 1}; str_array_t results = create_str_array(0); int r = regex_match(pagedata, patterns, &results); if (!r) { for (unsigned short i = 0; i < results.n; i++) { // DEBUG_PRINT("%s\n", results.str[i]); if (results.str[i]) { multipage_struct->json = cJSON_Parse(get_str_element(&results, i)); } } free_str_array(&results); cJSON *aid_obj = cJSON_GetObjectItem(multipage_struct->json, "aid"); cJSON *bvid_obj = cJSON_GetObjectItem(multipage_struct->json, "bvid"); cJSON *sections_obj = cJSON_GetObjectItem(multipage_struct->json, "sections"); cJSON *videoData_obj = cJSON_GetObjectItem(multipage_struct->json, "videoData"); if (aid_obj && bvid_obj && sections_obj && videoData_obj) { multipage_struct->aid = aid_obj->valueint; multipage_struct->bvid = bvid_obj->valuestring; multipage_struct->sections = create_array( sizeof(Multi_episode_data), cJSON_GetArraySize(sections_obj)); /* sections */ if (!cJSON_GetArraySize(sections_obj)) { DEBUG_PRINT("This video does not have sections, meaning that it is a " "multi-p video with only one av/bvid\n"); *is_page = 1; } cJSON *e; int i = 0; cJSON_ArrayForEach(e, sections_obj) { Multi_episode_data *section = get_element(&multipage_struct->sections, i); cJSON *season_id_obj = cJSON_GetObjectItem(e, "season_id"); cJSON *episodes_obj = cJSON_GetObjectItem(e, "episodes"); if (season_id_obj && episodes_obj) { section->season_id = season_id_obj->valueint; DEBUG_PRINT("sections[%d] season_id: %d\n", i, section->season_id); section->episodes = create_array(sizeof(Episode), cJSON_GetArraySize(episodes_obj)); cJSON *e; int j = 0; cJSON_ArrayForEach(e, episodes_obj) { cJSON *aid_obj = cJSON_GetObjectItem(e, "aid"); cJSON *bvid_obj = cJSON_GetObjectItem(e, "bvid"); cJSON *cid_obj = cJSON_GetObjectItem(e, "cid"); cJSON *title_obj = cJSON_GetObjectItem(e, "title"); if (aid_obj && bvid_obj && cid_obj && title_obj) { Episode *episode = get_element(§ion->episodes, j); episode->aid = aid_obj->valueint; episode->bvid = bvid_obj->valuestring; episode->cid = cid_obj->valueint; episode->title = title_obj->valuestring; DEBUG_PRINT("sections[%d].episodes[%d] aid: %d\n", i, j, episode->aid); DEBUG_PRINT("sections[%d].episodes[%d] bvid: %s\n", i, j, episode->bvid); DEBUG_PRINT("sections[%d].episodes[%d] cid: %d\n", i, j, episode->cid); DEBUG_PRINT("sections[%d].episodes[%d] title: %s\n", i, j, episode->title); j++; continue; } r = 1; LOG("cJSON", "Read JSON.sections[%d].episodes[%d] failed.\n", i, j); return r; } i++; continue; } r = 1; LOG("cJSON", "Read JSON.sections[%d] failed.\n", i); return r; } /* videoData */ Multipage_video_data *videoData = &multipage_struct->videoData; cJSON *title_obj = cJSON_GetObjectItem(videoData_obj, "title"); cJSON *pages_obj = cJSON_GetObjectItem(videoData_obj, "pages"); if (title_obj && pages_obj) { videoData->title = title_obj->valuestring; DEBUG_PRINT("videoData.title: %s\n", videoData->title); videoData->pages = create_array(sizeof(Video_pages_data), cJSON_GetArraySize(pages_obj)); int i = 0; cJSON *e; cJSON_ArrayForEach(e, pages_obj) { cJSON *cid_obj = cJSON_GetObjectItem(e, "cid"); cJSON *part_obj = cJSON_GetObjectItem(e, "part"); cJSON *page_obj = cJSON_GetObjectItem(e, "page"); if (cid_obj && part_obj && page_obj) { Video_pages_data *page = get_element(&videoData->pages, i); page->cid = cid_obj->valueint; page->part = part_obj->valuestring; page->page = page_obj->valueint; DEBUG_PRINT("videoData.pages[%d].cid: %d\n", i, page->cid); DEBUG_PRINT("videoData.pages[%d].part: %s\n", i, page->part); DEBUG_PRINT("videoData.pages[%d].page: %d\n", i, page->page); i++; continue; } LOG("cJSON", "Read JSON.videodata.pages[%d] failed.\n", i); return 1; } } else { LOG("cJSON", "Read JSON.videodata failed.\n"); return 1; } } else { r = 1; LOG("cJSON", "Parse pagedata JSON failed.\n"); } } return r; } static int get_dash(const char *api_resp, Dash *dash) { dash->json = cJSON_Parse(api_resp); cJSON *code_obj = cJSON_GetObjectItem(dash->json, "code"); cJSON *message_obj = cJSON_GetObjectItem(dash->json, "message"); cJSON *dashinfo_obj = cJSON_GetObjectItem(dash->json, "data"); if (cJSON_IsInvalid(dashinfo_obj)) { dashinfo_obj = cJSON_GetObjectItem(dash->json, "result"); } if (!code_obj || !message_obj || !dashinfo_obj) { LOG("cJSON", "Parse API resp_json failed.\n"); return 1; } dash->code = code_obj->valueint; dash->message = code_obj->valuestring; /* dashinfo: "data" or "result" */ DEBUG_PRINT("Key of dashinfo: %s\n", dashinfo_obj->string); Dash_info *dashinfo = &dash->dashinfo; cJSON *quality_obj = cJSON_GetObjectItem(dashinfo_obj, "quality"); cJSON *accept_description_obj = cJSON_GetObjectItem(dashinfo_obj, "accept_description"); cJSON *accept_quality_obj = cJSON_GetObjectItem(dashinfo_obj, "accept_quality"); cJSON *dash_streams_obj = cJSON_GetObjectItem(dashinfo_obj, "dash"); cJSON *format_obj = cJSON_GetObjectItem(dashinfo_obj, "format"); cJSON *durl_obj = cJSON_GetObjectItem(dashinfo_obj, "durl"); // NOTE: Optional if (!quality_obj || !accept_description_obj || !accept_quality_obj || !dash_streams_obj || !format_obj) { LOG("cJSON", "Read API resp_json.%s failed.\n", dashinfo_obj->string); return 1; } dashinfo->quality = quality_obj->valueint; DEBUG_PRINT("quality: %d\n", dashinfo->quality); dashinfo->format = format_obj->valuestring; DEBUG_PRINT("format: %s\n", dashinfo->format); dashinfo->accept_description = create_str_array(cJSON_GetArraySize(accept_quality_obj)); str_array_t *ac_d = &dashinfo->accept_description; for (unsigned char n = 0; n < cJSON_GetArraySize(accept_description_obj); n++) { cJSON *i = cJSON_GetArrayItem(accept_description_obj, n); if (!i) { LOG("cJSON", "Read API resp_json.%s.accept_description failed.\n", dashinfo_obj->string); return 1; } set_str_element(ac_d, n, i->valuestring); DEBUG_PRINT("accept_description[%hhu]: %s\n", n, get_str_element(ac_d, n)); } dashinfo->accept_quality = create_array(sizeof(int), cJSON_GetArraySize(accept_quality_obj)); generic_array_t *ac_q = &dashinfo->accept_quality; for (unsigned char n = 0; n < cJSON_GetArraySize(accept_quality_obj); n++) { cJSON *i = cJSON_GetArrayItem(accept_quality_obj, n); if (!i) { LOG("cJSON", "Read API resp_json.%s.accept_quality failed.\n", dashinfo_obj->string); return 1; } int *v = get_element(ac_q, n); *v = i->valueint; DEBUG_PRINT("accept_quality[%hhu]: %d\n", n, *v); } cJSON *video_obj = cJSON_GetObjectItem(dash_streams_obj, "video"); cJSON *audio_obj = cJSON_GetObjectItem(dash_streams_obj, "audio"); if (!video_obj || !audio_obj) { LOG("cJSON", "Read API resp_json.%s.dash failed.\n", dashinfo_obj->string); return 1; } dashinfo->dash.video = create_array(sizeof(Dash_stream), cJSON_GetArraySize(video_obj)); dashinfo->dash.audio = create_array(sizeof(Dash_stream), cJSON_GetArraySize(audio_obj)); generic_array_t *target; cJSON *dash_stream_obj; for (dash_stream_obj = video_obj, target = &dashinfo->dash.video;;) { int i = 0; cJSON *e; cJSON_ArrayForEach(e, dash_stream_obj) { cJSON *id_obj = cJSON_GetObjectItem(e, "id"); cJSON *baseUrl_obj = cJSON_GetObjectItem(e, "baseUrl"); cJSON *bandwidth_obj = cJSON_GetObjectItem(e, "bandwidth"); cJSON *mimeType_obj = cJSON_GetObjectItem(e, "mimeType"); cJSON *codecid_obj = cJSON_GetObjectItem(e, "codecid"); cJSON *codecs_obj = cJSON_GetObjectItem(e, "codecs"); if (!id_obj || !baseUrl_obj || !bandwidth_obj || !mimeType_obj || !codecid_obj || !codecs_obj) { LOG("cJSON", "Read API resp_json.%s.dash.%s[%d] failed.\n", dashinfo_obj->string, dash_stream_obj->string, i); return 1; } Dash_stream *ds = get_element(target, i); ds->id = id_obj->valueint; ds->baseUrl = baseUrl_obj->valuestring; ds->bandwidth = bandwidth_obj->valueint; ds->mimeType = mimeType_obj->valuestring; ds->codecid = codecid_obj->valueint; ds->codecs = codecs_obj->valuestring; DEBUG_PRINT("%s[%d].id: %d\n", dash_stream_obj->string, i, ds->id); DEBUG_PRINT("%s[%d].baseUrl: %s\n", dash_stream_obj->string, i, ds->baseUrl); DEBUG_PRINT("%s[%d].bandwidth: %d\n", dash_stream_obj->string, i, ds->bandwidth); DEBUG_PRINT("%s[%d].mimeType: %s\n", dash_stream_obj->string, i, ds->mimeType); DEBUG_PRINT("%s[%d].codecid: %d\n", dash_stream_obj->string, i, ds->codecid); DEBUG_PRINT("%s[%d].codecs: %s\n", dash_stream_obj->string, i, ds->codecs); i++; } if (dash_stream_obj == video_obj) { dash_stream_obj = audio_obj; target = &dashinfo->dash.audio; } else { break; } } return 0; } static int get_page_in_query(char *query, int *page) { const char *pattern = "p=(\\d+)"; str_array_t results = {0}; int r = regex_match(query, (str_array_t){(char **)&pattern, 1}, &results); if (!r) { // for (unsigned short i = 0; i < results.n; i++) { // DEBUG_PRINT("%s\n", results.str[i]); // } *page = results.n ? atoi(results.str[1]) : 1; // Download p1 by default } return r; } static int generate_api(Bilibili_options *bilibili_options, const int quality) { char params[UCHAR_MAX]; snprintf(params, sizeof(params), "avid=%d&cid=%d&bvid=%s&qn=%d&type=&otype=json&fourk=1&fnver=0&" "fnval=2000", bilibili_options->aid, bilibili_options->cid, bilibili_options->bvid, quality); bilibili_options->api = malloc(strlen(BILIBILI_API) + strlen(params) + 1); strcpy(bilibili_options->api, BILIBILI_API); strcat(bilibili_options->api, params); return 0; } static const char *mimeType2ext(const char *mimeType) { static char mimeType_l[CHAR_MAX]; strcpy(mimeType_l, mimeType); const char *exts[2]; size_t extsCount = 0; char *token = strtok(mimeType_l, "/"); while (token != NULL && extsCount < 2) { exts[extsCount++] = token; token = strtok(NULL, "/"); } if (extsCount == 2) { return exts[1]; } return "mp4"; // Cannot parse, use default } static const char *id2quality_desc(int id) { const char *desc; switch (id) { case 127: desc = "超高清 8K"; break; case 120: desc = "超清 4K"; break; case 112: desc = "高清 1080P+"; break; case 80: desc = "高清 1080P"; break; case 48: desc = "高清 720P"; break; case 32: desc = "清晰 480P"; break; case 16: desc = "流畅 360P"; break; default: desc = "Unknown resolution"; break; } return desc; } static void multipage_cleanup(Multipage *multipage_struct) { for (unsigned short i = 0; i < multipage_struct->sections.n; i++) { // free_and_nullify(multipage_struct->sections[i].episodes); Multi_episode_data *section = get_element(&multipage_struct->sections, i); free_array(§ion->episodes); } free_array(&multipage_struct->sections); free_array(&multipage_struct->videoData.pages); cJSON_Delete(multipage_struct->json); multipage_struct->json = NULL; } static void dash_cleanup(Dash *dash) { cJSON_Delete(dash->json); free_str_array(&dash->dashinfo.accept_description); free_array(&dash->dashinfo.accept_quality); free_array(&dash->dashinfo.dash.audio); free_array(&dash->dashinfo.dash.video); } static int bilibili_merge(void *v) { callback_struct_t *cb_struct = (callback_struct_t *)v; LOG("Bilibili", "Using ffmpeg to merge downloaded files ...\n"); int r = merge_av(cb_struct->videofn, cb_struct->audiofn, cb_struct->filename); free_and_nullify(cb_struct->videofn); free_and_nullify(cb_struct->audiofn); free_and_nullify(cb_struct->filename); LOG("Bilibili", "All done!\n"); return r; } static int download(Bilibili_options *bilibili_options) { Dash dash = {0}; char *resp; get(bilibili_options->api, &resp); if (get_dash(resp, &dash)) { LOG("Bilibili", "Get dash failed."); free_and_nullify(resp); dash_cleanup(&dash); return 1; }; // Download the highest resolution Dash_stream *video = get_element(&dash.dashinfo.dash.video, 0); Dash_stream *audio = get_element(&dash.dashinfo.dash.audio, 0); const char *quality_desc = id2quality_desc(video->id); static callback_struct_t callback_struct = {0}; const char *ext = mimeType2ext(video->mimeType); callback_struct.filename = malloc(strlen(bilibili_options->title) + strlen(quality_desc) + strlen(ext) + 10); if (bilibili_options->is_page) { sprintf(callback_struct.filename, "%s-P%d-[%s].%s", bilibili_options->title, bilibili_options->page, quality_desc, ext); } else { sprintf(callback_struct.filename, "%s-[%s].%s", bilibili_options->title, quality_desc, ext); } { callback_struct.videofn = malloc(strlen(callback_struct.filename) + 6); sprintf(callback_struct.videofn, "%s[%s]-%s.%s", bilibili_options->title, quality_desc, "video", ext); add_url(video->baseUrl, NULL, callback_struct.videofn, "https://www.bilibili.com", NULL, NULL); } { callback_struct.audiofn = malloc(strlen(callback_struct.filename) + 6); sprintf(callback_struct.audiofn, "%s[%s]-%s.%s", bilibili_options->title, quality_desc, "audio", mimeType2ext(audio->mimeType)); add_url(audio->baseUrl, NULL, callback_struct.audiofn, "https://www.bilibili.com", &bilibili_merge, &callback_struct); } free_and_nullify(resp); dash_cleanup(&dash); return 0; } void bilibili_extract(struct options *options) { Multipage multipage_struct = {0}; Bilibili_options bilibili_options = {options->URL}; int p = 1; char *api; if (get(options->URL, &options->pagedata)) { LOG("Bilibili", "Download pagedata failed.\n"); return; } bilibili_options.html = options->pagedata; if (get_multipagedata(options->pagedata, &multipage_struct, &bilibili_options.is_page)) { multipage_cleanup(&multipage_struct); LOG("Bilibili", "Parse pagedata failed.\n"); return; }; if (get_page_in_query(options->query, &p) || p < 1 || p > multipage_struct.videoData.pages.n) { multipage_cleanup(&multipage_struct); LOG("Bilibili", "Parse query failed.\n"); return; } Video_pages_data *page = get_element(&multipage_struct.videoData.pages, p - 1); bilibili_options.aid = multipage_struct.aid; bilibili_options.bvid = multipage_struct.bvid; bilibili_options.cid = page->cid; bilibili_options.page = p; bilibili_options.title = multipage_struct.videoData.title; DEBUG_PRINT("aid: %d\n", bilibili_options.aid); DEBUG_PRINT("bvid: %s\n", bilibili_options.bvid); DEBUG_PRINT("cid: %d\n", bilibili_options.cid); DEBUG_PRINT("is_page: %s\n", bilibili_options.is_page ? "yes" : "no"); DEBUG_PRINT("page: %d\n", bilibili_options.page); DEBUG_PRINT("title: %s\n", bilibili_options.title); if (generate_api(&bilibili_options, 127)) { free_and_nullify(bilibili_options.api); multipage_cleanup(&multipage_struct); return; } DEBUG_PRINT("Generated API: %s\n", bilibili_options.api); if (download(&bilibili_options)) { free_and_nullify(bilibili_options.api); multipage_cleanup(&multipage_struct); return; } free_and_nullify(bilibili_options.api); multipage_cleanup(&multipage_struct); }