#include "utils.h"
#include <curl/curl.h>
#include <curl/easy.h>
#include <curl/header.h>
#include <curl/system.h>
#include <curl/urlapi.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include "c11threads.h"
#else
#include <threads.h>
#endif

#include "nuklear.h"

#include "extractors/extractor.h"
#include "logger.h"
#include "process_url.h"

/* NOTICE: the global curl_conf pointer will only stay valid during downloading,
 * otherwise, ALWAYS point it to NULL. */
static curl_conf_t *curl_conf;
extern Site_map site_map; /* domain -> extractor mapping, defined elsewhere */
Options options;          /* arguments handed to the extractor thread */
static queue_t dl_queue;  /* pending download tasks (holds curl_conf_t *) */

/* Characters not allowed in filenames; the trailing '\0' is a sentinel. */
const char illegal_char[] = {'/', '\\', '|', '<', '>',
                             ':', '"',  '?', '*', '\0'};

thrd_t tid[MAX_THREAD]; /* worker-thread handles of the active download */
mtx_t mtx;              /* guards curl_conf->success_thrd; pairs with cnd */
cnd_t cnd;              /* signaled each time a worker finishes */
bool corrupted;         /* cancellation/error flag polled by the workers */
static const char *outdir_g, *referer_g; /* cached across add_url() calls */
static callback_t callback_g;            /* optional post-download callback */
static callback_struct_t *p_callback_struct_g; /* argument for callback_g */
static CURLU *h; /* shared URL-parser handle, created in curl_init() */

/*NOTE: Use logger(X) (defined as a generic macro) to log errors. */
/* Log a libcurl easy-interface error and latch the corrupted flag.
 * Only the first error of an episode is logged.  Returns non-zero iff
 * r indicates an error. */
static bool logerr_b(CURLcode r) {
  if (!r || corrupted)
    return r;
  LOG("libcurl", "Error %d: %s\n", r, ERRTOSTRING(r));
  corrupted = true;
  return r;
}

/* Translate a CURLHcode from the header API into a log message.
 * CURLHE_MISSING (header simply absent) is tolerated: it is reported only
 * at debug level and does NOT mark the transfer corrupted.  Every other
 * non-zero code is logged and sets `corrupted`.  Returns non-zero iff r
 * indicates an error. */
static bool logerr_h(CURLHcode r) {
  if (r) {
    const char *err_str;
    switch (r) {
    case CURLHE_BADINDEX:
      err_str = "header exists but not with this index";
      break;
    case CURLHE_MISSING:
      /* Allow a missing header: debug-print and bail out without
       * touching `corrupted`.  (The old `break` after this return was
       * unreachable and has been removed.) */
      DEBUG_PRINT("Header Error %d: %s\n", r, "no such header exists");
      return r;
    case CURLHE_NOHEADERS:
      err_str = "no headers at all exist (yet)";
      break;
    case CURLHE_NOREQUEST:
      err_str = "no request with this number was used";
      break;
    case CURLHE_OUT_OF_MEMORY:
      err_str = "out of memory while processing";
      break;
    case CURLHE_BAD_ARGUMENT:
      err_str = "a function argument was not okay";
      break;
    case CURLHE_NOT_BUILT_IN:
      err_str = "if API was disabled in the build";
      break;
    default:
      err_str = "unknown error";
      break;
    }
    LOG("libcurl", "Header Error %d: %s\n", r, err_str);
    corrupted = true;
  }
  return r;
}

/* Log URL-API parse errors.  A URL without a query string is accepted
 * silently (debug note only).  Returns non-zero iff r indicates an
 * error (including CURLUE_NO_QUERY — callers decide how to treat it). */
static bool logerr_u(CURLUcode r) {
  if (r == CURLUE_NO_QUERY) {
    /* Accept URLs that carry no query part. */
    DEBUG_PRINT("The URL has no query.\n");
  } else if (r != 0) {
    LOG("libcurl", "Parse Error %d: Invalid URL\n", r);
  }
  return r;
}

/* Apply the transfer options shared by every request in this file:
 * follow redirects, auto-referer, a fixed browser UA, the cached
 * referer, and all built-in decompressors. */
static void curl_easy_setcommonopts(CURL *curl) {
  static const char user_agent[] =
      "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/116.0";
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_AUTOREFERER, 1L);
  curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  curl_easy_setopt(curl, CURLOPT_REFERER, referer_g);
  /* Enable every supported built-in compression,
   * since several sites force gzip encoding. */
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
}

/* libcurl xferinfo callback: records this worker's downloaded byte count
 * so poll_status() can aggregate progress.  Returning non-zero makes
 * libcurl abort the transfer — that is how the `corrupted` cancellation
 * flag propagates into the workers. */
static int progress_callback(void *clientp, curl_off_t dltotal,
                             curl_off_t dlnow, curl_off_t ultotal,
                             curl_off_t ulnow) {
  (void)ultotal;
  (void)ulnow;
  thrd_info_t *info = clientp;
  curl_conf_t *conf = info->curl_c;
  conf->dlnow_per_thrd[info->no] = dlnow;
  /* Single-threaded transfers learn the total size only from libcurl. */
  if (conf->total_thrd == 1)
    conf->dltotal = dltotal;
  return corrupted;
}

/* libcurl write callback: append the incoming chunk to the growing,
 * NUL-terminated buffer in s.  Returns the number of bytes consumed;
 * returning a short count tells libcurl to abort the transfer. */
static size_t write2str(void *ptr, size_t size, size_t nmemb, str_data_t *s) {
  size_t chunk = size * nmemb;
  size_t new_len = s->len + chunk;
  /* Grow through a temporary so the original buffer is not leaked (and
   * not dereferenced through NULL) when realloc fails. */
  char *grown = realloc(s->string, new_len + 1);
  if (grown == NULL) {
    return 0; /* abort the transfer on out-of-memory */
  }
  s->string = grown;
  memcpy(s->string + s->len, ptr, chunk);
  s->string[new_len] = '\0';
  s->len = new_len;

  return chunk;
}

/* Join outdir and fn into fullpathfn, inserting the platform path
 * separator only when outdir does not already end with one.
 * NOTE(review): assumes fullpathfn is large enough — callers size it.
 * The empty-string guard avoids the out-of-bounds outdir[-1] read the
 * old code performed when outdir was "". */
static void gen_fullpathfn(char *fullpathfn, const char *outdir,
                           const char *fn) {
  size_t dirlen = strlen(outdir);
  const char *sep =
      (dirlen > 0 && outdir[dirlen - 1] == SPLITTER_CHAR) ? "" : SPLITTER_STR;
  sprintf(fullpathfn, "%s%s%s", outdir, sep, fn);
}

static int parse_url(const char *URL, const char *outdir, char *fn) {
  CURLUcode ue = logerr(curl_url_set(h, CURLUPART_URL, URL, 0));
  if (ue && ue != CURLUE_NO_QUERY) {
    return 1;
  }
  char *domain, *path, *query;

  if (ue == CURLUE_NO_QUERY) {
    query = NULL;
  } else {
    ue = logerr(curl_url_get(h, CURLUPART_QUERY, &query, 0));
  }
  ue = curl_url_get(h, CURLUPART_HOST, &domain, 0);
  if (ue) {
    return 1;
  }
  ue = logerr(curl_url_get(h, CURLUPART_PATH, &path, 0));
  if (ue) {
    return 1;
  }

  DEBUG_PRINT("Domain: %s\n", domain);
  DEBUG_PRINT("Path: %s\n", path);
  DEBUG_PRINT("Query: %s\n", query);

  for (unsigned short i = 0; i < site_map.size; i++) {
    if (!strcmp(domain, site_map.pairs[i].domain)) {
      append_log("Got site: %s\n", domain);
      thrd_t t;
      options.site = site_map.pairs[i].site;
      options.URL = malloc(strlen(domain) + strlen(path) + 10);
      sprintf(options.URL, "https://%s%s", domain, path);
      options.path = malloc(strlen(path) + 1);
      strcpy(options.path, path);
      if (query) {
        options.query = malloc(strlen(query) + 1);
        strcpy(options.query, query);
      } else {
        options.query = calloc(1, sizeof(char));
      }

      append_log("pagedata URL: %s\n", options.URL);

      thrd_create(&t, extract, &options);
      thrd_detach(t);

      curl_free(domain);
      curl_free(path);
      curl_free(query);
      return 0;
    };
  }

  curl_conf_t *curl_c = malloc(sizeof(curl_conf_t));
  curl_c->URL = malloc(strlen(URL) + 1);
  strcpy(curl_c->URL, URL);

  /* filename */

  if (fn == NULL) {
    const char *patterns_str[1] = {"(?:.+\\/)([^#/?]+)"};
    str_array_t results = create_str_array(0);
    const str_array_t patterns = {(char **)patterns_str, 1};
    regex_match(path, patterns, &results);
    for (unsigned short i = 0; i < results.n; i++) {
      if (results.str[i]) {
        DEBUG_PRINT("[%d] %s\n", i, results.str[i]);
        gen_fullpathfn(curl_c->outfn, outdir, results.str[i]);
      }
    }
    free_str_array(&results);
    if (curl_c->outfn[0] == '\0') {
      // sprintf(curl_c->outfn, "%s%c%s", outdir, SPLITTER,
      // "test");
      LOG("libcurl",
          "Infer filename failed, please specify a valid filename.\n");
      curl_free(domain);
      curl_free(path);
      curl_free(query);
      return 1;
    }
  } else {
    sprintf(curl_c->outfn, "%s%s%s", outdir,
            outdir[strlen(outdir) - 1] == SPLITTER_CHAR ? "" : SPLITTER_STR,
            fn);
    free_and_nullify(fn);
  }
  DEBUG_PRINT("File will be saved as: %s\n", curl_c->outfn);
  DEBUG_PRINT("Got regular URL: %s\n", curl_c->URL);

  enqueue(&dl_queue, (void *)curl_c);

  curl_free(domain);
  curl_free(path);
  curl_free(query);

  return 0;
}

static bool get_info(const char *URL, long *psize) {
  CURL *curl;
  long resp_code;
  bool support_range = false;
  struct curl_header *pch;
  curl = curl_easy_init();
  curl_easy_setopt(curl, CURLOPT_URL, URL);
  curl_easy_setcommonopts(curl);
  curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
  CURLcode r = curl_easy_perform(curl);
  if (logerr(r)) {
    curl_easy_cleanup(curl);
    return support_range;
  }
  r = curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
                        (curl_off_t *)psize);
  if (logerr(r)) {
    curl_easy_cleanup(curl);
    return support_range;
  }
  CURLHcode rh =
      curl_easy_header(curl, "Accept-Ranges", 0, CURLH_HEADER, -1, &pch);
  if (logerr(rh) || strcmp(pch->value, "bytes")) {
    curl_easy_cleanup(curl);
    return support_range;
  }
  char *ct = NULL;
  r = curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct);
  if (logerr(r)) {
    curl_easy_cleanup(curl);
    return support_range;
  }

  support_range = true;
  curl_easy_cleanup(curl);
  return support_range;
}

static int pull_part(void *a) {
  CURLcode res;
  thrd_info_t *ti = (thrd_info_t *)a;
  curl_conf_t *curl_c = ti->curl_c;
  unsigned char n = ti->no;
  // Here we need to manually control str_array_t
  curl_c->partfn.str[n] = malloc(strlen(curl_c->outfn) + 4);
  sprintf(curl_c->partfn.str[n], "%s.%d", curl_c->outfn, n);
  DEBUG_PRINT("[THRD %hhu] partfn: %s, range: %s\n", n,
              get_str_element(&curl_c->partfn, n), ti->range);
  {
    curl_c->fplist[n] = fopen(get_str_element(&curl_c->partfn, n), "wb+");
    CURL *curl;

    curl = curl_easy_init();
    curl_easy_setopt(curl, CURLOPT_URL, curl_c->URL);
    curl_easy_setcommonopts(curl);
    curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 60L);
    curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 30L);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, curl_c->fplist[n]);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
    if (ti->curl_c->total_thrd != 1) {
      curl_easy_setopt(curl, CURLOPT_RANGE, ti->range);
    }
    curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback);
    curl_easy_setopt(curl, CURLOPT_XFERINFODATA, ti);
    res = curl_easy_perform(curl);
    rewind(curl_c->fplist[n]);
    append_log("[THRD %hhu] File downloaded.\n", n);
    curl_easy_cleanup(curl);
    logerr(res);
  }
  mtx_lock(&mtx);
  curl_c->success_thrd += 1;
  cnd_signal(&cnd); // Unblocks the waiting cleanup thread. If no threads are
                    // blocked, does nothing and returns thrd_success.

  mtx_unlock(&mtx);
  return (int)res;
}

/* Concatenate all part files into outfn (or delete everything when the
 * download was cancelled/corrupted), then reset the task's state.
 * Returns 0 on success, 1 when the output file could not be opened. */
static int merge_and_cleanup(curl_conf_t *curl_c) {
  if (corrupted) {
    append_log("Cancelling...\n");
  } else {
    append_log("Merging files...\n");
  }

  FILE *fop = fopen(curl_c->outfn, "wb");
  if (fop == NULL) {
    // User quitted before downloading, return directly
    return 1;
  }
  for (unsigned short i = 0; i < curl_c->total_thrd; i++) {
    FILE *part = curl_c->fplist[i];
    if (part == NULL) {
      /* Worker never managed to open its part file (the old code
       * passed the NULL straight to fread/fclose — UB). */
      continue;
    }
    if (!corrupted) {
      char buffer[1024];
      size_t bytesRead;
      while ((bytesRead = fread(buffer, 1, sizeof(buffer), part)) > 0) {
        fwrite(buffer, 1, bytesRead, fop);
      }
    }
    fclose(part);
    curl_c->fplist[i] = NULL;
    if (remove(get_str_element(&curl_c->partfn, i)) != 0) {
      append_log("Error deleting partial file %s\n",
                 get_str_element(&curl_c->partfn, i));
    }
  }
  fclose(fop);

  if (corrupted) {
    // Also delete dst file
    if (remove(curl_c->outfn) != 0) {
      append_log("Error deleting file %s\n", curl_c->outfn);
    }
  }
  /* Reset state for the next task.  Freeing partfn here plugs the leak
   * of the per-part filename strings allocated in pull_part(). */
  free_str_array(&curl_c->partfn);
  corrupted = false;
  curl_c->success_thrd = 0;
  curl_c->total_thrd = 0;
  free_and_nullify(curl_c->URL);

  return 0;
}

/* Split the download of curl_c->URL into up to MAX_THREAD ranged parts
 * (when the server supports byte ranges) and spawn one pull_part()
 * worker per part.  Falls back to a single thread otherwise.
 * Returns 0. */
static int download(curl_conf_t *curl_c) {
  /* Reset completion counter for the new task. */
  curl_c->success_thrd = 0;

  curl_off_t cl = 0, begin = 0, end;

  /* static: must outlive this function — workers keep pointers into it. */
  static thrd_info_t thrd_info[MAX_THREAD] = {0};

  bool support_range = get_info(curl_c->URL, &cl);
  /* %ld was a format mismatch for curl_off_t; use libcurl's macro. */
  DEBUG_PRINT("Size: %" CURL_FORMAT_CURL_OFF_T " bytes.\n", cl);
  if (support_range && cl > 0) {
    curl_c->dltotal = cl;
    curl_c->total_thrd = (unsigned char)CEIL_DIV(cl, MAX_THREAD_SIZE);
    if (curl_c->total_thrd > MAX_THREAD) {
      curl_c->total_thrd = MAX_THREAD;
    }
    LOG("libcurl", "Server supports range header, setting threads to %hhu\n",
        curl_c->total_thrd);
  } else {
    LOG("libcurl", "Server doesn't claim range header "
                   "support, falling back to single thread.\n");
    curl_c->total_thrd = 1;
  }
  curl_off_t size_per_thrd = cl / curl_c->total_thrd;

  curl_c->partfn = create_str_array(curl_c->total_thrd);

  for (unsigned char i = 0; i < curl_c->total_thrd; i++) {
    thrd_info[i].no = i;
    /* The last part absorbs the division remainder. */
    curl_off_t chunk_size =
        (i + 1 == curl_c->total_thrd)
            ? cl - (curl_c->total_thrd - 1) * size_per_thrd
            : size_per_thrd;
    end = begin + chunk_size - 1;
    if (curl_c->total_thrd != 1) {
      sprintf(thrd_info[i].range,
              "%" CURL_FORMAT_CURL_OFF_T "-%" CURL_FORMAT_CURL_OFF_T, begin,
              end);
    }
    thrd_info[i].curl_c = curl_c;
    int error = thrd_create(&tid[i], pull_part, &thrd_info[i]);
    if (error)
      append_log("Couldn't run thread number %d, errno %d\n", i, error);
    begin = end + 1;
  }
  return 0;
}

/* Replace every filesystem-hostile character in str with a space.
 * Walks the '\0'-terminated illegal_char sentinel list. */
static void replace_illegal_char(char *str) {
  for (const char *c = illegal_char; *c != '\0'; c++) {
    if (repchr(str, *c, ' '))
      DEBUG_PRINT("Found illegal character '%c', replacing ...\n", *c);
  }
}

/* Sanitize filename, prefix it with the cached output directory, and
 * return a freshly allocated full path.  Consumes (frees) filename. */
static char *callback_struct_convert_fullpath(char *filename) {
  size_t need = strlen(outdir_g) + strlen(filename) + 2; /* sep + NUL */
  char *fullpath = malloc(need);
  replace_illegal_char(filename);
  gen_fullpathfn(fullpath, outdir_g, filename);
  free_and_nullify(filename);
  return fullpath;
}

/* One-time setup: libcurl globals, the shared URL-parser handle, the
 * download queue, and the worker-synchronization primitives.
 * Pair with curl_cleanup(). */
void curl_init(curl_conf_t *curl) {
  (void)curl; /* kept for API compatibility; currently unused */
  curl_global_init(CURL_GLOBAL_ALL);
  h = curl_url();
  dl_queue = create_queue();
  mtx_init(&mtx, mtx_plain);
  cnd_init(&cnd);
}

/* Shutdown: cancel any in-flight download, wait for its workers, then
 * tear down everything curl_init() created. */
void curl_cleanup(status_t *stat) {
  /* We only need to cleanup
   * the currently active download, if any. */
  if (curl_conf) {
    corrupted = true; // Makes progress_callback abort the transfers
    /* Now wait for all workers to acknowledge the cancellation... */
    mtx_lock(&mtx);
    while (curl_conf->success_thrd != curl_conf->total_thrd) {
      cnd_wait(&cnd, &mtx);
    }
    mtx_unlock(&mtx);
    if (!stat->is_done) {
      merge_and_cleanup(curl_conf);
    }
  }
  /* mtx/cnd are created unconditionally in curl_init(), so destroy them
   * unconditionally too (the old code leaked them whenever no download
   * was active at shutdown). */
  mtx_destroy(&mtx);
  cnd_destroy(&cnd);
  free_queue(&dl_queue);
  curl_url_cleanup(h);
  curl_global_cleanup();
}

/* Per-frame poller driven by the UI loop: starts the next queued download
 * when idle, aggregates per-thread progress into stat, and — once every
 * worker has reported in — joins the workers, merges the parts, and fires
 * the registered callback. */
void poll_status(status_t *stat) {
  if (!is_empty_queue(&dl_queue) && stat->is_done) {
    /* extract_done is a flag used to signal that
     * the extractor process is done. */
    curl_conf = (curl_conf_t *)dequeue(&dl_queue);
    if (download(curl_conf)) {
      // Something went wrong when creating download task
      DEBUG_PRINT("Creating download task failed.\n");
    };
    stat->is_done = false;
  }
  if (curl_conf) {
    /* Sum the per-worker byte counters into one progress figure. */
    curl_conf->dlnow = 0L;
    for (unsigned char i = 0; i < curl_conf->total_thrd; i++) {
      curl_conf->dlnow += curl_conf->dlnow_per_thrd[i];
    }
    stat->cur = curl_conf->dlnow;
    stat->total = curl_conf->dltotal;
    DEBUG_PRINT("success_thrd: %hhu, total_thrd: %hhu, is_done: %s\n",
                curl_conf->success_thrd, curl_conf->total_thrd,
                stat->is_done ? "yes" : "no");
    /* success_thrd is written by the workers under mtx; take the same
     * lock before deciding the download is complete. */
    mtx_lock(&mtx);
    if (curl_conf->success_thrd == curl_conf->total_thrd &&
        (curl_conf->total_thrd && !stat->is_done)) {
      stat->is_done = true;
      /* Reap the workers before touching their part files. */
      for (unsigned short i = 0; i < curl_conf->total_thrd; i++) {
        int r;
        thrd_join(tid[i], &r);
      }
      merge_and_cleanup(curl_conf);
      // Perform the callback
      if (is_empty_queue(&dl_queue) && callback_g) {
        callback_g(p_callback_struct_g);
      }
      append_log("Download %s finished.\n", curl_conf->outfn);
      /* Per the NOTICE at the top: NULL when not downloading. */
      curl_conf = NULL;
    }
    mtx_unlock(&mtx);
  }
}

/* Fetch URL into a freshly allocated, NUL-terminated string at *pdstr.
 * The caller owns (and must free) the returned buffer.
 * Returns the CURLcode of the transfer (0 on success). */
int get(const char *URL, char **pdstr) {
  CURL *curl = curl_easy_init();
  str_data_t pagedata = {0};
  pagedata.string = malloc(1);
  pagedata.string[0] = '\0';
  curl_easy_setopt(curl, CURLOPT_URL, URL);
  curl_easy_setcommonopts(curl);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write2str);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&pagedata);
  CURLcode res = logerr(curl_easy_perform(curl));
  /* Hand the accumulated buffer straight to the caller instead of
   * copying it — the old copy leaked pagedata.string on every call. */
  *pdstr = pagedata.string;
  curl_easy_cleanup(curl);
  return res;
}

/* Add an URL to dl_queue and register the task's context.
 * - If outdir is NULL or an empty string, reuse the cached outdir_g
 * - If fn is NULL or an empty string, infer the filename from the URL
 *   (otherwise fail and quit)
 * - If referer is NULL or an empty string, no referer is sent
 * - If callback / p_callback_struct are valid, the callback runs after
 *   the download finishes
 */
void add_url(const char *URL, const char *outdir, const char *fn,
             const char *referer, callback_t callback,
             callback_struct_t *p_callback_struct) {

  /* Copy and sanitize the caller-supplied filename, if any. */
  char *filename = NULL;
  if (fn && fn[0] != '\0') {
    filename = malloc(strlen(fn) + 1);
    strcpy(filename, fn);
    replace_illegal_char(filename);
  }

  /* Cache a non-empty output directory for later calls. */
  if (outdir && outdir[0] != '\0') {
    outdir_g = outdir;
  }

  /* An empty referer means "send none". */
  referer_g = (referer && referer[0] == '\0') ? NULL : referer;
  DEBUG_PRINT("referer_g: %s\n", referer_g);

  callback_g = callback;
  if (p_callback_struct) {
    p_callback_struct->videofn =
        callback_struct_convert_fullpath(p_callback_struct->videofn);
    p_callback_struct->audiofn =
        callback_struct_convert_fullpath(p_callback_struct->audiofn);
    p_callback_struct->filename =
        callback_struct_convert_fullpath(p_callback_struct->filename);
    p_callback_struct_g = p_callback_struct;
  }

  // Pass our cache (outdir_g) to parse_url()
  if (parse_url(URL, outdir_g, filename)) {
    DEBUG_PRINT("parse_url() failed with error.\n");
    return; // Parse failed, quit the task directly
  }
}