/* processing_similiar_search_outlook.js */
const axios        = require('axios');
const https        = require('https');
const natural      = require('natural');
const wordnet      = new natural.WordNet();
const { removeStopwords } = require('stopword');
const PorterStemmer = natural.PorterStemmer;
const JaroWinkler   = natural.JaroWinklerDistance;
const config       = require('../config').default;

// Minimum Jaro-Winkler score for two stems to count as a fuzzy match
// (compared with >= in passesEnhancedFilter).
const FUZZY_THRESHOLD = 0.90;
// Maximum number of requests put into a single Graph $batch call;
// executeInBatches slices the request list into chunks of this size.
const MAX_BATCH       = 20;   // Graph $batch limit
// Fraction of the seed subject's token groups that a candidate subject must
// hit (rounded up, minimum 1) to be considered similar.
const MATCH_THRESHOLD = 0.6;
// Value sent as $top on every per-user / per-folder message query.
const GRAPH_TOP       = 50;   // per user/folder
// Default cap on the number of messages fetchAllPages collects while
// following @odata.nextLink.
const PAGE_LIMIT      = 50;   // safety cap for pagination

// Shared axios client for Graph v1.0 calls (15 s timeout).
// When config.proxy.enableForMicrosoftGraph is truthy both agents are the
// configured proxy agent; otherwise HTTPS uses a keep-alive agent so that
// connections are reused and the HTTP agent is left at axios' default.
const http = axios.create({
  baseURL: 'https://graph.microsoft.com/v1.0',
  timeout: 15000,
  httpsAgent: config.proxy.enableForMicrosoftGraph 
    ? config.proxy.proxyAgent 
    : new https.Agent({ keepAlive: true }),
  httpAgent: config.proxy.enableForMicrosoftGraph 
    ? config.proxy.proxyAgent 
    : undefined,
});

// Memoizes WordNet synonym lookups, keyed by lower-cased word (see fetchSynonyms).
const synonymCache = new Map();

/* -------------------- fuzzy helpers -------------------- */
/**
 * Look up WordNet synonyms for a single word, memoized per lower-cased word.
 *
 * The in-flight promise itself is stored in `synonymCache`, so concurrent
 * callers asking for the same word share one WordNet lookup instead of each
 * starting their own.
 *
 * @param {string} word - Word to look up (case-insensitive).
 * @returns {Promise<string[]>} Lower-cased synonyms with underscores turned
 *   into spaces, excluding the word itself. Empty when WordNet returns nothing
 *   usable for the word.
 */
async function fetchSynonyms(word) {
  const lw = word.toLowerCase();
  if (synonymCache.has(lw)) return synonymCache.get(lw);

  const pending = new Promise(resolve => {
    wordnet.lookup(lw, results => {
      const syns = new Set();
      // Guard the shapes: an exception thrown inside this callback would not
      // reject the promise, it would leave it pending forever.
      const entries = Array.isArray(results) ? results : [];
      for (const entry of entries) {
        const names = entry && Array.isArray(entry.synonyms) ? entry.synonyms : [];
        for (const name of names) {
          if (typeof name === 'string') {
            syns.add(name.replace(/_/g, ' ').toLowerCase());
          }
        }
      }
      syns.delete(lw);
      resolve(Array.from(syns));
    });
  }).catch(err => {
    // A lookup that failed synchronously must not poison the cache.
    synonymCache.delete(lw);
    throw err;
  });

  synonymCache.set(lw, pending);
  return pending;
}

/**
 * Tokenize free text into a de-duplicated list of meaningful words.
 *
 * The text is lower-cased, every character that is neither a word character
 * nor whitespace becomes a space, stop words are removed via `removeStopwords`,
 * and words of two characters or fewer are dropped.
 *
 * @param {string|null|undefined} raw - Text to tokenize; falsy is treated as ''.
 * @returns {string[]} Unique words in first-seen order.
 */
function getCleanWords(raw) {
  const text = raw || '';
  const normalized = text.toLowerCase().replace(/[^\w\s]/g, ' ');
  const tokens = normalized.split(/\s+/).filter((tok) => tok);

  const unique = new Set();
  for (const word of removeStopwords(tokens)) {
    if (word.length > 2) {
      unique.add(word);
    }
  }
  return [...unique];
}

/**
 * Expand a subject line into synonym groups used for fuzzy matching.
 *
 * Each cleaned word of the subject yields one group, at the same index in
 * every returned array.
 *
 * @param {string} subject - Seed subject line.
 * @returns {Promise<{rawTokenGroups: string[][], stemTokenGroups: string[][], anchorIdxs: number[]}>}
 *   - rawTokenGroups: the word followed by its synonyms, de-duplicated
 *   - stemTokenGroups: Porter stems of each raw group, de-duplicated
 *   - anchorIdxs: indexes of groups whose word has at most two synonyms
 *     (heuristic: few synonyms => specific term => "anchor")
 */
async function getTokenGroups(subject) {
  const words = getCleanWords(subject);
  const synLists = await Promise.all(words.map((word) => fetchSynonyms(word)));

  const rawTokenGroups = [];
  const stemTokenGroups = [];
  const anchorIdxs = [];

  words.forEach((word, idx) => {
    const synonyms = synLists[idx] || [];

    const rawGroup = [...new Set([word, ...synonyms])];
    rawTokenGroups.push(rawGroup);

    const stems = rawGroup.map((term) => PorterStemmer.stem(term));
    stemTokenGroups.push([...new Set(stems)]);

    // Tweakable threshold: <= 2 synonyms marks the group as an anchor.
    if (synonyms.length <= 2) {
      anchorIdxs.push(idx);
    }
  });

  return { rawTokenGroups, stemTokenGroups, anchorIdxs };
}

/**
 * Decide whether a candidate subject is similar enough to the seed subject.
 *
 * A token group "hits" when any of its stems equals a stem of the candidate
 * subject, or is within FUZZY_THRESHOLD of one by Jaro-Winkler score.
 *
 * @param {string} subject - Candidate subject line.
 * @param {string[][]} stemTokenGroups - Stemmed synonym groups of the seed.
 * @param {number} requiredMatches - Minimum number of groups that must hit.
 * @param {number[]|null|undefined} anchorIdxs - Indexes of anchor groups; when
 *   non-empty, at least one of them must be among the hits.
 * @returns {boolean} True when both the count and the anchor rule are met.
 */
function passesEnhancedFilter(subject, stemTokenGroups, requiredMatches, anchorIdxs) {
  const candidateStems = getCleanWords(subject).map((word) => PorterStemmer.stem(word));

  const isCloseToCandidate = (token) =>
    candidateStems.includes(token) ||
    candidateStems.some((stem) => JaroWinkler(token, stem) >= FUZZY_THRESHOLD);

  // Record per-group hits (needed for the anchor check) and count them.
  const groupHits = [];
  let hitCount = 0;
  for (const group of stemTokenGroups) {
    const hit = group.some((token) => isCloseToCandidate(token));
    groupHits.push(hit);
    if (hit) {
      hitCount += 1;
    }
  }

  if (hitCount < requiredMatches) {
    return false;
  }

  // Anchors are only enforced when the seed actually produced some.
  const hasAnchors = Boolean(anchorIdxs) && anchorIdxs.length > 0;
  return !hasAnchors || anchorIdxs.some((idx) => groupHits[idx]);
}

/* -------------------- date helpers -------------------- */
/**
 * Build an ISO-8601 [start, end] window of +/- `days` around a timestamp.
 *
 * All arithmetic is done on the UTC calendar so the result does not depend on
 * the server's timezone or on daylight-saving transitions.
 *
 * - Valid input: the window is widened to whole UTC days
 *   (start at 00:00:00.000Z, end at 23:59:59.999Z).
 * - Missing or unparseable input: the window is centred on the current time
 *   and is NOT snapped to day boundaries.
 *
 * @param {string|number|Date|null|undefined} dateLike - Centre of the window.
 * @param {number} [days=2] - Days to extend in each direction.
 * @returns {[string, string]} `[startIso, endIso]`.
 */
function toIsoRangeAround(dateLike, days = 2) {
  // `new Date(null)` is the 1970 epoch, not an invalid date, so treat
  // null/undefined as "no timestamp" explicitly.
  const base = dateLike == null ? new Date(NaN) : new Date(dateLike);
  const isValid = !Number.isNaN(base.getTime());
  const anchor = isValid ? base : new Date();

  const start = new Date(anchor);
  start.setUTCDate(start.getUTCDate() - days);
  const end = new Date(anchor);
  end.setUTCDate(end.getUTCDate() + days);

  if (isValid) {
    start.setUTCHours(0, 0, 0, 0);
    end.setUTCHours(23, 59, 59, 999);
  }
  return [start.toISOString(), end.toISOString()];
}
/**
 * Take the first ten characters (the `YYYY-MM-DD` part) of an ISO timestamp.
 *
 * @param {string|null|undefined} iso - ISO-8601 timestamp; falsy yields ''.
 * @returns {string} The leading date portion.
 */
function toYMD(iso) {
  if (!iso) {
    return '';
  }
  return iso.slice(0, 10);
}

/* -------------------- Graph query builders -------------------- */
/**
 * Strip leading reply/forward markers from a subject so it can be used as an
 * exact search phrase.
 *
 * Removes every stacked "re:", "fw:" or "fwd:" prefix (case-insensitive,
 * optional whitespace around the colon), e.g. "RE: FW: Invoice" -> "Invoice",
 * then trims the remainder.
 *
 * @param {string|null|undefined} s - Raw subject; falsy is treated as ''.
 * @returns {string} Subject without leading reply/forward prefixes.
 */
function normalizeSubjectForExact(s) {
  return (s || '').replace(/^\s*(?:(?:re|fw|fwd)\s*:\s*)+/i, '').trim();
}

/**
 * Build the AQS string used as the value of `$search`: the normalized subject
 * as a quoted phrase plus a received-date window.
 *
 * The date window lives inside the AQS text because `$search` and `$filter`
 * cannot be combined on the same request.
 *
 * @param {string} subject - Seed subject (reply/forward prefix is stripped).
 * @param {string} startIso - Window start, ISO timestamp (date part is used).
 * @param {string} endIso - Window end, ISO timestamp (date part is used).
 * @returns {string} AQS query text.
 */
function buildAqsSearch(subject, startIso, endIso) {
  // Double quotes inside the phrase are backslash-escaped.
  const escapedPhrase = normalizeSubjectForExact(subject).replace(/"/g, '\\"');
  const clauses = [
    `subject:"${escapedPhrase}"`,
    `received>=${toYMD(startIso)}`,
    `received<=${toYMD(endIso)}`,
  ];
  return clauses.join(' ');
}

/**
 * Build the `$filter` expression for the date-only fallback (no `$search`):
 * messages whose receivedDateTime lies within [startIso, endIso], inclusive.
 *
 * @param {string} startIso - Window start as an ISO timestamp.
 * @param {string} endIso - Window end as an ISO timestamp.
 * @returns {string} Parenthesized OData filter expression (not URL-encoded).
 */
function buildDateOnlyFilter(startIso, endIso) {
  const lowerBound = `receivedDateTime ge ${startIso}`;
  const upperBound = `receivedDateTime le ${endIso}`;
  return `(${lowerBound} and ${upperBound})`;
}

/**
 * Build the Graph $batch sub-requests for one mailbox.
 *
 * Two folders are queried (all messages, and Deleted Items), each in two
 * variants:
 *   - `search`:   `$search` with an AQS phrase + date window
 *   - `dateOnly`: `$filter` on receivedDateTime, ordered newest first
 *
 * Request ids have the form `<email>#<messages|deleteditems>#<search|dateonly>`.
 *
 * @param {string} email - Mailbox address (URL-encoded into the path).
 * @param {string|number|Date} mailRecievedTime - Centre of the +/- 2 day window.
 * @param {string} seedSubject - Subject used for the AQS phrase.
 * @returns {{search: object[], dateOnly: object[]}} Sub-requests, in the order
 *   [all messages, deleted items] for each variant.
 */
function buildFolderRequests(email, mailRecievedTime, seedSubject) {
  const [startIso, endIso] = toIsoRangeAround(mailRecievedTime, 2);

  const user = encodeURIComponent(email);
  const folders = [
    { tag: 'messages', path: `/users/${user}/messages` },
    { tag: 'deleteditems', path: `/users/${user}/mailFolders/deleteditems/messages` },
  ];
  const commonQS = `$top=${GRAPH_TOP}&$select=id,subject,parentFolderId,receivedDateTime`;

  // $search variant (AQS). Graph does not accept $orderby (or $filter) together
  // with $search on message collections, so no $orderby is sent here.
  const aqs = buildAqsSearch(seedSubject, startIso, endIso);
  const searchQS = `${commonQS}&$search="${encodeURIComponent(aqs)}"`;
  const search = folders.map(({ tag, path }) => ({
    id: `${email}#${tag}#search`,
    method: 'GET',
    url: `${path}?${searchQS}`,
    headers: { 'ConsistencyLevel': 'eventual' },
  }));

  // $filter (date-only) fallback; ordering is allowed here because the
  // $orderby property is the same one used in $filter.
  const dateFilter = buildDateOnlyFilter(startIso, endIso);
  const filterQS = `${commonQS}&$orderby=receivedDateTime desc&$filter=${encodeURIComponent(dateFilter)}`;
  const dateOnly = folders.map(({ tag, path }) => ({
    id: `${email}#${tag}#dateonly`,
    method: 'GET',
    url: `${path}?${filterQS}`,
  }));

  return { search, dateOnly };
}

/* -------------------- batching -------------------- */
/**
 * POST one Graph `$batch` payload and return its per-request responses.
 *
 * Best-effort: a failed batch call is logged and yields an empty array rather
 * than throwing, so one bad chunk does not abort the whole search.
 *
 * @param {object[]} requests - Batch sub-requests (id, method, url, headers).
 * @param {string} token - Bearer token for Graph.
 * @returns {Promise<object[]>} The `responses` array, or [] on failure.
 */
async function outlookBatchProcess(requests, token) {
  try {
    const { data } = await http.post('/$batch', { requests }, {
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${token}`,
      }
    });
    return data && Array.isArray(data.responses) ? data.responses : [];
  } catch (err) {
    // Log only message/status: the error's request config carries the bearer
    // token in its headers and must not end up in logs.
    const count = Array.isArray(requests) ? requests.length : 0;
    const status = err && err.response ? err.response.status : undefined;
    const detail = err && err.message ? err.message : String(err);
    console.error(
      `[outlook] $batch of ${count} request(s) failed` +
      `${status ? ` (HTTP ${status})` : ''}: ${detail}`
    );
    return [];
  }
}

/**
 * Send sub-requests to Graph in `$batch` chunks of MAX_BATCH, running up to
 * `concurrency` chunks at a time.
 *
 * Responses are returned in chunk order (the order of `allRequests`), so the
 * output does not depend on which chunk happens to finish first.
 *
 * @param {object[]} allRequests - Every sub-request to execute.
 * @param {string} token - Bearer token for Graph.
 * @param {number} [concurrency=6] - Maximum chunks in flight; values below 1
 *   (or NaN) are treated as 1 so that requests are never silently dropped.
 * @returns {Promise<object[]>} All sub-responses, flattened.
 */
async function executeInBatches(allRequests, token, concurrency = 6) {
  const chunks = [];
  for (let i = 0; i < allRequests.length; i += MAX_BATCH) {
    chunks.push(allRequests.slice(i, i + MAX_BATCH));
  }
  if (chunks.length === 0) return [];

  // One slot per chunk keeps the output ordered regardless of completion order.
  const perChunk = new Array(chunks.length);
  let next = 0;
  async function worker() {
    while (next < chunks.length) {
      // Claiming the index is synchronous, so two workers never take the same chunk.
      const i = next++;
      perChunk[i] = await outlookBatchProcess(chunks[i], token);
    }
  }

  const requested = Math.floor(concurrency) || 1;
  const n = Math.max(1, Math.min(requested, chunks.length));
  await Promise.all(Array.from({ length: n }, () => worker()));
  return perChunk.flat();
}

/**
 * Follow `@odata.nextLink` pages and collect their `value` items until the
 * chain ends or `limit` items have been gathered.
 *
 * @param {string|null|undefined} nextLink - URL of the first page to fetch.
 * @param {string} token - Bearer token for Graph.
 * @param {object} [headers={}] - Extra headers sent with every page request.
 * @param {number} [limit=PAGE_LIMIT] - Maximum number of items to return.
 * @returns {Promise<object[]>} At most `limit` items, in page order.
 */
async function fetchAllPages(nextLink, token, headers = {}, limit = PAGE_LIMIT) {
  const requestOptions = {
    headers: { 'Authorization': `Bearer ${token}`, ...headers },
  };
  if (config.proxy.enableForMicrosoftGraph) {
    requestOptions.httpsAgent = config.proxy.proxyAgent;
    requestOptions.httpAgent = config.proxy.proxyAgent;
  }

  const collected = [];
  let cursor = nextLink;
  while (cursor && collected.length < limit) {
    const response = await http.get(cursor, requestOptions);
    const page = response.data;

    if (Array.isArray(page.value)) {
      const remaining = limit - collected.length;
      for (const entry of page.value.slice(0, remaining)) {
        collected.push(entry);
      }
    }

    // Only keep following links while there is still room for more items.
    cursor = collected.length < limit ? page['@odata.nextLink'] : null;
  }
  return collected;
}

/* -------------------- main -------------------- */
/**
 * Find messages similar to a reported mail across a set of Outlook mailboxes.
 *
 * The first item supplies the seed subject, the received time and the Graph
 * token; every item supplies the mailbox (`email`) to search plus metadata that
 * is copied onto each result row.
 *
 * Phase 1 queries each mailbox (all messages + Deleted Items) with `$search`.
 * Phase 2 re-queries, with a date-only `$filter`, every mailbox/folder whose
 * search produced no matching message. That includes searches that failed,
 * returned nothing similar, or never received a response because their whole
 * batch call failed. Every candidate is filtered locally with
 * `passesEnhancedFilter`.
 *
 * @param {object[]} items - One entry per mailbox to search.
 * @returns {Promise<object[]>} One row per matching message; [] for no input.
 */
async function processSimilarSearchOutlook(items) {
  if (!items || !items.length) return [];

  const seed = items[0];
  const seedSubject = seed.subject || '';
  const { rawTokenGroups, stemTokenGroups, anchorIdxs } = await getTokenGroups(seedSubject);
  const requiredMatches = Math.max(1, Math.ceil(rawTokenGroups.length * MATCH_THRESHOLD));
  const isSimilar = (msg) =>
    passesEnhancedFilter(msg.subject, stemTokenGroups, requiredMatches, anchorIdxs);

  // Build requests for each user (all messages + Deleted Items; $search and fallback).
  const searchRequests = [];
  const dateOnlyRequests = [];
  const metaIdx = new Map(); // request id -> { item, folder, headers }
  const remember = (item, r) => {
    const folder = r.id.includes('#deleteditems#') ? 'trash' : 'inbox';
    metaIdx.set(r.id, { item, folder, headers: r.headers || {} });
  };
  for (const item of items) {
    const { search, dateOnly } = buildFolderRequests(item.email, seed.mailRecievedTime, seedSubject);
    for (const r of search) {
      searchRequests.push(r);
      remember(item, r);
    }
    for (const r of dateOnly) {
      dateOnlyRequests.push(r);
      remember(item, r);
    }
  }

  const token = seed.token;
  const results = [];

  // Phase 1: $search (phrase + date in AQS)
  const searchResponses = await executeInBatches(searchRequests, token, 6);
  const satisfied = new Set(); // search ids that produced at least one match
  for (const res of searchResponses) {
    const meta = metaIdx.get(res.id);
    if (!meta || res.status !== 200 || !res.body) continue;

    const msgs = await collectMatchingMessages(res.body, token, meta.headers, isSimilar);
    if (!msgs.length) continue;

    satisfied.add(res.id);
    for (const msg of msgs) results.push(buildResultRow(meta, msg));
  }

  // Phase 2: fallback ($filter date-only + local fuzzy + anchor).
  // Derived from the request list rather than from the responses, so requests
  // that never got a response still get their fallback.
  const want = new Set(
    searchRequests
      .filter((r) => !satisfied.has(r.id))
      .map((r) => toFallbackId(r.id))
  );
  if (want.size) {
    const fbReqs = dateOnlyRequests.filter((r) => want.has(r.id));
    const fbResponses = await executeInBatches(fbReqs, token, 6);
    for (const res of fbResponses) {
      const meta = metaIdx.get(res.id);
      if (!meta || res.status !== 200 || !res.body) continue;

      const msgs = await collectMatchingMessages(res.body, token, meta.headers, isSimilar);
      for (const msg of msgs) results.push(buildResultRow(meta, msg));
    }
  }

  return results;
}

/** Map a `...#search` request id to its `...#dateonly` counterpart. */
function toFallbackId(searchId) {
  const suffix = '#search';
  return searchId.endsWith(suffix)
    ? `${searchId.slice(0, -suffix.length)}#dateonly`
    : searchId;
}

/** Filter the first page of a Graph response and, if linked, its following pages. */
async function collectMatchingMessages(body, token, headers, isSimilar) {
  const page0 = Array.isArray(body.value) ? body.value : [];
  let msgs = page0.filter((m) => isSimilar(m));

  const nextLink = body['@odata.nextLink'];
  if (msgs.length < PAGE_LIMIT && nextLink) {
    // The extra-page budget is PAGE_LIMIT minus the raw size of the first page
    // (not the number of matches), so a full first page leaves no budget.
    const more = await fetchAllPages(nextLink, token, headers, PAGE_LIMIT - page0.length);
    msgs = msgs.concat(more.filter((m) => isSimilar(m)));
  }
  return msgs;
}

/** Combine the mailbox item's metadata with one matched Graph message. */
function buildResultRow({ item, folder }, msg) {
  return {
    from:              item.from,
    subject:           msg.subject,
    reportedMailId:    item.reportedMailId,
    companyId:         item.companyId,
    domainId:          item.domainId,
    isLast:            item.isLast,
    internetMessageId: item.internetMessageId,
    messageId:         msg.id,
    name:              item.name,
    emailUserId:       item.emailUserId || null,
    email:             item.email,
    foundInTrash:      folder === 'trash',
    foundInInbox:      folder === 'inbox',
    mailAction:        item.mailAction,
    folderId:          msg.parentFolderId
  };
}

// The module's export is the main search entry point itself (not an object).
module.exports = processSimilarSearchOutlook;
