import "dotenv/config";
import puppeteer from "puppeteer";
import axios from "axios";
import mysql from "mysql2/promise";
import { DateTime } from "luxon";
import path from "node:path";
import fs from "node:fs";

const LOG_FILE = path.join(process.cwd(), "newitems.log");
const MAX_LOG_SIZE = 20 * 1024 * 1024; // 20MB

const ONE_DAY_MS = 24 * 60 * 60 * 1000;
const CARD_SELECTOR = "li.s-card--horizontal";
const START_TIME_FORMAT = "yyyy/MM/dd HH:mm";
// Fallback listing lifetime (30 days, in seconds) used when a card shows no time-left value.
const DEFAULT_LISTING_DURATION_S = 2592000;
// Resource types that are aborted during scraping to save memory and bandwidth.
const BLOCKED_RESOURCE_TYPES = new Set(["image", "stylesheet", "font", "media"]);
// Upper bound for a single insert call so a hung API cannot stall the whole session.
const API_TIMEOUT_MS = 30000;

// --- HELPERS ---

/**
 * Resolves after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Returns the page HTML, retrying once if the page navigated while the content was being read.
 * @param {import("puppeteer").Page} page
 * @returns {Promise<string>} Serialized HTML of the page.
 * @throws Re-throws any error other than "Execution context was destroyed".
 */
async function safeGetContent(page) {
  try {
    return await page.content();
  } catch (err) {
    if (err?.message?.includes("Execution context was destroyed")) {
      // A navigation happened mid-read: let the network settle (best effort), then retry once.
      await page.waitForNetworkIdle({ idleTime: 1000 }).catch(() => {});
      return await page.content();
    }
    throw err;
  }
}

/**
 * Parses a year-less listing date ("5-Jan 10:30" or "Jan-5 10:30") expressed in `fromTimezone`
 * and formats it in the system time zone (despite the name, the target zone is whatever zone
 * the process runs in).
 *
 * The year is taken from "now" in the source zone. A result that lands more than one day in the
 * future is moved back one year, so a December listing read in January is not dated a year ahead.
 *
 * @param {string} dateString - Date text without a year.
 * @param {string} fromTimezone - IANA zone the text is expressed in.
 * @param {string} formatDate - Luxon output format.
 * @returns {string|null} Formatted date, or null when the input cannot be parsed.
 */
function parseAndConvertToSydney(dateString, fromTimezone, formatDate) {
  if (!dateString) return null;

  const nowInSource = DateTime.now().setZone(fromTimezone);
  if (!nowInSource.isValid) return null;

  const format = /^\d/.test(dateString) ? "d-MMM HH:mm" : "MMM-d HH:mm";
  let dt = DateTime.fromFormat(`${dateString} ${nowInSource.year}`, `${format} yyyy`, { zone: fromTimezone });
  if (!dt.isValid) return null;

  if (dt.toMillis() > nowInSource.toMillis() + ONE_DAY_MS) {
    dt = dt.minus({ years: 1 });
  }

  const systemTz = Intl.DateTimeFormat().resolvedOptions().timeZone;
  return dt.setZone(systemTz).toFormat(formatDate);
}

// --- LOAD CONFIG FROM DB ---

/**
 * Builds the list of search pages to scrape.
 *
 * For every active market (`ebay_site.flag = 1`) two default searches are generated from the
 * level-1 keywords: one sorted with `_sop=10` (tagged "FixedPrice") and one with `_sop=1`
 * (tagged "Auction"). Custom URLs from `newitems_config` are added when their host matches a
 * known market.
 *
 * @param {import("mysql2/promise").Pool} pool
 * @returns {Promise<Array<{data: string, type_custom: string, config_id: number|null,
 *   from_site: string, timezone: string, listingType: string}>>}
 */
async function getConfigs(pool) {
  // The three lookups are independent, so run them concurrently.
  const [[keywords], [keywordsCustom], [markets]] = await Promise.all([
    pool.execute("SELECT id, name FROM hot_item_keyword WHERE level = 1"),
    pool.execute("SELECT id, name, url FROM newitems_config"),
    pool.execute("SELECT id, url, market_code, shipping_postcode, timezone, country_name FROM ebay_site WHERE flag = 1"),
  ]);

  const configs = [];
  const keyWord = keywords.map((el) => el.name).join("+");

  // Without keywords the default search would be an empty `_nkw=` query; skip it.
  if (keyWord) {
    for (const m of markets) {
      const postCode = m.shipping_postcode ? `&_stpos=${m.shipping_postcode}` : "";
      const shared = {
        type_custom: "cisco",
        config_id: null,
        from_site: m.market_code,
        timezone: m.timezone,
      };
      // FixedPrice search URL
      configs.push({
        data: `${m.url}/sch/i.html?_from=R40&_nkw=${keyWord}&_sacat=0&_sop=10${postCode}`,
        ...shared,
        listingType: "FixedPrice",
      });
      // Auction search URL
      configs.push({
        data: `${m.url}/sch/i.html?_from=R40&_nkw=${keyWord}&_sacat=0&_sop=1${postCode}`,
        ...shared,
        listingType: "Auction",
      });
    }
  }

  for (const k of keywordsCustom) {
    const matchedMarket = findMarketDataFromSearchUrl(k.url, markets);
    if (!matchedMarket) continue;
    configs.push({
      data: k.url,
      type_custom: "custom",
      config_id: k.id,
      from_site: matchedMarket.market_code,
      timezone: matchedMarket.timezone,
      listingType: k.url.includes("_sop=10") ? "FixedPrice" : "Auction",
    });
  }

  return configs;
}

/**
 * Finds the market a search URL belongs to, either because the URL contains the market's base
 * URL or because both share the same host.
 *
 * @param {string|null|undefined} searchUrl
 * @param {Array<{url: string}>} marketDatas
 * @returns {object|null} The matching market row, or null when nothing matches.
 */
function findMarketDataFromSearchUrl(searchUrl, marketDatas) {
  const trimmed = searchUrl?.trim();
  if (!trimmed) return null;

  let searchHost;
  try {
    searchHost = new URL(trimmed).host;
  } catch {
    return null;
  }

  for (const data of marketDatas) {
    // An empty market URL would match every search URL through includes("").
    if (!data?.url) continue;
    if (searchUrl.includes(data.url)) return data;
    try {
      if (new URL(data.url).host === searchHost) return data;
    } catch {
      // Malformed market URL: ignore this market but keep checking the others.
    }
  }
  return null;
}

// --- CORE SCRAPER ---

/**
 * Extracts listing data from search-result cards.
 *
 * IMPORTANT: this function is serialized by Puppeteer and executed inside the page, so it must
 * be fully self-contained and must not reference anything from module scope.
 *
 * @param {Element[]} nodes - The `li.s-card--horizontal` elements.
 * @param {{listingType?: string, from_site?: string, type_custom?: string, config_id?: number|null}} store
 * @returns {object[]} One plain payload object per accepted card.
 */
function extractListingCards(nodes, store) {
  const COUNTRY_MARKETS = [
    { country_name: "Australia", market_code: "EBAY_AU" },
    { country_name: "United Kingdom", market_code: "EBAY_GB" },
    { country_name: "United States", market_code: "EBAY_US" },
    { country_name: "Canada", market_code: "EBAY_ENCA" },
  ];

  // Converts text such as "2d 3h 15m" into a duration in seconds.
  const durationToSeconds = (str) => {
    if (!str) return 0;
    const regex = /(\d+)\s*(d|day|h|hour|m|minute|s|second)s?/gi;
    let total = 0;
    let match;
    while ((match = regex.exec(str))) {
      const value = parseInt(match[1], 10);
      const unit = match[2].toLowerCase();
      if (unit === "d" || unit === "day") total += value * 86400;
      else if (unit === "h" || unit === "hour") total += value * 3600;
      else if (unit === "m" || unit === "minute") total += value * 60;
      else if (unit === "s" || unit === "second") total += value;
    }
    return total;
  };

  const results = [];
  const type = store.listingType || "FixedPrice";
  const siteOfStore = store.from_site;

  nodes.forEach((node) => {
    const payload = {};

    // ---------------- LINK + ID ----------------
    const linkDetail = node.querySelector("div.su-image a")?.href || "";
    if (!linkDetail) return;
    payload.link_detail = linkDetail;
    const idMatch = linkDetail.match(/\/itm\/(\d+)/);
    if (!idMatch) return;
    payload.id = idMatch[1];

    // ---------------- NAME ----------------
    const titleEl = node.querySelector(".s-card__title");
    if (titleEl) {
      const name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
      payload.title = name;
      payload.description = name;
    }

    // ---------------- CONDITION ----------------
    payload.condition_item = node.querySelector(".s-card__subtitle")?.textContent.trim() || "";

    // ---------------- IMAGE ----------------
    const pictureEl = node.querySelector("img.s-card__image");
    if (pictureEl) payload.picture = pictureEl.getAttribute("src") || "";

    // ---------------- PRICE + CURRENCY ----------------
    const priceEl = node.querySelector(".s-card__price");
    if (priceEl) {
      // Strip every thousands separator, not only the first one ("1,234,567.00").
      let text = priceEl.textContent.replace(/,/g, "").trim();
      text = text.replace("£", "GBP ").replace("$", "");
      payload.priceText = text;
      const match = text.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
      if (match) {
        payload.currencyID = match[1] || match[3] || "";
        payload.price = match[2] || "";
        if (!payload.currencyID.trim()) {
          // Fall back to 2-letter prefixes such as "AU 12.00".
          const shortCode = text.match(/([A-Za-z]{2,3})\s?([\d.]+)/);
          if (shortCode) payload.currencyID = shortCode[1];
        }
      }
    }

    // ---------------- AUCTION / BIDS ----------------
    const bidsEl = node.querySelector(".s-card__attribute-row .su-styled-text.large");
    if (bidsEl) {
      const txt = bidsEl.textContent.trim();
      if (/bids/i.test(txt)) {
        const bid = txt.match(/\d+/);
        if (bid) payload.bidCount = bid[0];
        payload.is_auctionList = 1;
      }
      if (/Buy It Now/i.test(txt)) payload.buyItNowAvailable = true;
      if (/Best Offer/i.test(txt)) payload.makeOffer = true;
    }

    // Keep only cards matching the listing type this search is meant to collect.
    if (type === "Auction" && !payload.is_auctionList) return;
    if (type !== "Auction" && !payload.buyItNowAvailable && !payload.makeOffer) return;

    // ---------------- TIME LEFT (auction) ----------------
    const leftEl = node.querySelector(".s-card__time-left");
    if (leftEl) {
      const leftText = leftEl.textContent.trim();
      payload.timeLeft = leftText;
      payload.end_time = Math.floor(Date.now() / 1000 + durationToSeconds(leftText));
    }

    // ---------------- SHIPPING & FROM SITE ----------------
    node.querySelectorAll(".s-card__attribute-row .su-styled-text.secondary.large").forEach((sp) => {
      const txt = sp.textContent;
      const lower = txt?.toLowerCase() ?? "";

      // Shipping cost
      if (lower.includes("delivery")) {
        const m = txt.match(/\d+\.\d+/);
        if (m) payload.shipping_cost = m[0];
        if (lower.includes("free") || !payload.shipping_cost) payload.shipping_cost = 0;
      }

      // Item location
      if (lower.includes("from") || lower.includes("located in")) {
        payload.country = txt;
        const site = COUNTRY_MARKETS.find((candidate) => txt.includes(candidate.country_name));
        if (site) payload.from_site = site.market_code;
      }
    });

    // No location shown: assume the item belongs to the site being searched.
    if (!payload.country) payload.from_site = siteOfStore;
    // Location shown but unknown, or different from the searched site: drop the card.
    if (payload.country && (payload.from_site !== siteOfStore || !payload.from_site)) return;
    delete payload.country;

    // ---------------- SELLER / FEEDBACK ----------------
    node.querySelectorAll(".s-card__attribute-row .su-styled-text.primary.large").forEach((sp) => {
      const text = sp.textContent;
      // e.g. "96.9% positive (105)"
      const m = text.match(/^([\d.]+%)\s*\w*\s*\(([^)]+)\)/);
      if (m) {
        payload.feedbackPercent = m[1];
        payload.feedbackScore = m[2];
      } else {
        payload.seller = text.trim();
      }
    });
    if (!payload.seller || !payload.price) return;

    // ---------------- LISTING DATE (Buy It Now listings) ----------------
    // Only the raw text is captured here; conversion to a timestamp happens in Node.
    const dateEl = node.querySelector(".s-card__attribute-row .su-styled-text.secondary.bold.large");
    if (dateEl) {
      const text = dateEl.textContent.trim();
      if (!Number.isNaN(Date.parse(text))) payload.time = text;
    }

    payload.current_time = Date.now();
    payload.type_custom = store.type_custom || "custom";
    payload.listingType = type;
    payload.config_id = store.config_id || null;
    results.push(payload);
  });

  return results;
}

/**
 * Opens one search page, waits for the result cards to appear and returns the parsed listings.
 * Never throws: any failure is logged and yields an empty list.
 *
 * @param {import("puppeteer").Browser} browser
 * @param {{data: string, timezone?: string}} store - Search config produced by getConfigs.
 * @returns {Promise<object[]>} Parsed items; items with a listing date also carry
 *   `timeConvert`, `start_time`, `end_time` and `start_time_string`.
 */
async function scrapeWithPuppeteer(browser, store) {
  let page = null;
  try {
    page = await browser.newPage();

    // Save RAM: block requests that are not needed for parsing.
    await page.setRequestInterception(true);
    page.on("request", (req) => {
      const outcome = BLOCKED_RESOURCE_TYPES.has(req.resourceType()) ? req.abort() : req.continue();
      // The request may already be handled or the page closed; that is not an error here.
      outcome.catch(() => {});
    });

    await page.goto(store.data, { waitUntil: "networkidle2", timeout: 60000 });

    // Poll (up to 10 x 2s) until the anti-bot interstitial is gone and cards are rendered.
    for (let retries = 0; retries < 10; retries++) {
      const html = await safeGetContent(page);
      if (!html.includes("Checking your browser") && (await page.$(CARD_SELECTOR))) break;
      await wait(2000);
    }

    const items = await page.$$eval(CARD_SELECTOR, extractListingCards, store);

    return items.map((item) => {
      if (!item.time) return item;
      const timeConvert = parseAndConvertToSydney(item.time, store.timezone, START_TIME_FORMAT);
      // Unparseable date: keep the item, but do not invent a start time at the epoch.
      if (!timeConvert) return item;
      const ts = DateTime.fromFormat(timeConvert, START_TIME_FORMAT).toSeconds();
      return {
        ...item,
        timeConvert,
        start_time: ts,
        end_time: item.end_time || ts + DEFAULT_LISTING_DURATION_S,
        start_time_string: timeConvert,
      };
    });
  } catch (err) {
    console.error(`Lỗi tại ${store.data}:`, err.message);
    return [];
  } finally {
    if (page) {
      // A failing close must not replace the result above or abort the caller's cycle.
      await page.close().catch((closeErr) => console.error("Error closing page:", closeErr.message));
    }
  }
}

/**
 * Shrinks the log file to its newest 70% of lines once it grows past MAX_LOG_SIZE.
 * Best effort: failures are logged and never thrown.
 * NOTE(review): nothing in this file writes to LOG_FILE; presumably stdout is redirected to it
 * by the process manager — confirm.
 */
function trimLogFileIfNeeded() {
  try {
    if (!fs.existsSync(LOG_FILE)) return;
    const stats = fs.statSync(LOG_FILE);
    if (stats.size <= MAX_LOG_SIZE) return;

    const lines = fs.readFileSync(LOG_FILE, "utf8").split("\n");
    // Drop the empty element produced by a trailing newline so no blank line piles up per trim.
    if (lines[lines.length - 1] === "") lines.pop();

    // Keep the newest 70% of lines. slice(-0) would keep everything, so handle 0 explicitly.
    const keepLines = Math.floor(lines.length * 0.7);
    const trimmed = keepLines > 0 ? `${lines.slice(-keepLines).join("\n")}\n` : "";
    fs.writeFileSync(LOG_FILE, trimmed, "utf8");
    console.log(`\n[SYSTEM] Log file trimmed (Size: ${(stats.size / 1024 / 1024).toFixed(2)}MB > 20MB)`);
  } catch (err) {
    console.error("Error trimming log file:", err);
  }
}

// --- MAIN PROCESS ---

/**
 * Runs one scraping session: up to 60 cycles or one hour, whichever comes first, with each
 * cycle padded to last at least 60 seconds. Every item not yet present in the `items` table is
 * posted to the insert API. The process exits with code 0 when the session ends.
 * NOTE(review): presumably an external supervisor restarts the process afterwards — confirm.
 */
async function main() {
  const pool = mysql.createPool({
    host: process.env.MYSQL_HOST,
    user: process.env.MYSQL_USER,
    password: process.env.MYSQL_PASSWORD,
    database: process.env.MYSQL_DB_NAME,
    waitForConnections: true,
    connectionLimit: 5,
  });

  let browser = null;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--single-process"],
    });

    console.log("🚀 Bắt đầu phiên làm việc mới...");

    const MAX_RUNS = 60; // 60 cycles (~1 hour)
    const ONE_HOUR_MS = 60 * 60 * 1000;
    const sessionDeadline = Date.now() + ONE_HOUR_MS; // Hard stop for this session
    let runCount = 0;

    while (runCount < MAX_RUNS && Date.now() < sessionDeadline) {
      runCount++;
      const startTime = Date.now();
      console.log(`--- Chu kỳ ${runCount}/${MAX_RUNS} --- ${sessionDeadline - Date.now()} ---`);

      try {
        const configs = await getConfigs(pool);
        for (const store of configs) {
          if (Date.now() > sessionDeadline) break;
          const items = await scrapeWithPuppeteer(browser, store);

          for (const item of items) {
            const [rows] = await pool.execute("SELECT id FROM items WHERE id = ?", [item.id]);
            if (rows.length > 0) continue;

            const title = (item.title || "").replace("Opens in a new window or tab", "").trim();
            console.log(`Processing ${store.data}`);
            console.log({ ...item, title });

            try {
              // FIXME(security): the API key is hard-coded in source; move it to configuration.
              const res = await axios.post(
                `${process.env.API_DISTI_HOST}/api/items/insert`,
                { ...item, title },
                { headers: { "x-key": "CanTho#1" }, timeout: API_TIMEOUT_MS }
              );
              console.log(res.data, item.id, item.timeConvert, item.time);
            } catch (e) {
              // One failed insert must not stop the remaining items.
              console.error(`❌ Lỗi API ID ${item.id}:`, e.message, e.response?.data ?? "");
            }
          }
        }
      } catch (err) {
        console.error("Lỗi chu kỳ:", err.message);
      }

      const duration = Date.now() - startTime;
      const delay = Math.max(0, 60000 - duration);
      console.log(`Hoàn thành chu kỳ trong ${duration / 1000}s. Nghỉ ${delay / 1000}s.`);
      // Skip the pause when it would run past the cycle limit or the session deadline.
      if (runCount < MAX_RUNS && Date.now() + delay < sessionDeadline) await wait(delay);
    }

    console.log("🏁 Đã chạy đủ 60 lần. Đang làm mới tiến trình...");
  } finally {
    // Release resources even when the session aborts with an error.
    if (browser) {
      await browser.close().catch((err) => console.error("Error closing browser:", err.message));
    }
    await pool.end().catch((err) => console.error("Error closing MySQL pool:", err.message));
  }

  // Trim the log one last time before the process exits.
  trimLogFileIfNeeded();
  process.exit(0);
}

main().catch((err) => {
  console.error("FATAL ERROR:", err);
  process.exit(1);
});