import "dotenv/config";
import puppeteer from "puppeteer";
import axios from "axios";
import mysql from "mysql2/promise";
import { DateTime } from "luxon";

/** CSS selector of one search-result card on the listing page. */
const CARD_SELECTOR = "li.s-card--horizontal";

/** Text present in the HTML while the anti-bot interstitial is displayed. */
const BROWSER_CHECK_MARKER = "Checking your browser";

/** Maximum number of polls while waiting for the result cards to appear. */
const MAX_LOAD_RETRIES = 10;

/** Pause between two polls, in milliseconds. */
const RETRY_DELAY_MS = 2000;

/** Format used for the converted listing time string. */
const LISTING_TIME_FORMAT = "yyyy/MM/dd HH:mm";

/** 30 days in seconds; added to a listing's start time to derive its end time. */
const START_TO_END_OFFSET_SECONDS = 2592000;

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function wait(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Read the page HTML, tolerating a navigation that happens mid-read.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<string>} The page HTML.
 * @throws Re-throws any error other than "Execution context was destroyed".
 */
async function safeGetContent(page) {
  try {
    return await page.content();
  } catch (err) {
    // A navigation happened while reading: wait for the page to settle, then read again.
    if (err?.message?.includes("Execution context was destroyed")) {
      await page.waitForNetworkIdle({ idleTime: 1000 }).catch(() => {});
      return await page.content();
    }
    throw err;
  }
}

/**
 * Parse a year-less listing date ("Dec-9 23:05" or "9-Dec 23:05") in the given
 * zone and re-format it in the zone of the machine running this script.
 *
 * Despite the name, the target zone is the system time zone, not a hard-coded
 * Sydney zone. The current year is assumed because the input carries no year.
 *
 * @param {string} dateString - Date text as shown on the card.
 * @param {string} fromTimezone - IANA zone the text is expressed in.
 * @param {string} formatDate - Luxon format of the returned string.
 * @returns {string|null} The formatted date, or null when the input cannot be parsed.
 */
function parseAndConvertToSydney(dateString, fromTimezone, formatDate) {
  if (typeof dateString !== "string") return null;

  const currentYear = new Date().getFullYear();
  // A leading digit means day-first ("9-Dec"), otherwise month-first ("Dec-9").
  const format = /^\d/.test(dateString) ? "d-MMM HH:mm" : "MMM-d HH:mm";

  const parsed = DateTime.fromFormat(`${dateString} ${currentYear}`, `${format} yyyy`, {
    zone: fromTimezone,
  });
  if (!parsed.isValid) return null;

  const systemTz = Intl.DateTimeFormat().resolvedOptions().timeZone;
  return parsed.setZone(systemTz).toFormat(formatDate);
}

/**
 * Poll the page until result cards are present or the retry budget is spent,
 * then give the anti-bot interstitial one last chance to clear.
 *
 * @param {object} page - Puppeteer page already navigated to the search URL.
 * @returns {Promise<void>}
 */
async function waitForListingCards(page) {
  for (let attempt = 0; attempt < MAX_LOAD_RETRIES; attempt++) {
    try {
      const html = await safeGetContent(page);
      if (!html.includes(BROWSER_CHECK_MARKER)) {
        const firstCard = await page.$(CARD_SELECTOR);
        if (firstCard) break;
      }
    } catch (err) {
      // Best effort: a failed poll is treated like "not ready yet" and retried.
    }
    await wait(RETRY_DELAY_MS);
  }

  const html = await safeGetContent(page);
  if (html.includes(BROWSER_CHECK_MARKER)) {
    await page.waitForSelector(CARD_SELECTOR, { timeout: 15000 }).catch(() => null);
  }
}

/**
 * Turn the result cards into plain payload objects.
 *
 * IMPORTANT: this function is handed to `page.$$eval`, so it is serialized and
 * executed inside the browser page. It must stay self-contained: no references
 * to module-level constants, imports or helpers.
 *
 * @param {Element[]} nodes - The card elements.
 * @param {object} store - Scrape config (reads listingType, from_site, type_custom, config_id).
 * @returns {object[]} One payload per card that passed all filters.
 */
function extractListingCards(nodes, store) {
  const SECONDS_PER_UNIT = { d: 86400, h: 3600, m: 60, s: 1 };
  const MARKETS = [
    { country_name: "Australia", market_code: "EBAY_AU" },
    { country_name: "United Kingdom", market_code: "EBAY_GB" },
    { country_name: "United States", market_code: "EBAY_US" },
    { country_name: "Canada", market_code: "EBAY_ENCA" },
  ];

  // "1d 2h 30m" -> number of seconds.
  const durationToSeconds = (str) => {
    if (!str) return 0;
    let total = 0;
    for (const [, value, unit] of str.matchAll(/(\d+)\s*(d|day|h|hour|m|minute|s|second)s?/gi)) {
      total += Number.parseInt(value, 10) * SECONDS_PER_UNIT[unit[0].toLowerCase()];
    }
    return total;
  };

  const type = store.listingType || "FixedPrice";
  const storeSite = store.from_site;
  const results = [];

  for (const node of nodes) {
    const payload = {};

    // ---------------- LINK + ID ----------------
    const linkEl = node.querySelector("div.su-media__image a");
    // A card without a link has no item id; skip it instead of throwing,
    // which would abort the evaluation of the whole page.
    if (!linkEl) continue;
    payload.link_detail = linkEl.href || "";
    const idMatch = payload.link_detail.match(/\/itm\/(\d+)/);
    if (!idMatch) continue;
    payload.id = idMatch[1];

    // ---------------- NAME ----------------
    const titleEl = node.querySelector(".s-card__title");
    if (titleEl) {
      const name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
      payload.title = name;
      payload.description = name;
    }

    // ---------------- CONDITION ----------------
    const condEl = node.querySelector(".s-card__subtitle");
    payload.condition_item = condEl?.textContent.trim() || "";

    // ---------------- IMAGE ----------------
    const pictureEl = node.querySelector("img.s-card__image");
    if (pictureEl) payload.picture = pictureEl.getAttribute("src") || "";

    // ---------------- PRICE + CURRENCY ----------------
    const priceEl = node.querySelector(".s-card__price");
    if (priceEl) {
      // Strip every thousands separator, not only the first one.
      const text = priceEl.textContent
        .replace(/,/g, "")
        .trim()
        .replace("£", "GBP")
        .replace("$", "");
      const priceMatch = text.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
      if (priceMatch) {
        payload.currencyID = priceMatch[1] || priceMatch[3] || "";
        payload.price = priceMatch[2] || "";
        if (!payload.currencyID.trim()) {
          // Fallback for 2-letter prefixes such as "AU 12.00" / "US 12.00".
          // The previous code read the groups of the first match here, so this
          // fallback could never yield a currency.
          const prefixMatch = text.match(/([A-Za-z]{2,3})\s?([\d.]+)/);
          if (prefixMatch) payload.currencyID = prefixMatch[1];
        }
        // NOTE(review): the default is written to `currency`, not `currencyID` —
        // kept as-is; confirm which field the insert API reads.
        if (!payload.currencyID.trim()) payload.currency = "AU";
      }
    }

    // ---------------- AUCTION / BIDS ----------------
    const bidsEl = node.querySelector(".s-card__attribute-row .su-styled-text.large");
    if (bidsEl) {
      const txt = bidsEl.textContent.trim();
      // Accept the singular "1 bid" as well as "N bids".
      if (/\bbids?\b/i.test(txt)) {
        const bid = txt.match(/\d+/);
        if (bid) payload.bidCount = bid[0];
        payload.is_auctionList = 1;
      }
      if (/Buy It Now/i.test(txt)) payload.buyItNowAvailable = true;
      if (/Best Offer/i.test(txt)) payload.makeOffer = true;
    }

    // Keep only cards of the requested listing type.
    if (type === "Auction" && !payload.is_auctionList) continue;
    if (type !== "Auction" && !payload.buyItNowAvailable && !payload.makeOffer) continue;

    // ---------------- TIME LEFT (auction) ----------------
    const leftEl = node.querySelector(".s-card__time-left");
    if (leftEl) {
      const leftText = leftEl.textContent.trim();
      payload.timeLeft = leftText;
      payload.end_time = Math.floor(Date.now() / 1000 + durationToSeconds(leftText));
    }

    // ---------------- SHIPPING & FROM SITE ----------------
    let countryText = "";
    for (const span of node.querySelectorAll(".s-card__attribute-row .su-styled-text.secondary.large")) {
      const txt = span.textContent ?? "";
      const lower = txt.toLowerCase();

      if (lower.includes("delivery")) {
        const cost = txt.match(/\d+\.\d+/);
        if (cost) payload.shipping_cost = cost[0];
        if (lower.includes("free") || !payload.shipping_cost) payload.shipping_cost = 0;
      }

      if (lower.includes("from") || lower.includes("located in")) {
        countryText = txt;
        const market = MARKETS.find((site) => txt.includes(site.country_name));
        if (market) payload.from_site = market.market_code;
      }
    }

    // No location shown: assume the store's own site. A shown location must
    // resolve to the store's site, otherwise the card is dropped.
    if (!countryText) payload.from_site = storeSite;
    if (countryText && (payload.from_site !== storeSite || !payload.from_site)) continue;

    // ---------------- SELLER / FEEDBACK ----------------
    for (const span of node.querySelectorAll(".s-card__attribute-row .su-styled-text.primary.large")) {
      const text = span.textContent;
      // e.g. "96.9% positive (105)"
      const feedback = text.match(/^([\d.]+%)\s*\w*\s*\(([^)]+)\)/);
      if (feedback) {
        payload.feedbackPercent = feedback[1];
        payload.feedbackScore = feedback[2];
      } else {
        payload.seller = text.trim();
      }
    }
    if (!payload.seller || !payload.price) continue;

    // ---------------- LISTING DATE (Buy It Now listing) ----------------
    const dateEl = node.querySelector(".s-card__attribute-row .su-styled-text.secondary.bold.large");
    if (dateEl) {
      const text = dateEl.textContent.trim();
      // Same gate as the original PHP logic: keep the raw text only when the
      // browser can parse it; the real conversion happens on the Node side.
      if (!Number.isNaN(Date.parse(text))) payload.time = text;
    }

    payload.current_time = Date.now();
    payload.type_custom = store.type_custom || "custom";
    payload.listingType = type;
    payload.config_id = store.config_id || null;
    results.push(payload);
  }

  return results;
}

/**
 * Derive start/end timestamps from the raw listing date of an item.
 *
 * @param {object} item - Payload produced by extractListingCards.
 * @param {string} timezone - IANA zone the raw listing date is expressed in.
 * @returns {object} The item itself when it has no usable date, otherwise a copy
 *   with timeConvert, start_time, end_time and start_time_string set.
 */
function attachStartAndEndTime(item, timezone) {
  if (!item.time) return item;

  const timeConvert = parseAndConvertToSydney(item.time, timezone, LISTING_TIME_FORMAT);
  // An unparseable date used to become timestamp 0 (1970); leave the item untouched instead.
  if (!timeConvert) return item;

  // timeConvert is expressed in the system zone, which is Luxon's default zone.
  const timestamp = DateTime.fromFormat(timeConvert, LISTING_TIME_FORMAT).toSeconds();
  return {
    ...item,
    timeConvert,
    start_time: timestamp,
    end_time: timestamp + START_TO_END_OFFSET_SECONDS,
    start_time_string: timeConvert,
  };
}

/**
 * Open the store's search URL in a headless browser and scrape its result cards.
 *
 * @param {object} store - Scrape config: data (URL), listingType, from_site,
 *   timezone, type_custom, config_id.
 * @returns {Promise<object[]>} Scraped items; an empty array when anything fails.
 */
async function scrapeWithPuppeteer(store) {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox"],
    });
    const page = await browser.newPage();
    await page.goto(store.data, { waitUntil: "networkidle2" });

    await waitForListingCards(page);

    const items = await page.$$eval(CARD_SELECTOR, extractListingCards, store);
    return items.map((item) => attachStartAndEndTime(item, store.timezone));
  } catch (err) {
    console.log("Error scrapeWithPuppeteer:", err);
    return [];
  } finally {
    // Always release the browser, including on the error path.
    if (browser) {
      await browser.close().catch((closeErr) => {
        console.log("Error closing browser:", closeErr);
      });
    }
  }
}

/**
 * Find the market whose base URL matches a search URL.
 *
 * @param {string} searchUrl - Full search URL.
 * @param {object[]} marketDatas - Market rows; each is matched on its `url`.
 * @returns {object|null} The first row whose url is a substring of the search
 *   URL or shares its host, or null when none matches.
 */
function findMarketDataFromSearchUrl(searchUrl, marketDatas) {
  const trimmedUrl = searchUrl?.trim();
  if (!trimmedUrl) return null;

  const hostOf = (url) => {
    try {
      return new URL(url).host;
    } catch (e) {
      return null;
    }
  };
  const searchHost = hostOf(trimmedUrl);

  for (const data of marketDatas) {
    if (!data.url) continue;

    // 1) direct substring match
    if (trimmedUrl.includes(data.url)) return data;

    // 2) fallback: host comparison
    const marketHost = hostOf(data.url);
    if (searchHost && marketHost && searchHost === marketHost) return data;
  }
  return null;
}

/**
 * Build the list of scrape configs from the database.
 *
 * @param {object} db - mysql2 connection.
 * @returns {Promise<object[]>} Two keyword configs (FixedPrice + Auction) per
 *   active market, followed by one config per custom URL that matches a market.
 */
async function buildConfigs(db) {
  const [keywords] = await db.execute("SELECT id, name FROM hot_item_keyword WHERE level = 1");
  const [keywordsCustom] = await db.execute("SELECT id, name, url FROM newitems_config");
  const [markets] = await db.execute(
    "SELECT id, url, market_code, shipping_postcode, timezone, country_name FROM ebay_site WHERE flag = 1"
  );

  const keyWord = keywords.map((el) => el.name).join("+");
  const configs = [];

  for (const m of markets) {
    const postCode = m.shipping_postcode ? `&_stpos=${m.shipping_postcode}` : "";
    const shared = {
      type_custom: "cisco",
      config_id: null,
      from_site: m.market_code,
      timezone: m.timezone,
    };
    // URL FixedPrice (_sop=10)
    configs.push({
      data: `${m.url}/sch/i.html?_from=R40&_nkw=${keyWord}&_sacat=0&_sop=10${postCode}`,
      ...shared,
      listingType: "FixedPrice",
    });
    // URL Auction (_sop=1)
    configs.push({
      data: `${m.url}/sch/i.html?_from=R40&_nkw=${keyWord}&_sacat=0&_sop=1${postCode}`,
      ...shared,
      listingType: "Auction",
    });
  }

  for (const k of keywordsCustom) {
    const matchedMarket = findMarketDataFromSearchUrl(k.url, markets);
    if (!matchedMarket) continue;
    configs.push({
      data: k.url,
      type_custom: "custom",
      config_id: k.id,
      from_site: matchedMarket.market_code,
      timezone: matchedMarket.timezone,
      listingType: k.url.includes("_sop=10") ? "FixedPrice" : "Auction",
    });
  }

  return configs;
}

/**
 * Entry point: scrape every configured search URL and post each item that is
 * not yet in the `items` table to the insert API.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const db = await mysql.createConnection({
    host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
    user: process.env.MYSQL_USER,
    password: process.env.MYSQL_PASSWORD,
    database: process.env.MYSQL_DB_NAME,
  });

  // FIXME: the API key is hard-coded in source. API_DISTI_KEY lets deployments
  // override it; the literal is kept only as a backward-compatible fallback.
  const apiKey = process.env.API_DISTI_KEY || "CanTho#1";

  try {
    const configs = await buildConfigs(db);

    for (const store of configs) {
      console.log(`Processing ${store.data}`);
      const items = await scrapeWithPuppeteer(store);

      for (const item of items) {
        // Skip the placeholder id before spending a query on it.
        if (item.id === "123456") continue;

        // Skip items that are already stored.
        const [rows] = await db.execute("SELECT id FROM items WHERE id = ?", [item.id]);
        if (rows.length > 0) continue;

        const title = (item.title || "").replace("Opens in a new window or tab", "").trim();
        try {
          const res = await axios.post(
            `${process.env.API_DISTI_HOST}/api/items/insert`,
            { ...item, title },
            { headers: { "x-key": apiKey } }
          );
          console.log(res.data, item.id, item.timeConvert, item.time);
        } catch (err) {
          // One failed insert must not stop the remaining items.
          console.log(err);
        }
      }
    }
  } finally {
    // Close the connection on the error path as well.
    await db.end();
  }

  process.exit(0);
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});