289 lines
8.9 KiB
JavaScript
289 lines
8.9 KiB
JavaScript
import "dotenv/config";
|
||
import puppeteer from "puppeteer";
|
||
import axios from "axios";
|
||
import * as cheerio from "cheerio";
|
||
import nodemailer from "nodemailer";
|
||
import dayjs from "dayjs";
|
||
import mysql from "mysql2/promise";
|
||
|
||
const EMAILS = ["andrew.ng@apactech.io", "dev@apactech.io"];
|
||
// const EMAILS = ["andrew.ng@apactech.io"];
|
||
|
||
const LIST_STORE = [
|
||
{
|
||
name: "ocdepot",
|
||
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=ocdepot&store_name=ocdepot&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
|
||
},
|
||
{
|
||
name: "itinstock",
|
||
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=itinstock&store_name=itinstock&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
|
||
},
|
||
{
|
||
name: "kartechllc",
|
||
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=luckywolf29&store_name=kartechllc&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
|
||
},
|
||
{
|
||
name: "g-electronic",
|
||
url: "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=g-electronic&store_name=gelectronic&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
|
||
},
|
||
];
|
||
|
||
// Define function promise waiting for a given time
|
||
async function wait(ms) {
|
||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||
}
|
||
|
||
async function scrapeWithPuppeteer(url, name) {
|
||
try {
|
||
const browser = await puppeteer.launch({
|
||
headless: true,
|
||
args: ["--no-sandbox", "--disable-setuid-sandbox"],
|
||
});
|
||
const page = await browser.newPage();
|
||
await page.goto(url, { waitUntil: "networkidle2" });
|
||
|
||
let retries = 0;
|
||
while (retries < 10) {
|
||
try {
|
||
console.log(`${name} retry ${retries + 1}`);
|
||
const html = await page.content();
|
||
|
||
// Detect Cloudflare or other blocking messages
|
||
if (html.includes("Checking your browser")) {
|
||
await wait(5000);
|
||
retries++;
|
||
continue;
|
||
}
|
||
|
||
const hasItems = await page.$("li.s-card--horizontal");
|
||
if (hasItems) break;
|
||
|
||
await wait(2000);
|
||
retries++;
|
||
} catch (err) {
|
||
await wait(2000);
|
||
retries++;
|
||
}
|
||
}
|
||
|
||
// Wait for cards or detect Cloudflare
|
||
const html = await page.content();
|
||
const needBrowserCheck = html.includes("Checking your browser");
|
||
if (needBrowserCheck) {
|
||
await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
|
||
}
|
||
const items = await page.$$eval("li.s-card--horizontal", (nodes) => {
|
||
const results = [];
|
||
nodes.forEach((node) => {
|
||
const payload = {};
|
||
const soldEl = node.querySelector(".s-card__caption .su-styled-text.positive.default");
|
||
if (!soldEl) return;
|
||
const soldText = soldEl.textContent.trim().replace("Sold", "").trim();
|
||
payload.sold_out_date = soldText;
|
||
|
||
const date = new Date(soldText);
|
||
if (isNaN(date.getTime())) return;
|
||
const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
|
||
if (daysDiff > 7) return;
|
||
|
||
const linkEl = node.querySelector("div.su-media__image a");
|
||
if (linkEl) {
|
||
payload.link_detail = linkEl.href;
|
||
const match = linkEl.href.match(/\/itm\/(\d+)/);
|
||
if (match) payload.id = match[1];
|
||
}
|
||
|
||
const titleEl = node.querySelector(".s-card__title");
|
||
if (titleEl) {
|
||
payload.name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
|
||
payload.description = payload.name;
|
||
}
|
||
|
||
const conditionEl = node.querySelector(".s-card__subtitle");
|
||
if (conditionEl) payload.condition = conditionEl.textContent.trim();
|
||
|
||
const priceEl = node.querySelector(".s-card__price");
|
||
if (priceEl) {
|
||
let txt = priceEl.textContent.replace(",", "").trim();
|
||
txt = txt.replace("£", "GBP").replace("$", "USD");
|
||
const match = txt.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
|
||
if (match) {
|
||
payload.currency = match[1] || match[3] || "";
|
||
payload.price = match[2] || "";
|
||
if (!payload.currency?.trim()) payload.currency = "USD";
|
||
}
|
||
}
|
||
if (payload.id) results.push(payload);
|
||
});
|
||
return results;
|
||
});
|
||
|
||
await browser.close();
|
||
return items;
|
||
} catch (error) {
|
||
console.error("Puppeteer scraping failed:", error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
async function scrapeWithCheerio(url) {
|
||
const { data } = await axios.get(url);
|
||
if (data.includes("Checking your browser")) return null;
|
||
const $ = cheerio.load(data);
|
||
const items = [];
|
||
$("li.s-card--horizontal").each((i, el) => {
|
||
const payload = {};
|
||
const soldText = $(el).find(".s-card__caption .su-styled-text.positive.default").text().trim();
|
||
if (!soldText) return;
|
||
payload.sold_out_date = soldText.replace("Sold", "").trim();
|
||
const date = new Date(soldText.replace("Sold", "").trim());
|
||
const daysDiff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
|
||
if (daysDiff > 7) return;
|
||
|
||
const linkEl = $(el).find("div.su-media__image a").attr("href");
|
||
if (linkEl) {
|
||
payload.link_detail = linkEl;
|
||
const match = linkEl.match(/\/itm\/(\d+)/);
|
||
if (match) payload.id = match[1];
|
||
}
|
||
|
||
payload.name = $(el)
|
||
.find(".s-card__title")
|
||
.text()
|
||
.replace(/New\s*listing/i, "")
|
||
.trim();
|
||
payload.condition = $(el).find(".s-card__subtitle").text().trim();
|
||
const priceText = $(el).find(".s-card__price").text().trim().replace(",", "");
|
||
const match = priceText.match(/([£$€A-Za-z]{1,5})?\s?([\d.,]+)\s?([£$€A-Za-z]{1,5})?/);
|
||
if (match) {
|
||
const currency = match[1] || match[3] || "";
|
||
payload.price = match[2];
|
||
payload.currency = currency.replace("£", "GBP").replace("$", "USD");
|
||
if (!payload.currency?.trim()) payload.currency = "USD";
|
||
}
|
||
if (payload.id) items.push(payload);
|
||
});
|
||
return items;
|
||
}
|
||
|
||
async function sendMail(subject, body) {
|
||
try {
|
||
await axios({
|
||
url: process.env.API_DISTI_HOST + "/api/export/items-sold-out",
|
||
method: "GET",
|
||
responseType: "arraybuffer",
|
||
timeout: 60000,
|
||
headers: {
|
||
"x-key": "CanTho#1",
|
||
},
|
||
}).then(async (response) => {
|
||
// console.log(response);
|
||
const now = new Date();
|
||
const year = now.getFullYear();
|
||
const month = String(now.getMonth() + 1).padStart(2, "0");
|
||
const day = String(now.getDate()).padStart(2, "0");
|
||
const fileName = `items_sold_out_${year}_${month}_${day}.xlsx`;
|
||
|
||
const transporter = nodemailer.createTransport({
|
||
host: process.env.MAIL_IP,
|
||
port: process.env.MAIL_PORT,
|
||
secure: true,
|
||
auth: {
|
||
user: process.env.MAIL_USERNAME,
|
||
pass: process.env.MAIL_PASSWORD,
|
||
},
|
||
connectionTimeout: 10000, // prevent timeout
|
||
pool: true, // reuse connection if many mails
|
||
tls: { servername: process.env.MAIL_HOST },
|
||
});
|
||
|
||
const mailOptions = {
|
||
from: process.env.MAIL_USERNAME,
|
||
to: EMAILS.join(","),
|
||
subject,
|
||
text: body,
|
||
attachments: [
|
||
{
|
||
filename: fileName,
|
||
content: response.data, // attach from memory
|
||
},
|
||
],
|
||
};
|
||
|
||
await transporter.sendMail(mailOptions);
|
||
console.log("✅ Email sent successfully with Excel attachment!");
|
||
});
|
||
} catch (err) {
|
||
console.error("❌ Failed to send email:", err.message);
|
||
}
|
||
}
|
||
|
||
async function main() {
|
||
const db = await mysql.createConnection({
|
||
host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
|
||
user: process.env.MYSQL_USER,
|
||
password: process.env.MYSQL_PASSWORD,
|
||
database: process.env.MYSQL_DB_NAME,
|
||
});
|
||
|
||
console.log("✅ Connected to MySQL");
|
||
|
||
const errors = [];
|
||
const inserted = [];
|
||
|
||
for (const store of LIST_STORE) {
|
||
console.log(`Processing ${store.name}`);
|
||
let items = await scrapeWithCheerio(store.url);
|
||
if (!items) items = await scrapeWithPuppeteer(store.url, store.name);
|
||
let count = 0;
|
||
for (const item of items) {
|
||
// 2️⃣ Check if record exists
|
||
const [rows] = await db.execute("SELECT id FROM items_sold_out WHERE id = ?", [item.id]);
|
||
if (rows.length > 0) continue;
|
||
|
||
// 3️⃣ Insert new record
|
||
const priceText = item.price || "";
|
||
const title = (item.name || "").replace("Opens in a new window or tab", "").trim();
|
||
const amount = parseFloat(priceText.replace(/[^\d.]/g, "").replace(/,/g, ""));
|
||
await db.execute(
|
||
`INSERT INTO items_sold_out (id, name, \`condition\`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||
[
|
||
item.id || "",
|
||
title,
|
||
item.condition || "",
|
||
amount || 0,
|
||
item.currency || "",
|
||
item.link_detail || "",
|
||
store.name || "",
|
||
item.sold_out_date || "",
|
||
dayjs().format("YYYY-MM-DD HH:mm:ss"),
|
||
dayjs().format("YYYY-MM-DD HH:mm:ss"),
|
||
]
|
||
);
|
||
count++;
|
||
}
|
||
|
||
if (count === 0) errors.push({ url: store.url, message: "No new items inserted" });
|
||
inserted.push({ name: store.name, count });
|
||
}
|
||
|
||
if (errors.length > 3) {
|
||
const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
|
||
await sendMail("[Sold out] - Scraping Sold Out Error Report", msg);
|
||
} else {
|
||
const msg = inserted.map((i) => `Shop: ${i.name}\nSold: ${i.count} items`).join("\n\n");
|
||
await sendMail("[Sold out] - Scraping Sold Out Success", msg);
|
||
}
|
||
|
||
console.log("✅ Done scraping.");
|
||
|
||
await db.end();
|
||
console.log("🔌 MySQL connection closed");
|
||
process.exit(0);
|
||
}
|
||
|
||
main().catch((err) => {
|
||
console.error(err);
|
||
process.exit(1);
|
||
});
|