Initial commit

This commit is contained in:
nguyentrungthat 2025-10-20 17:22:29 +07:00
commit 12b192780a
5 changed files with 3486 additions and 0 deletions

15
.env Normal file
View File

@ -0,0 +1,15 @@
# SECURITY: this file contains credentials in plain text (MAIL_PASSWORD and MYSQL_PASSWORD below)
# and is part of the commit. Rotate those secrets, stop tracking this file, and commit a
# placeholder-only template (e.g. .env.example) instead.
MAIL_MAILER=smtp
MAIL_HOST=mail.apactech.io
MAIL_PORT=465
MAIL_USERNAME=admin@apactech.io
MAIL_PASSWORD="BGK!dyt6upd2eax1bhz"
MAIL_ENCRYPTION=ssl
MAIL_FROM_ADDRESS=admin@apactech.io
MAIL_FROM_NAME="${APP_NAME}"
DB_CONNECTION=mysql
MYSQL_HOST=10.20.2.222
MYSQL_PORT=3306
MYSQL_DB_NAME=devnsw_disti
MYSQL_USER=devnsw_devteam
MYSQL_PASSWORD=12345678

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Installed dependencies
node_modules/
# Editor settings
.vscode/
# Environment file with mail/database credentials must not be committed.
# It is already tracked by this commit, so also run `git rm --cached .env`.
.env

228
index.js Normal file
View File

@ -0,0 +1,228 @@
import "dotenv/config";
import puppeteer from "puppeteer";
import axios from "axios";
import * as cheerio from "cheerio";
import nodemailer from "nodemailer";
import path from "path";
import dayjs from "dayjs";
import mysql from "mysql2/promise";
/** Recipients of the scraping report e-mail (joined with "," by sendMail). */
const EMAILS = ["andrew.ng@apactech.io"];

/**
 * eBay seller search pages to scrape, one entry per shop.
 * Each tuple is [shop name stored in the database, search-results URL].
 * NOTE(review): the query strings presumably select sold listings (LH_Sold=1)
 * at 240 results per page (_ipg=240) — confirm against eBay before editing.
 */
const LIST_STORE = [
  [
    "ocdepot",
    "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=ocdepot&store_name=ocdepot&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  ],
  [
    "itinstock",
    "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=itinstock&store_name=itinstock&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  ],
  [
    "kartechllc",
    "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=luckywolf29&store_name=kartechllc&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  ],
  [
    "g-electronic",
    "https://www.ebay.com/sch/i.html?_dkr=1&iconV2Request=true&_blrs=recall_filtering&_ssn=g-electronic&store_name=gelectronic&_oac=1&LH_Sold=1&rt=nc&_ipg=240",
  ],
].map(([name, url]) => ({ name, url }));
/**
 * Scrapes sold-item cards from a search-results page using headless Chromium.
 *
 * Only cards whose "Sold <date>" caption parses to a date no more than 7 days
 * old, and whose link contains an `/itm/<digits>` id, are returned.
 *
 * @param {string} url - Search-results page to load.
 * @returns {Promise<Array<object>|null>} Parsed cards with the keys
 *   `sold_out_date`, `link_detail`, `id`, `name`, `description`, `condition`,
 *   `currency`, `price` (each present only when its element was found), or
 *   `null` when launching, navigating or evaluating fails.
 */
async function scrapeWithPuppeteer(url) {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox"],
    });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // When the interstitial "Checking your browser" page is served, give the
    // real result cards up to 15 s to appear. A timeout is deliberately
    // ignored: the extraction below then simply finds no cards.
    const html = await page.content();
    if (html.includes("Checking your browser")) {
      await page.waitForSelector("li.s-card--horizontal", { timeout: 15000 }).catch(() => null);
    }

    // The callback is evaluated inside the page, so it must be self-contained
    // and cannot reference anything from this module's scope.
    return await page.$$eval("li.s-card--horizontal", (nodes) => {
      const results = [];
      for (const node of nodes) {
        const soldEl = node.querySelector(".s-card__caption .su-styled-text.positive.default");
        if (!soldEl) continue;

        const soldText = soldEl.textContent.trim().replace("Sold", "").trim();
        const soldAt = new Date(soldText);
        if (Number.isNaN(soldAt.getTime())) continue;
        const daysDiff = (Date.now() - soldAt.getTime()) / (1000 * 60 * 60 * 24);
        if (daysDiff > 7) continue;

        const payload = { sold_out_date: soldText };

        const linkEl = node.querySelector("div.su-media__image a");
        if (linkEl) {
          payload.link_detail = linkEl.href;
          const idMatch = linkEl.href.match(/\/itm\/(\d+)/);
          if (idMatch) payload.id = idMatch[1];
        }

        const titleEl = node.querySelector(".s-card__title");
        if (titleEl) {
          payload.name = titleEl.textContent.replace(/New\s*listing/i, "").trim();
          payload.description = payload.name;
        }

        const conditionEl = node.querySelector(".s-card__subtitle");
        if (conditionEl) payload.condition = conditionEl.textContent.trim();

        const priceEl = node.querySelector(".s-card__price");
        if (priceEl) {
          // Drop every thousands separator (not just the first) and turn the
          // currency symbol into a three-letter code the regex can capture.
          const priceText = priceEl.textContent
            .replaceAll(",", "")
            .trim()
            .replace("£", "GBP ")
            .replace("$", "USD ");
          const priceMatch = priceText.match(/([A-Za-z]{3})?\s?([\d.,]+)\s?([A-Za-z]{3})?/);
          if (priceMatch) {
            payload.currency = priceMatch[1] || "";
            payload.price = priceMatch[2] || "";
          }
        }

        // Cards without an item id cannot be de-duplicated, so skip them.
        if (payload.id) results.push(payload);
      }
      return results;
    });
  } catch (error) {
    console.error("Puppeteer scraping failed:", error);
    return null;
  } finally {
    // Always release the Chromium process, including after a failed
    // navigation or evaluation; previously it was leaked on every error.
    if (browser) {
      await browser
        .close()
        .catch((closeError) => console.error("Failed to close browser:", closeError));
    }
  }
}
/**
 * Scrapes sold-item cards from a search-results page with a plain HTTP GET
 * and static HTML parsing (no browser).
 *
 * Only cards whose "Sold <date>" caption parses to a date no more than 7 days
 * old, and whose link contains an `/itm/<digits>` id, are returned.
 *
 * @param {string} url - Search-results page to fetch.
 * @returns {Promise<Array<object>|null>} Parsed cards with the keys
 *   `sold_out_date`, `link_detail`, `id`, `name`, `condition`, and (when a
 *   price was recognised) `price` / `currency`; or `null` when the page could
 *   not be fetched, is not HTML text, or is the "Checking your browser"
 *   interstitial — the caller treats `null` as "fall back to a real browser".
 */
async function scrapeWithCheerio(url) {
  let data;
  try {
    ({ data } = await axios.get(url));
  } catch (error) {
    // A blocked or failed request must not abort the whole run: report it and
    // let the caller fall back to the browser-based scraper.
    console.warn(`Static fetch failed for ${url}:`, error?.message ?? error);
    return null;
  }
  // axios may hand back a parsed object for non-HTML responses; only text can be scanned.
  if (typeof data !== "string" || data.includes("Checking your browser")) return null;

  const $ = cheerio.load(data);
  const items = [];
  $("li.s-card--horizontal").each((_index, el) => {
    const card = $(el);

    const soldText = card.find(".s-card__caption .su-styled-text.positive.default").text().trim();
    if (!soldText) return;
    const soldOutDate = soldText.replace("Sold", "").trim();
    const soldAt = new Date(soldOutDate);
    // An unparseable date yields NaN, and `NaN > 7` is false, so without this
    // guard such cards would slip through the 7-day filter.
    if (Number.isNaN(soldAt.getTime())) return;
    const daysDiff = (Date.now() - soldAt.getTime()) / (1000 * 60 * 60 * 24);
    if (daysDiff > 7) return;

    const payload = { sold_out_date: soldOutDate };

    const href = card.find("div.su-media__image a").attr("href");
    if (href) {
      payload.link_detail = href;
      const idMatch = href.match(/\/itm\/(\d+)/);
      if (idMatch) payload.id = idMatch[1];
    }

    payload.name = card
      .find(".s-card__title")
      .text()
      .replace(/New\s*listing/i, "")
      .trim();
    payload.condition = card.find(".s-card__subtitle").text().trim();

    const priceText = card.find(".s-card__price").text().trim().replace(",", "");
    const priceMatch = priceText.match(/([£$€A-Za-z]{1,5})?\s?([\d.,]+)\s?([£$€A-Za-z]{1,5})?/);
    if (priceMatch) {
      payload.price = priceMatch[2];
      // Currency may precede or follow the amount; default to "" (as the
      // browser-based scraper does) rather than leaving it undefined.
      payload.currency = priceMatch[1] || priceMatch[3] || "";
    }

    // Cards without an item id cannot be de-duplicated, so skip them.
    if (payload.id) items.push(payload);
  });
  return items;
}
/**
 * Sends a plain-text e-mail to every address in EMAILS over SMTP.
 *
 * Connection settings come from the MAIL_HOST, MAIL_PORT, MAIL_USERNAME and
 * MAIL_PASSWORD environment variables; the transport is always created with
 * `secure: true`, and MAIL_USERNAME doubles as the sender address.
 *
 * @param {string} subject - Subject line.
 * @param {string} body - Message body, sent as the `text` part.
 * @param {string|null} [attachmentPath=null] - Optional file to attach; its
 *   base name is used as the attachment filename.
 * @returns {Promise<void>} Resolves once the transport has sent the message.
 */
async function sendMail(subject, body, attachmentPath = null) {
  const { MAIL_HOST, MAIL_PORT, MAIL_USERNAME, MAIL_PASSWORD } = process.env;

  const smtp = nodemailer.createTransport({
    host: MAIL_HOST,
    port: MAIL_PORT,
    secure: true,
    auth: { user: MAIL_USERNAME, pass: MAIL_PASSWORD },
  });

  // Only add the `attachments` key when a file was actually supplied.
  const attachmentPart = attachmentPath
    ? { attachments: [{ filename: path.basename(attachmentPath), path: attachmentPath }] }
    : {};

  await smtp.sendMail({
    from: MAIL_USERNAME,
    to: EMAILS.join(","),
    subject,
    text: body,
    ...attachmentPart,
  });
}
/**
 * Inserts every scraped item whose id is not yet in `items_sold_out`.
 *
 * @param {object} db - Open mysql2 connection.
 * @param {string} storeName - Stored in the `shop_name` column.
 * @param {Array<object>} items - Items produced by one of the scrapers.
 * @returns {Promise<number>} Number of rows inserted.
 */
async function insertNewSoldItems(db, storeName, items) {
  let count = 0;
  for (const item of items) {
    // Skip items that were stored by an earlier run.
    const [rows] = await db.execute("SELECT id FROM items_sold_out WHERE id = ?", [item.id]);
    if (rows.length > 0) continue;

    // Keep digits and the decimal point only; an unparseable price is stored as 0.
    const amount = Number.parseFloat(String(item.price ?? "").replace(/[^\d.]/g, ""));
    const now = dayjs().format("YYYY-MM-DD HH:mm:ss");

    // mysql2 rejects `undefined` bind parameters, and the scrapers leave
    // fields unset when an element is missing — send SQL NULL instead.
    await db.execute(
      "INSERT INTO items_sold_out (id, name, `condition`, price, currency, link_detail, shop_name, sold_out_date, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
      [
        item.id,
        item.name ?? null,
        item.condition ?? null,
        Number.isNaN(amount) ? 0 : amount,
        item.currency ?? null,
        item.link_detail ?? null,
        storeName,
        item.sold_out_date ?? null,
        now,
        now,
      ]
    );
    count++;
  }
  return count;
}

/**
 * Runs one scraping pass: for every store in LIST_STORE, scrape recently sold
 * items (static fetch first, headless browser as fallback), insert the ones
 * not yet in the database, then e-mail a summary report.
 *
 * Database settings come from the MYSQL_HOST, MYSQL_PORT (default 3306),
 * MYSQL_USER, MYSQL_PASSWORD and MYSQL_DB_NAME environment variables.
 *
 * @returns {Promise<void>} Rejects if connecting, querying or mailing fails;
 *   the database connection is closed in either case.
 */
async function main() {
  const db = await mysql.createConnection({
    host: process.env.MYSQL_HOST, // e.g. '127.0.0.1' or remote IP
    port: Number(process.env.MYSQL_PORT) || 3306,
    user: process.env.MYSQL_USER,
    password: process.env.MYSQL_PASSWORD,
    database: process.env.MYSQL_DB_NAME,
  });
  console.log("✅ Connected to MySQL");

  try {
    const errors = [];
    const inserted = [];

    for (const store of LIST_STORE) {
      console.log(`Processing ${store.name}`);
      let items = await scrapeWithCheerio(store.url);
      if (!items) items = await scrapeWithPuppeteer(store.url);

      // Both scrapers can come back empty-handed (null); record the failure
      // and move on instead of crashing on `for ... of null`.
      if (!items) {
        errors.push({ url: store.url, message: "Scraping failed" });
        inserted.push({ name: store.name, count: 0 });
        continue;
      }

      const count = await insertNewSoldItems(db, store.name, items);
      if (count === 0) errors.push({ url: store.url, message: "No new items inserted" });
      inserted.push({ name: store.name, count });
    }

    // NOTE(review): with the four stores currently in LIST_STORE the error
    // report is sent only when every store produced nothing — confirm that
    // this threshold is intended.
    if (errors.length > 3) {
      const msg = errors.map((e) => `URL: ${e.url}\nMessage: ${e.message}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Error Report", msg);
    } else {
      // The report is sent as plain text, so no HTML markup is used here.
      const msg = inserted.map((i) => `Shop: ${i.name}\nInserted: ${i.count}`).join("\n\n");
      await sendMail("[New Items] - Scraping Sold Out Success", msg);
    }
    console.log("✅ Done scraping.");
  } finally {
    // Release the connection even when scraping, inserting or mailing failed.
    await db.end();
    console.log("🔌 MySQL connection closed");
  }
}
// Entry point: report a fatal error and mark the process as failed so that a
// scheduler or shell wrapper sees a non-zero exit status instead of success.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

3216
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

24
package.json Normal file
View File

@ -0,0 +1,24 @@
{
"name": "scrapsoldout",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"description": "",
"dependencies": {
"axios": "^1.12.2",
"cheerio": "^1.1.0",
"dayjs": "^1.11.18",
"dotenv": "^17.2.3",
"mysql2": "^3.15.2",
"nodemailer": "^7.0.9",
"puppeteer": "^24.24.1",
"sqlite": "^5.1.1",
"sqlite3": "^5.1.7",
"xlsx": "^0.18.5"
},
"type": "module"
}