// bid-tool/scrape-data-keyword/models/lawsons-scrap-model.js
//
// 96 lines
// 2.6 KiB
// JavaScript

import browser from "../system/browser.js";
import { extractModelId, extractNumber } from "../system/ultils.js";
import { ScrapModel } from "./scrap-model.js";
/**
 * Scrap model for the Lawsons site.
 *
 * For every search URL it loads the listing page, reads each lot card
 * (link, image, title), opens the lot's detail page to read the current
 * bid, and stores the keyword-filtered lots on `this.results`.
 *
 * Relies on members not defined in this class (`this.page`,
 * `this.web_bid.origin_url`, `this.scrap_config_id`, `this.results`,
 * `this.extractUrls()`, `this.filterItemByKeyword()`) — presumably provided
 * by `ScrapModel`; verify against the base class.
 */
export class LawsonsScrapModel extends ScrapModel {
  /**
   * Runs the scrape: visits each entry returned by `extractUrls()` in
   * sequence and stores the filtered lots under `this.results[keyword]`.
   *
   * Navigation or selector errors are not caught here and propagate to the
   * caller, aborting the remaining entries.
   *
   * @returns {Promise<void>}
   */
  action = async () => {
    const urlsData = this.extractUrls();
    for (const item of urlsData) {
      await this.page.goto(item.url);
      const data = await this.getItemsInHtml(item);
      const results = this.filterItemByKeyword(item.keyword, data);
      this.results[item.keyword] = results;
      console.log({ results: this.results });
    }
  };

  /**
   * Opens a lot's detail page in a temporary tab and reads its current bid.
   *
   * @param {string|null} url - Lot path, appended to `web_bid.origin_url`.
   * @returns {Promise<number>} The value `extractNumber` derives from the
   *   price text, or 0 when `url` is empty, the page or selector fails, or
   *   the extracted value is falsy.
   */
  async getPrice(url) {
    // A lot without a detail link has no page to open; avoid navigating to
    // "<origin>null" and waiting for the selector timeout.
    if (!url) {
      return 0;
    }

    const priceSelector = "#bidPrefix > span.font-weight-bold";
    const newPage = await browser.newPage();
    try {
      // Kept inside the try so the tab is still closed if this call fails.
      await newPage.setUserAgent(
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
      );
      await newPage.goto(`${this.web_bid.origin_url}${url}`, {
        waitUntil: "domcontentloaded", // or "networkidle2" if AJAX content must be awaited
      });
      await newPage.waitForSelector(priceSelector, {
        timeout: 10000,
      });
      const priceText = await newPage
        .$eval(priceSelector, (node) => node.textContent.trim())
        .catch(() => null);
      // `||` (not `??`) on purpose: any falsy extraction result maps to 0.
      return extractNumber(priceText) || 0;
    } catch (error) {
      console.error(`Error getting price for ${url}:`, error);
      return 0;
    } finally {
      await newPage.close();
    }
  }

  /**
   * Collects every lot card on the currently loaded listing page.
   *
   * Detail pages are visited one at a time (one extra tab per lot).
   *
   * @param {{keyword: string}} data - Search entry; its `keyword` is copied
   *   onto every returned record.
   * @returns {Promise<Array<object>>} One record per lot card with `url`,
   *   `image_url`, `name`, `keyword`, `model`, `current_price` and
   *   `scrap_config_id`. Fields whose element is missing are `null`;
   *   `current_price` is 0 when it cannot be read.
   * @throws If no lot card appears within 10 seconds (selector timeout).
   */
  getItemsInHtml = async (data) => {
    const lotSelector = ".row.row-spacing > .lot-container";
    await this.page.waitForSelector(lotSelector, {
      timeout: 10000,
    });
    const elements = await this.page.$$(lotSelector);
    const results = [];
    for (const el of elements) {
      const url = await el
        .$eval("aside.search-lot--content.text-left > a", (anchor) =>
          anchor.getAttribute("href")
        )
        .catch(() => null);
      const image_url = await el
        .$eval("figure.text-center.imgContainer img", (img) =>
          img.getAttribute("src")
        )
        .catch(() => null);
      const name = await el
        .$eval(".font-weight-normal.text-grey.title", (title) =>
          title.textContent.trim()
        )
        .catch(() => null);

      // Only build an absolute URL when the card actually had a link;
      // otherwise the record would carry the bogus "<origin>null".
      const fullUrl = url ? `${this.web_bid.origin_url}${url}` : null;
      const current_price = await this.getPrice(url);

      results.push({
        url: fullUrl,
        image_url,
        name,
        keyword: data.keyword,
        model: fullUrl ? extractModelId(fullUrl) : null,
        current_price,
        scrap_config_id: this.scrap_config_id,
      });
    }
    return results;
  };
}