// LogAnalyze/crawl.js
//
// 135 lines
// 3.9 KiB
// JavaScript

const axios = require("axios");
const cheerio = require("cheerio");
const mysql = require("mysql2/promise");
require("dotenv").config();
// Connection pool shared by every query in this script. Settings are read from
// the environment (dotenv is loaded just above, so a local .env file works too)
// as DB_HOST, DB_USER, DB_PASSWORD and DB_NAME. Any variable that is unset falls
// back to the local development default.
const db = mysql.createPool({
  host: process.env.DB_HOST ?? "localhost",
  user: process.env.DB_USER ?? "root",
  password: process.env.DB_PASSWORD ?? "",
  database: process.env.DB_NAME ?? "log_analysis",
});
// === Helper functions ===
/**
 * Reports which of the supported `show` commands appear anywhere in a log.
 *
 * Full and abbreviated spellings are both recognised, case-insensitively
 * (for example `show inventory` and `sh inv`).
 *
 * @param {string} log - Raw log text to scan.
 * @returns {{inventory: boolean, version: boolean, license: boolean, logging: boolean}}
 *   One flag per command; true when that command's pattern occurs in the log.
 */
function extractCommands(log) {
  const detectors = [
    ["inventory", /sh(ow)?\s+inv(entory)?/i],
    ["version", /sh(ow)?\s+ver(sion)?/i],
    ["license", /sh(ow)?\s+lic(ense)?/i],
    ["logging", /sh(ow)?\s+log(ging)?/i],
  ];
  const flags = {};
  for (const [name, pattern] of detectors) {
    flags[name] = pattern.test(log);
  }
  return flags;
}
/**
 * Pulls the first product ID and the first software version out of a log.
 *
 * The PID is the run of upper-case letters, digits and dashes that follows
 * `PID:`; the version is the run of digits, dots, letters and dashes that
 * follows the word `Version`.
 *
 * @param {string} log - Raw log text to scan.
 * @returns {{pid: string, version: string}} The extracted values; a field that
 *   cannot be found is reported as the string "UNKNOWN".
 */
function extractPIDVersion(log) {
  // Destructuring defaults supply the fallback when there is no match.
  const [, pid = "UNKNOWN"] = log.match(/PID:\s*([A-Z0-9\-]+)/) ?? [];
  const [, version = "UNKNOWN"] =
    log.match(/Version\s+([\d\.A-Za-z\-]+)/) ?? [];
  return { pid, version };
}
/**
 * Extracts, for each requested command, the first invocation of that command
 * together with the output that follows it.
 *
 * A line counts as an invocation only when it matches the command pattern AND
 * starts with a CLI prompt (text ending in `#` or `>`) directly followed by a
 * `show` command. The captured block runs from that line up to, but not
 * including, the next line that contains the same prompt text.
 *
 * Lines that merely mention a command without a prompt are skipped. Without a
 * prompt the end of the output cannot be detected (every line "contains" the
 * empty string), and accepting such a line would stop a later real invocation
 * from being captured.
 *
 * @param {string} log - Raw log text, lines separated by "\n".
 * @param {Object<string, boolean>} commands - Flags keyed by command name
 *   (`inventory`, `version`, `license`, `logging`); only truthy keys are
 *   extracted.
 * @returns {Object<string, string>} Captured text keyed by command name. A
 *   command with no prompt-prefixed invocation has no entry.
 */
function extractRelevantBlocksByCommand(log, commands) {
  const lines = log.split("\n");
  const commandPatterns = [
    { key: "inventory", regex: /sh(ow)?\s+inv(entory)?/i },
    { key: "version", regex: /sh(ow)?\s+ver(sion)?/i },
    { key: "license", regex: /sh(ow)?\s+lic(ense)?/i },
    { key: "logging", regex: /sh(ow)?\s+log(ging)?/i },
  ];
  // Group 1 is the prompt, e.g. "Router#" in "Router#show version".
  const promptPattern = /^(.+?[#>])\s*(sh(ow)?\s+\w+)/i;
  const result = {};
  const captured = new Set();

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    for (const { key, regex } of commandPatterns) {
      if (!commands[key] || captured.has(key) || !regex.test(line)) continue;

      const prompt = line.match(promptPattern)?.[1];
      // No prompt: not a real invocation, keep looking for one further down.
      if (!prompt) continue;

      captured.add(key);
      // The output ends where the prompt shows up again.
      let end = i + 1;
      while (end < lines.length && !lines[end].includes(prompt)) end++;
      result[key] = lines.slice(i, end).join("\n");
    }
  }
  return result;
}
/**
 * Downloads a log file over HTTP.
 *
 * @param {string} url - Address of the log file.
 * @returns {Promise<*>} The response body as provided by axios.
 */
async function fetchLogFile(url) {
  const { data } = await axios.get(url);
  return data;
}
/**
 * Crawls the log server's index page, downloads every linked `.log` file and
 * stores the recognised command outputs in the database.
 *
 * Each file is split into chunks at every prompt line that runs
 * `show inventory`. Chunks lacking either a PID or a version are skipped. For
 * the others, the matching `devices` row is looked up (or inserted) and each
 * extracted command block is inserted into its `<command>_outputs` table.
 * An error while processing a file is logged and the crawl moves on to the
 * next file.
 *
 * @param {string|number|null} [year=null] - When truthy, only links whose href
 *   contains this value are crawled.
 * @returns {Promise<void>} Resolves once every selected file has been handled;
 *   rejects if the index page itself cannot be fetched.
 */
async function crawlLogs(year = null) {
  const BASE_URL = "http://172.16.5.7:8080";
  console.log(`[${new Date().toISOString()}] Start crawl ${BASE_URL}`);
  const res = await axios.get(BASE_URL);
  const $ = cheerio.load(res.data);
  const links = $("a")
    .map((_, el) => $(el).attr("href"))
    .get()
    .filter((href) => href.endsWith(".log"));
  const filteredLinks = year
    ? links.filter((l) => l.includes(`${year}`))
    : links;

  // The groups must be non-capturing: String.prototype.split splices captured
  // substrings (and `undefined` for unmatched groups) into its result, which
  // would otherwise show up as bogus extra chunks.
  const splitBlocks = (logContent) =>
    logContent
      .split(/(?=^.*?[#>]\s*sh(?:ow)?\s+inv(?:entory)?)/im)
      .map((b) => b.trim())
      .filter(Boolean);

  for (const link of filteredLinks) {
    const filename = link.replace(/\//g, "_");
    try {
      // Resolve the href against the index URL so relative, root-relative and
      // absolute links all produce a valid address.
      const fileUrl = new URL(link, `${BASE_URL}/`).href;
      const log = await fetchLogFile(fileUrl);
      const blocks = splitBlocks(log);
      for (const block of blocks) {
        const { pid, version } = extractPIDVersion(block);
        const commands = extractCommands(block);
        if (pid === "UNKNOWN" || version === "UNKNOWN") continue;

        // Reuse the device row for this PID/version pair, creating it on
        // first sight.
        const [[device]] = await db.query(
          `SELECT id FROM devices WHERE pid = ? AND version = ?`,
          [pid, version]
        );
        let deviceId = device?.id;
        if (!deviceId) {
          const [insertResult] = await db.query(
            `INSERT INTO devices (pid, version) VALUES (?, ?)`,
            [pid, version]
          );
          deviceId = insertResult.insertId;
        }

        const commandBlocks = extractRelevantBlocksByCommand(block, commands);
        for (const [command, output] of Object.entries(commandBlocks)) {
          // `command` comes from the fixed key set returned by
          // extractRelevantBlocksByCommand, never from log content.
          const table = `${command}_outputs`;
          await db.query(
            `INSERT INTO ${table} (device_id, filename, output) VALUES (?, ?, ?)`,
            [deviceId, filename, output]
          );
        }
        console.log(`${pid} ${version} - ${filename}`);
      }
    } catch (err) {
      console.error(`${filename}: ${err.message}`);
    }
  }
}
// 👇 Call from command line
const yearArg = process.argv[2]; // e.g. node crawl.js 2023
// Always close the pool when the crawl settles so open connections do not keep
// the process alive, and report a failed crawl (or failed shutdown) instead of
// leaving an unhandled rejection.
crawlLogs(yearArg)
  .finally(() => db.end())
  .catch((err) => {
    console.error(`Crawl failed: ${err.message}`);
    process.exitCode = 1;
  });