日志收集

错误日志数量较多,这里分别用几种主流编程语言(Python、C++、Rust)实现同一个功能:遍历日志文件夹,提取所有包含 [ERROR] 的日志行,按时间排序后输出为 CSV 文件。

log_analysis.py

import sys
import os
import re
import datetime
import pandas as pd


def extract_time(log):
    """Extract a timestamp string from a single log line.

    Two timestamp layouts are recognised:

    * ``2023-11-20-16:24:15.634`` -- returned exactly as it appears in the line.
    * ``[Mon Nov 20 16:17:33 2023]`` -- converted to ``2023-11-20 16:17:33.000``.

    Args:
        log: One line of log text.

    Returns:
        The timestamp string, or ``''`` when the line holds no recognisable
        timestamp (including a bracketed date that fails to parse).
    """
    # 2023-11-20-16:24:15.634
    pattern1 = r'\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2}\.\d{3}'
    # [Mon Nov 20 16:17:33 2023]
    # The bracket content may not contain other brackets, so a preceding tag
    # such as "[ERROR]" is never swallowed into the timestamp.
    pattern2 = r'\[([^\[\]]*\d{2} \d{2}:\d{2}:\d{2} \d{4})\]'

    match = re.search(pattern1, log)
    if match:
        return match.group(0)

    match = re.search(pattern2, log)
    if match:
        try:
            parsed = datetime.datetime.strptime(match.group(1), '%a %b %d %H:%M:%S %Y')
        except ValueError:
            # Looks like a bracketed date but is not one; treat as "no timestamp".
            return ''
        return parsed.strftime('%Y-%m-%d %H:%M:%S.000')
    return ''

def extract_err(log):
    """Collect every line containing ``[ERROR]`` from one log file.

    Args:
        log: Path of the log file to scan. The parameter keeps its original
            name for backward compatibility even though it holds a file path.

    Returns:
        A list of dicts, one per matching line, with the keys ``filename``
        (the path that was passed in), ``line`` (1-based line number),
        ``time`` (the result of ``extract_time`` for that line) and ``log``
        (the line text without its trailing newline).
    """
    filename = log
    res = []
    # errors='ignore' drops undecodable bytes so a corrupt byte cannot abort the scan.
    with open(filename, 'r', errors='ignore') as f:
        # Iterate the file lazily instead of loading every line into memory.
        for n, c in enumerate(f, start=1):
            if '[ERROR]' in c:
                res.append({
                    'filename': filename,
                    'line': n,
                    'time': extract_time(c),
                    'log': c.rstrip('\n'),
                })
    return res

def process(path):
    """Gather all error lines under a directory tree and write them to a CSV.

    Every file below ``path`` (recursively) is scanned with ``extract_err``.
    The collected rows are sorted by ``time`` and then ``filename`` and written
    to ``<path>.csv``, i.e. next to the scanned directory rather than inside it.

    Args:
        path: Root directory containing the log files.
    """
    err_logs = []
    # Use distinct loop names: reusing `path` here would overwrite the argument
    # and make the CSV name depend on the last directory visited.
    for root, _dirs, files in os.walk(path):
        for filename in files:
            doc = os.path.normpath(os.path.join(root, filename))
            err_logs.extend(extract_err(doc))
    # Explicit columns keep the sort (and the CSV header) valid when nothing was found.
    df = pd.DataFrame(err_logs, columns=['filename', 'line', 'time', 'log'])
    df.sort_values(by=['time', 'filename'], ascending=[True, True], inplace=True)
    # normpath strips a trailing separator so "logs/" yields "logs.csv", not "logs/.csv".
    df.to_csv(os.path.normpath(path) + '.csv')

# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    # Exactly one argument (the log directory) is expected.
    if len(sys.argv) != 2:
        sys.exit('Usage: {} <path_to_logs>'.format(sys.argv[0]))
    process(sys.argv[1])

log_analysis.cc

#include <algorithm>
#include <ctime>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

// One error line collected from a log file.
struct LogEntry {
    // Path of the file the line was read from.
    std::string filename;
    // Line number of the entry within that file (counting starts at 1).
    int line;
    // Timestamp string returned by extract_time for the line; empty when none was found.
    std::string time;
    // Text of the log line.
    std::string log;
};

// Extracts a timestamp string from one log line.
//
// Two layouts are recognised:
//   2023-11-20-16:24:15.634      -> returned exactly as found
//   [Mon Nov 20 16:17:33 2023]   -> converted to "2023-11-20 16:17:33.000"
//
// Returns an empty string when the line holds no recognisable timestamp or
// when the bracketed date cannot be parsed.
std::string extract_time(const std::string& log) {
    // Compiled once: constructing a std::regex for every line is expensive.
    static const std::regex pattern1("\\d{4}-\\d{2}-\\d{2}-\\d{2}:\\d{2}:\\d{2}\\.\\d{3}");
    // The bracket content may not contain other brackets, so a preceding tag
    // such as "[ERROR]" is never swallowed into the timestamp.
    static const std::regex pattern2("\\[([^\\[\\]]*\\d{2} \\d{2}:\\d{2}:\\d{2} \\d{4})\\]");

    std::smatch match;

    if (std::regex_search(log, match, pattern1)) {
        return match[0].str();
    }

    if (std::regex_search(log, match, pattern2)) {
        std::tm tm = {};
        std::istringstream ss(match[1].str());
        ss >> std::get_time(&tm, "%a %b %d %H:%M:%S %Y");
        if (ss.fail()) {
            return "";
        }
        // "YYYY-MM-DD HH:MM:SS.000" is 23 characters plus the terminator;
        // strftime returns 0 when the buffer is too small.
        char buffer[32];
        if (std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S.000", &tm) == 0) {
            return "";
        }
        return std::string(buffer);
    }

    return "";
}

// Collects every line containing "[ERROR]" from the file at `filename`.
//
// Each hit becomes a LogEntry holding the file name, the 1-based line number,
// the timestamp from extract_time and the line text. A file that cannot be
// opened yields an empty vector.
std::vector<LogEntry> extract_err(const std::string& filename) {
    std::vector<LogEntry> err_logs;
    std::ifstream file(filename);
    if (!file.is_open()) {
        return err_logs;
    }

    std::string line;
    int line_number = 0;
    while (std::getline(file, line)) {
        ++line_number;
        // getline strips only '\n'; drop the '\r' left behind by CRLF files.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.find("[ERROR]") != std::string::npos) {
            err_logs.push_back({filename, line_number, extract_time(line), line});
        }
    }

    // The stream closes itself when `file` goes out of scope.
    return err_logs;
}

// Quotes one CSV field when it contains a comma, a double quote or a line
// break; embedded double quotes are doubled.
static std::string csv_field(const std::string& field) {
    if (field.find_first_of(",\"\r\n") == std::string::npos) {
        return field;
    }
    std::string quoted = "\"";
    for (char ch : field) {
        if (ch == '"') {
            quoted += '"';
        }
        quoted += ch;
    }
    quoted += '"';
    return quoted;
}

// Scans every regular file below `path` (recursively) for error lines, sorts
// them by time, then file name, then line number, and writes the result to
// `<path>/error_logs.csv` with the header "filename,line,time,log".
void process_logs(const std::string& path) {
    const std::filesystem::path output_path = std::filesystem::path(path) / "error_logs.csv";
    std::vector<LogEntry> err_logs;

    for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        // The report lives inside the scanned directory; skip it so a second
        // run does not ingest the previous run's output.
        if (entry.path() == output_path) {
            continue;
        }
        auto logs = extract_err(entry.path().string());
        err_logs.insert(err_logs.end(), logs.begin(), logs.end());
    }

    // std::sort is not stable, so the line number is the final tie-breaker to
    // keep the output deterministic.
    std::sort(err_logs.begin(), err_logs.end(), [](const LogEntry& a, const LogEntry& b) {
        if (a.time != b.time) {
            return a.time < b.time;
        }
        if (a.filename != b.filename) {
            return a.filename < b.filename;
        }
        return a.line < b.line;
    });

    std::ofstream output_file(output_path);
    if (!output_file) {
        std::cerr << "Cannot write " << output_path.string() << "\n";
        return;
    }
    output_file << "filename,line,time,log\n";

    for (const auto& entry : err_logs) {
        output_file << csv_field(entry.filename) << "," << entry.line << ","
                    << csv_field(entry.time) << "," << csv_field(entry.log) << "\n";
    }
}

// Entry point: expects exactly one argument, the directory containing the logs.
// Returns 1 on wrong usage or when processing throws, 0 otherwise.
int main(int argc, char* argv[]) {
    if (argc != 2) {
        std::cerr << "Usage: " << argv[0] << " <path_to_logs>\n";
        return 1;
    }

    try {
        std::string path_to_logs = argv[1];
        process_logs(path_to_logs);
    } catch (const std::exception& e) {
        // e.g. std::filesystem::filesystem_error for a missing or unreadable
        // directory; report it instead of letting the program terminate.
        std::cerr << "Error: " << e.what() << "\n";
        return 1;
    }
    return 0;
}


log_analysis.rs

use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::error::Error;
use std::fs;
use regex::Regex;
use chrono::DateTime;
use chrono::offset::Utc;
use std::cmp::Ordering;
use std::env;

/// One error line collected from a log file.
#[derive(Debug)]
struct LogEntry {
    /// Path of the file the line was read from.
    filename: String,
    /// Line number of the entry within that file (counting starts at 1).
    line: usize,
    /// Timestamp string returned by `extract_time`; empty when none was found.
    time: String,
    /// Trimmed text of the log line.
    log: String,
}

fn extract_time(log: &str) -> String {
    let pattern1 = Regex::new(r"\d{4}-\d{2}-\d{2}:\d{2}:\d{2}.\d{3}").unwrap();
    let pattern2 = Regex::new(r"\[.+\d{2} \d{2}:\d{2}:\d{2} \d{4}\]").unwrap();

    if pattern1.is_match(log) {
        return pattern1.find(log).unwrap().as_str().to_string();
    } else if pattern2.is_match(log) {
        let dd = &pattern2.find(log).unwrap().as_str()[1..(log.len() - 1)];
        let dt = DateTime::parse_from_str(dd, "%a %b %d %H:%M:%S %Y").unwrap();
        return dt.format("%Y-%m-%d %H:%M:%S.000").to_string();
    }

    String::new()
}

/// Collects every line containing `[ERROR]` from the file at `filename`.
///
/// Each hit becomes a [`LogEntry`] holding the file name, the 1-based line
/// number, the timestamp from `extract_time` and the trimmed line text.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read.
fn extract_err(filename: &str) -> Result<Vec<LogEntry>, Box<dyn Error>> {
    let file = File::open(filename)?;
    let mut reader = io::BufReader::new(file);
    let mut err_logs = Vec::new();
    let mut raw_line = Vec::new();
    let mut line_number = 0;

    // Read raw bytes and decode lossily: log files often contain bytes that
    // are not valid UTF-8, and one such byte must not abort the whole scan.
    while reader.read_until(b'\n', &mut raw_line)? != 0 {
        line_number += 1;
        let log_line = String::from_utf8_lossy(&raw_line);
        if log_line.contains("[ERROR]") {
            err_logs.push(LogEntry {
                filename: filename.to_string(),
                line: line_number,
                time: extract_time(&log_line),
                log: log_line.trim().to_string(),
            });
        }
        // Reuse the same buffer for the next line.
        raw_line.clear();
    }

    Ok(err_logs)
}

fn process_logs(path: &str) -> Result<(), Box<dyn Error>> {
    let mut err_logs = Vec::new();

    for entry in fs::read_dir(path)? {
        let entry = entry?;
        if entry.file_type()?.is_file() {
            let logs = extract_err(entry.path().to_str().unwrap())?;
            err_logs.extend(logs);
        }
    }

    err_logs.sort_by(|a, b| a.time.cmp(&b.time).then_with(|| a.filename.cmp(&b.filename)));

    let mut output_file = File::create(format!("{}/error_logs.csv", path))?;
    writeln!(output_file, "filename,line,time,log")?;

    for entry in err_logs {
        writeln!(output_file, "{},{},{},{}", entry.filename, entry.line, entry.time, entry.log)?;
    }

    Ok(())
}

/// Entry point: expects exactly one argument, the directory containing the logs.
///
/// Prints a usage line to stderr and exits with status 1 when the argument
/// count is wrong; otherwise returns whatever `process_logs` returns.
fn main() -> Result<(), Box<dyn Error>> {
    let args: Vec<String> = env::args().collect();

    match args.as_slice() {
        // Program name plus exactly one path argument.
        [_, path_to_logs] => process_logs(path_to_logs),
        _ => {
            eprintln!("Usage: {} <path_to_logs>", args[0]);
            std::process::exit(1)
        }
    }
}