日志收集
错误日志数量较多,这里分别用几种主流编程语言(Python、C++、Rust)实现同一功能:从日志文件夹中提取所有错误日志,并按时间排序。
log_analysis.py
import sys
import os
import re
import datetime
import pandas as pd
def extract_time(log):
    """Extract a sortable timestamp string from one log line.

    Two timestamp layouts are recognised:

    * ``2023-11-20-16:24:15.634`` -- date and time joined by ``-`` or a space
    * ``[Mon Nov 20 16:17:33 2023]`` -- ctime-style text wrapped in brackets

    Both are normalised to ``YYYY-MM-DD HH:MM:SS.mmm`` so that plain string
    comparison orders entries chronologically, whichever layout they used.

    Args:
        log: A single line of log text.

    Returns:
        The normalised timestamp, or ``''`` when no timestamp is recognised.
    """
    # e.g. 2023-11-20-16:24:15.634
    numeric = re.search(r'(\d{4}-\d{2}-\d{2})[- ](\d{2}:\d{2}:\d{2}\.\d{3})', log)
    if numeric:
        return numeric.group(1) + ' ' + numeric.group(2)

    # e.g. [Mon Nov 20 16:17:33 2023]
    # [^\[\]] keeps the match inside ONE pair of brackets, so a preceding
    # "[ERROR]" tag is not swallowed into the text handed to strptime.
    bracketed = re.search(r'\[([^\[\]]+\d{2} \d{2}:\d{2}:\d{2} \d{4})\]', log)
    if bracketed:
        try:
            parsed = datetime.datetime.strptime(bracketed.group(1), '%a %b %d %H:%M:%S %Y')
        except ValueError:
            # Looked like a timestamp but is not a valid date: treat as absent.
            return ''
        return parsed.strftime('%Y-%m-%d %H:%M:%S.000')

    return ''
def extract_err(filename):
    """Collect every line containing ``[ERROR]`` from one log file.

    Undecodable bytes are ignored while reading, so binary or mixed-encoding
    files do not abort the scan.

    Args:
        filename: Path of the log file to scan.

    Returns:
        A list of dicts with the keys ``filename``, ``line`` (1-based line
        number), ``time`` (result of ``extract_time`` for that line) and
        ``log`` (the line without its trailing newline).
    """
    res = []
    with open(filename, 'r', errors='ignore') as f:
        # Iterate the file object directly so large logs are streamed
        # instead of being loaded into memory at once.
        for n, c in enumerate(f, start=1):
            if '[ERROR]' in c:
                res.append({
                    'filename': filename,
                    'line': n,
                    'time': extract_time(c),
                    'log': c.rstrip('\n'),
                })
    return res
def process(path):
    """Gather all ``[ERROR]`` lines under ``path`` and write them to a CSV.

    The directory tree is walked recursively, every file is scanned with
    ``extract_err``, the entries are sorted by ``time`` then ``filename``,
    and the result is written next to the directory as ``<path>.csv``.

    Args:
        path: Root directory containing the log files.
    """
    err_logs = []
    # Use distinct loop names: reusing ``path`` here would overwrite the
    # argument and make the CSV name depend on the last directory walked.
    for dirpath, _dirnames, files in os.walk(path):
        for filename in files:
            doc = os.path.normpath(os.path.join(dirpath, filename))
            err_logs.extend(extract_err(doc))
    # Explicit columns keep the sort (and the CSV header) valid even when
    # no error line was found at all.
    df = pd.DataFrame(err_logs, columns=['filename', 'line', 'time', 'log'])
    df.sort_values(by=['time', 'filename'], ascending=[True, True], inplace=True)
    # normpath drops a trailing separator so "logs/" yields "logs.csv".
    df.to_csv(os.path.normpath(path) + '.csv')
# Script entry point: expects exactly one argument, the log directory.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write('Usage: {} <path_to_logs>\n'.format(sys.argv[0]))
        sys.exit(1)
    process(sys.argv[1])
log_analysis.cc
#include <algorithm>
#include <ctime>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
// One error line found in a log file.
struct LogEntry {
    std::string filename;  // file the line was read from
    int line;              // line number within that file
    std::string time;      // timestamp text extracted from the line (may be empty)
    std::string log;       // text of the log line
};
// Extracts a sortable timestamp from one log line.
//
// Recognised layouts:
//   2023-11-20-16:24:15.634      (date and time joined by '-' or a space)
//   [Mon Nov 20 16:17:33 2023]   (ctime-style text wrapped in brackets)
//
// Both are normalised to "YYYY-MM-DD HH:MM:SS.mmm" so that plain string
// comparison orders entries chronologically. Returns an empty string when
// no timestamp is recognised or the bracketed text cannot be parsed.
std::string extract_time(const std::string& log) {
    // Built once: constructing std::regex on every call is expensive.
    static const std::regex pattern1(R"((\d{4}-\d{2}-\d{2})[- ](\d{2}:\d{2}:\d{2}\.\d{3}))");
    // [^\[\]] keeps the match inside ONE pair of brackets, so a preceding
    // "[ERROR]" tag is not swallowed into the text handed to get_time.
    static const std::regex pattern2(R"(\[([^\[\]]+\d{2} \d{2}:\d{2}:\d{2} \d{4})\])");

    std::smatch match;
    if (std::regex_search(log, match, pattern1)) {
        return match[1].str() + " " + match[2].str();
    }
    if (std::regex_search(log, match, pattern2)) {
        std::tm tm = {};
        std::istringstream ss(match[1].str());
        ss >> std::get_time(&tm, "%a %b %d %H:%M:%S %Y");
        if (ss.fail()) {
            return "";
        }
        // "YYYY-MM-DD HH:MM:SS.000" is 23 characters plus the terminator.
        char buffer[32];
        if (std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S.000", &tm) == 0) {
            return "";
        }
        return std::string(buffer);
    }
    return "";
}
// Collects every line containing "[ERROR]" from the file `filename`.
//
// Each hit records the file name, the 1-based line number, the timestamp
// returned by extract_time() and the line text. A file that cannot be
// opened yields an empty vector.
std::vector<LogEntry> extract_err(const std::string& filename) {
    std::vector<LogEntry> err_logs;
    std::ifstream file(filename);
    if (!file.is_open()) {
        return err_logs;
    }

    std::string line;
    for (int line_number = 1; std::getline(file, line); ++line_number) {
        // Files with CRLF line endings leave a '\r' behind after getline.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.find("[ERROR]") != std::string::npos) {
            err_logs.push_back({filename, line_number, extract_time(line), line});
        }
    }
    // The stream closes itself when `file` goes out of scope.
    return err_logs;
}
// Quotes a CSV field when it contains a comma, a double quote or a line
// break; embedded double quotes are doubled.
static std::string csv_field(const std::string& value) {
    if (value.find_first_of(",\"\r\n") == std::string::npos) {
        return value;
    }
    std::string quoted = "\"";
    for (char ch : value) {
        if (ch == '"') {
            quoted += '"';
        }
        quoted += ch;
    }
    quoted += '"';
    return quoted;
}

// Scans every regular file under `path` (recursively), collects the
// "[ERROR]" lines via extract_err(), sorts them by time and then by file
// name, and writes them to `<path>/error_logs.csv`.
void process_logs(const std::string& path) {
    const std::filesystem::path output_path = std::filesystem::path(path) / "error_logs.csv";

    std::vector<LogEntry> err_logs;
    for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        // The report lives inside the scanned tree; skip it so a second run
        // does not re-ingest the lines written by the first one.
        if (entry.path() == output_path) {
            continue;
        }
        auto logs = extract_err(entry.path().string());
        err_logs.insert(err_logs.end(), logs.begin(), logs.end());
    }

    std::sort(err_logs.begin(), err_logs.end(), [](const LogEntry& a, const LogEntry& b) {
        return a.time < b.time || (a.time == b.time && a.filename < b.filename);
    });

    std::ofstream output_file(output_path);
    if (!output_file.is_open()) {
        std::cerr << "Cannot write " << output_path.string() << "\n";
        return;
    }
    output_file << "filename,line,time,log\n";
    for (const auto& entry : err_logs) {
        output_file << csv_field(entry.filename) << "," << entry.line << ","
                    << csv_field(entry.time) << "," << csv_field(entry.log) << "\n";
    }
}
// Entry point: expects exactly one argument, the directory holding the logs.
// Returns 1 on a usage error or when processing throws, 0 otherwise.
int main(int argc, char* argv[]) {
    if (argc != 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "log_analysis") << " <path_to_logs>\n";
        return 1;
    }
    try {
        process_logs(argv[1]);
    } catch (const std::exception& e) {
        // e.g. std::filesystem::filesystem_error for a missing directory.
        std::cerr << "Error: " << e.what() << "\n";
        return 1;
    }
    return 0;
}
- 文件操作用到了 C++17 的 `std::filesystem`;若使用低版本 gcc,可参考“g++ 7 使用 C++14/C++17 高级特性”。
log_analysis.rs
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::error::Error;
use std::fs;
use regex::Regex;
use chrono::DateTime;
use chrono::offset::Utc;
use std::cmp::Ordering;
use std::env;
/// One error line found in a log file.
#[derive(Debug)]
struct LogEntry {
    /// File the line was read from.
    filename: String,
    /// Line number within that file.
    line: usize,
    /// Timestamp text extracted from the line (may be empty).
    time: String,
    /// Text of the log line.
    log: String,
}
fn extract_time(log: &str) -> String {
let pattern1 = Regex::new(r"\d{4}-\d{2}-\d{2}:\d{2}:\d{2}.\d{3}").unwrap();
let pattern2 = Regex::new(r"\[.+\d{2} \d{2}:\d{2}:\d{2} \d{4}\]").unwrap();
if pattern1.is_match(log) {
return pattern1.find(log).unwrap().as_str().to_string();
} else if pattern2.is_match(log) {
let dd = &pattern2.find(log).unwrap().as_str()[1..(log.len() - 1)];
let dt = DateTime::parse_from_str(dd, "%a %b %d %H:%M:%S %Y").unwrap();
return dt.format("%Y-%m-%d %H:%M:%S.000").to_string();
}
String::new()
}
/// Collects every line containing `[ERROR]` from the file `filename`.
///
/// Lines are decoded lossily, so bytes that are not valid UTF-8 are replaced
/// rather than aborting the scan. Each hit records the file name, the
/// 1-based line number, the timestamp from `extract_time` and the trimmed
/// line text.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read.
fn extract_err(filename: &str) -> Result<Vec<LogEntry>, Box<dyn Error>> {
    let mut reader = io::BufReader::new(File::open(filename)?);
    let mut err_logs = Vec::new();
    // One byte buffer reused for every line.
    let mut raw = Vec::new();
    let mut line_number = 0usize;

    loop {
        raw.clear();
        if reader.read_until(b'\n', &mut raw)? == 0 {
            break;
        }
        line_number += 1;

        let text = String::from_utf8_lossy(&raw);
        if text.contains("[ERROR]") {
            err_logs.push(LogEntry {
                filename: filename.to_string(),
                line: line_number,
                time: extract_time(&text),
                log: text.trim().to_string(),
            });
        }
    }
    Ok(err_logs)
}
fn process_logs(path: &str) -> Result<(), Box<dyn Error>> {
let mut err_logs = Vec::new();
for entry in fs::read_dir(path)? {
let entry = entry?;
if entry.file_type()?.is_file() {
let logs = extract_err(entry.path().to_str().unwrap())?;
err_logs.extend(logs);
}
}
err_logs.sort_by(|a, b| a.time.cmp(&b.time).then_with(|| a.filename.cmp(&b.filename)));
let mut output_file = File::create(format!("{}/error_logs.csv", path))?;
writeln!(output_file, "filename,line,time,log")?;
for entry in err_logs {
writeln!(output_file, "{},{},{},{}", entry.filename, entry.line, entry.time, entry.log)?;
}
Ok(())
}
/// Entry point: expects exactly one argument, the directory holding the logs.
///
/// Prints a usage line and exits with status 1 on any other argument count;
/// otherwise returns whatever `process_logs` returns.
fn main() -> Result<(), Box<dyn Error>> {
    let args: Vec<String> = env::args().collect();
    match args.as_slice() {
        [_, path_to_logs] => process_logs(path_to_logs),
        _ => {
            eprintln!("Usage: {} <path_to_logs>", args[0]);
            std::process::exit(1);
        }
    }
}