import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
# Log-field classification script.
#
# Reads 'log_file.csv' and trains two bag-of-words logistic-regression
# classifiers on the 'log_message' text: one predicting the 'timestamp'
# column and one predicting the 'error_code' column. The test-set accuracy of
# each model is printed.

# Load log data into a pandas DataFrame
log_data = pd.read_csv('log_file.csv')

# Define the features and labels for log parsing
features = log_data['log_message']
timestamps = log_data['timestamp']
# pandas reads an empty 'error_code' cell as NaN, and scikit-learn refuses to
# fit on targets containing NaN. Map missing codes to an explicit sentinel
# class so "no error code" becomes a label the model can learn and predict.
# Assumes real error codes are strings that never equal the sentinel
# -- TODO confirm against the actual log data.
NO_ERROR_CODE = 'NONE'
error_codes = log_data['error_code'].fillna(NO_ERROR_CODE)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    features, timestamps, test_size=0.2, random_state=42
)

# Convert log messages to feature vectors using CountVectorizer.
# The vocabulary is learned from the training messages only; the test
# messages are projected onto that same vocabulary.
vectorizer = CountVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

# Train a logistic regression model for timestamp parsing.
# NOTE(review): every distinct timestamp value is treated as its own class,
# so a test timestamp that never occurs in the training split can never be
# predicted correctly -- confirm that classification is the intended approach.
timestamp_model = LogisticRegression()
timestamp_model.fit(X_train_vectors, y_train)

# Predict the timestamps for test data
timestamp_predictions = timestamp_model.predict(X_test_vectors)

# Evaluate the timestamp model (fraction of exact label matches)
timestamp_accuracy = np.mean(timestamp_predictions == y_test)
print("Timestamp Accuracy:", timestamp_accuracy)

# Split the data into training and testing sets for error code parsing
X_train, X_test, y_train, y_test = train_test_split(
    features, error_codes, test_size=0.2, random_state=42
)

# Convert log messages to feature vectors using CountVectorizer.
# This re-fits the shared vectorizer on the new training split.
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

# Train a logistic regression model for error code parsing
error_code_model = LogisticRegression()
error_code_model.fit(X_train_vectors, y_train)

# Predict the error codes for test data
error_code_predictions = error_code_model.predict(X_test_vectors)

# Evaluate the error code model (fraction of exact label matches)
error_code_accuracy = np.mean(error_code_predictions == y_test)
print("Error Code Accuracy:", error_code_accuracy)
A sample input file (log_file.csv) looks like this:
log_message,timestamp,error_code
[INFO] Application started,2023-06-01 10:23:45,
[ERROR] Invalid input detected,2023-06-02 14:57:21,ERR001
[INFO] Processing completed,2023-06-03 09:12:34,
[WARNING] Low disk space,2023-06-04 16:35:02,
[DEBUG] Connection timed out,2023-06-05 11:45:10,
[ERROR] Database connection failed,2023-06-06 08:21:56,ERR002
No comments:
Post a Comment