import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import DBSCAN
# Group log messages by textual similarity: each message is turned into a
# TF-IDF vector, the vectors are clustered with DBSCAN, and the messages
# belonging to each resulting label are printed together.

# Load log data into a pandas DataFrame
log_data = pd.read_csv('log_file.csv')

# Extract relevant features for correlation (e.g., log message content)
features = log_data['log_message']

# Convert log messages to feature vectors using TF-IDF vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(features)

# Perform clustering to identify correlated log events.
# eps is the neighbourhood radius; min_samples=2 allows as few as two
# sufficiently similar messages to form a cluster.
clustering_model = DBSCAN(eps=0.5, min_samples=2)
clusters = clustering_model.fit_predict(X)

# Add cluster labels to the log data
log_data['cluster_label'] = clusters

# Print the correlated log events, one group per label, in the order the
# labels first appear in the data.
unique_clusters = log_data['cluster_label'].unique()
for cluster in unique_clusters:
    cluster_data = log_data[log_data['cluster_label'] == cluster]
    if cluster == -1:
        # DBSCAN assigns -1 to noise samples, i.e. messages that were NOT
        # grouped with any other message; flag them so they are not
        # mistaken for a genuine cluster of correlated events.
        print("Cluster Label:", cluster, "(noise - not correlated with other events)")
    else:
        print("Cluster Label:", cluster)
    print(cluster_data['log_message'])
    print("------------------------------------")
Sample data for this script (the contents of log_file.csv):
log_message
Application started
Invalid input detected
Processing completed
Low disk space
Connection timed out
Database connection failed
No comments:
Post a Comment