validmind.prompt_validation.Bias |
Bias |
Evaluates bias in a Large Language Model based on the order and distribution of exemplars in a prompt.... |
['model.prompt'] |
{'min_threshold': 7} |
validmind.prompt_validation.Clarity |
Clarity |
Evaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines.... |
['model.prompt'] |
{'min_threshold': 7} |
validmind.prompt_validation.Specificity |
Specificity |
Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity,... |
['model.prompt'] |
{'min_threshold': 7} |
validmind.prompt_validation.Robustness |
Robustness |
Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts.... |
['model'] |
{'num_tests': 10} |
validmind.prompt_validation.NegativeInstruction |
Negative Instruction |
Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.... |
['model.prompt'] |
{'min_threshold': 7} |
validmind.prompt_validation.Conciseness |
Conciseness |
Analyzes and grades the conciseness of prompts provided to a Large Language Model.... |
['model.prompt'] |
{'min_threshold': 7} |
validmind.prompt_validation.Delimitation |
Delimitation |
Evaluates the proper use of delimiters in prompts provided to Large Language Models.... |
['model.prompt'] |
{'min_threshold': 7} |
validmind.model_validation.BertScore |
Bert Score |
Evaluates the quality of machine-generated text using BERTScore metrics and visualizes the results through histograms... |
['dataset', 'model'] |
{} |
validmind.model_validation.RegardScore |
Regard Score |
Computes and visualizes the regard score for each text instance, assessing sentiment and potential biases.... |
['dataset', 'model'] |
{} |
validmind.model_validation.BleuScore |
Bleu Score |
Evaluates the quality of machine-generated text using BLEU metrics and visualizes the results through histograms... |
['dataset', 'model'] |
{} |
validmind.model_validation.RegressionResidualsPlot |
Regression Residuals Plot |
Evaluates regression model performance using residual distribution and actual vs. predicted plots.... |
['model', 'dataset'] |
{'bin_size': 0.1} |
validmind.model_validation.FeaturesAUC |
Features AUC |
Evaluates the discriminatory power of each individual feature within a binary classification model by calculating the Area Under the Curve (AUC) for each feature separately.... |
['model', 'dataset'] |
{'fontsize': 12, 'figure_height': 500} |
validmind.model_validation.ContextualRecall |
Contextual Recall |
Evaluates a Natural Language Generation model's ability to generate contextually relevant and factually correct text, visualizing the results through histograms and bar charts, alongside compiling a comprehensive table of descriptive statistics for contextual recall scores.... |
['dataset', 'model'] |
{} |
validmind.model_validation.MeteorScore |
Meteor Score |
Computes and visualizes the METEOR score for each text generation instance, assessing translation quality.... |
['dataset', 'model'] |
{} |
validmind.model_validation.RougeScore |
Rouge Score |
Evaluates the quality of machine-generated text using ROUGE metrics and visualizes the results through histograms... |
['dataset', 'model'] |
{'metric': 'rouge-1'} |
validmind.model_validation.ModelMetadata |
Model Metadata |
Extracts and summarizes critical metadata from a machine learning model instance for comprehensive analysis.... |
['model'] |
{} |
validmind.model_validation.ClusterSizeDistribution |
Cluster Size Distribution |
Compares and visualizes the distribution of cluster sizes in model predictions and actual data for assessing... |
['model', 'dataset'] |
{} |
validmind.model_validation.TokenDisparity |
Token Disparity |
Evaluates the token disparity between reference and generated texts, visualizing the results through histograms... |
['dataset', 'model'] |
{} |
validmind.model_validation.ToxicityScore |
Toxicity Score |
Computes and visualizes the toxicity score for input text, true text, and predicted text, assessing content quality and potential risk.... |
['dataset', 'model'] |
{} |
validmind.model_validation.embeddings.CosineSimilarityComparison |
Cosine Similarity Comparison |
Computes pairwise cosine similarities between model embeddings and visualizes the results through bar charts,... |
['dataset', 'models'] |
{} |
validmind.model_validation.embeddings.EmbeddingsVisualization2D |
Embeddings Visualization 2D |
Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.... |
['model', 'dataset'] |
{'cluster_column': None, 'perplexity': 30} |
validmind.model_validation.embeddings.StabilityAnalysisRandomNoise |
Stability Analysis Random Noise |
Evaluates robustness of embedding models to random noise introduced by using... |
['model', 'dataset'] |
{'mean_similarity_threshold': 0.7, 'probability': 0.02} |
validmind.model_validation.embeddings.TSNEComponentsPairwisePlots |
TSNE Components Pairwise Plots |
Plots individual scatter plots for pairwise combinations of t-SNE components of embeddings.... |
['dataset', 'model'] |
{'n_components': 2, 'perplexity': 30, 'title': 't-SNE'} |
validmind.model_validation.embeddings.CosineSimilarityDistribution |
Cosine Similarity Distribution |
Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution... |
['model', 'dataset'] |
{} |
validmind.model_validation.embeddings.PCAComponentsPairwisePlots |
PCA Components Pairwise Plots |
Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model embeddings.... |
['dataset', 'model'] |
{'n_components': 3} |
validmind.model_validation.embeddings.CosineSimilarityHeatmap |
Cosine Similarity Heatmap |
Generates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model.... |
['dataset', 'model'] |
{'title': 'Cosine Similarity Matrix', 'color': 'Cosine Similarity', 'xaxis_title': 'Index', 'yaxis_title': 'Index', 'color_scale': 'Blues'} |
validmind.model_validation.embeddings.StabilityAnalysisTranslation |
Stability Analysis Translation |
Evaluates robustness of embedding models to noise introduced by translating... |
['model', 'dataset'] |
{'source_lang': 'en', 'target_lang': 'fr', 'mean_similarity_threshold': 0.7} |
validmind.model_validation.embeddings.EuclideanDistanceComparison |
Euclidean Distance Comparison |
Computes pairwise Euclidean distances between model embeddings and visualizes the results through bar charts,... |
['dataset', 'models'] |
{} |
validmind.model_validation.embeddings.ClusterDistribution |
Cluster Distribution |
Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.... |
['model', 'dataset'] |
{'num_clusters': 5} |
validmind.model_validation.embeddings.EuclideanDistanceHeatmap |
Euclidean Distance Heatmap |
Generates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model.... |
['dataset', 'model'] |
{'title': 'Euclidean Distance Matrix', 'color': 'Euclidean Distance', 'xaxis_title': 'Index', 'yaxis_title': 'Index', 'color_scale': 'Blues'} |
validmind.model_validation.embeddings.StabilityAnalysis |
Stability Analysis |
Base class for embeddings stability analysis tests |
['model', 'dataset'] |
{'mean_similarity_threshold': 0.7} |
validmind.model_validation.embeddings.StabilityAnalysisKeyword |
Stability Analysis Keyword |
Evaluates robustness of embedding models to keyword swaps on the test dataset... |
['model', 'dataset'] |
{'keyword_dict': None, 'mean_similarity_threshold': 0.7} |
validmind.model_validation.embeddings.StabilityAnalysisSynonyms |
Stability Analysis Synonyms |
Evaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly.... |
['model', 'dataset'] |
{'probability': 0.02, 'mean_similarity_threshold': 0.7} |
validmind.model_validation.embeddings.DescriptiveAnalytics |
Descriptive Analytics |
Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation... |
['model', 'dataset'] |
{} |
validmind.model_validation.ragas.ContextEntityRecall |
Context Entity Recall |
Evaluates the context entity recall for dataset entries and visualizes the results.... |
['dataset'] |
{'contexts_column': 'contexts', 'ground_truth_column': 'ground_truth'} |
validmind.model_validation.ragas.Faithfulness |
Faithfulness |
Evaluates the faithfulness of the generated answers with respect to retrieved contexts.... |
['dataset'] |
{'answer_column': 'answer', 'contexts_column': 'contexts'} |
validmind.model_validation.ragas.AspectCritique |
Aspect Critique |
Evaluates generations against the following aspects: harmfulness, maliciousness,... |
['dataset'] |
{'question_column': 'question', 'answer_column': 'answer', 'contexts_column': 'contexts', 'aspects': ['coherence', 'conciseness', 'correctness', 'harmfulness', 'maliciousness'], 'additional_aspects': None} |
validmind.model_validation.ragas.AnswerSimilarity |
Answer Similarity |
Calculates the semantic similarity between generated answers and ground truths... |
['dataset'] |
{'answer_column': 'answer', 'ground_truth_column': 'ground_truth'} |
validmind.model_validation.ragas.AnswerCorrectness |
Answer Correctness |
Evaluates the correctness of answers in a dataset with respect to the provided ground... |
['dataset'] |
{'question_column': 'question', 'answer_column': 'answer', 'ground_truth_column': 'ground_truth'} |
validmind.model_validation.ragas.ContextRecall |
Context Recall |
Context recall measures the extent to which the retrieved context aligns with the... |
['dataset'] |
{'question_column': 'question', 'contexts_column': 'contexts', 'ground_truth_column': 'ground_truth'} |
validmind.model_validation.ragas.ContextRelevancy |
Context Relevancy |
Evaluates the context relevancy metric for entries in a dataset and visualizes the... |
['dataset'] |
{'question_column': 'question', 'contexts_column': 'contexts'} |
validmind.model_validation.ragas.ContextPrecision |
Context Precision |
Context Precision is a metric that evaluates whether all of the ground-truth... |
['dataset'] |
{'question_column': 'question', 'contexts_column': 'contexts', 'ground_truth_column': 'ground_truth'} |
validmind.model_validation.ragas.AnswerRelevance |
Answer Relevance |
Assesses how pertinent the generated answer is to the given prompt.... |
['dataset'] |
{'question_column': 'question', 'contexts_column': 'contexts', 'answer_column': 'answer'} |
validmind.model_validation.sklearn.RegressionModelsPerformanceComparison |
Regression Models Performance Comparison |
Compares and evaluates the performance of multiple regression models using five different metrics: MAE, MSE, RMSE,... |
['dataset', 'models'] |
{} |
validmind.model_validation.sklearn.AdjustedMutualInformation |
Adjusted Mutual Information |
Evaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.SilhouettePlot |
Silhouette Plot |
Calculates and visualizes Silhouette Score, assessing degree of data point suitability to its cluster in ML models.... |
['model', 'dataset'] |
{} |
validmind.model_validation.sklearn.RobustnessDiagnosis |
Robustness Diagnosis |
Evaluates the robustness of a machine learning model by injecting Gaussian noise to input data and measuring... |
['model', 'datasets'] |
{'features_columns': None, 'scaling_factor_std_dev_list': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], 'accuracy_decay_threshold': 4} |
validmind.model_validation.sklearn.AdjustedRandIndex |
Adjusted Rand Index |
Measures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.SHAPGlobalImportance |
SHAP Global Importance |
Evaluates and visualizes global feature importance using SHAP values for model explanation and risk identification.... |
['model', 'dataset'] |
{'kernel_explainer_samples': 10, 'tree_or_linear_explainer_samples': 200} |
validmind.model_validation.sklearn.ConfusionMatrix |
Confusion Matrix |
Evaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix... |
['model', 'dataset'] |
{} |
validmind.model_validation.sklearn.HomogeneityScore |
Homogeneity Score |
Assesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.CompletenessScore |
Completeness Score |
Evaluates a clustering model's capacity to categorize instances from a single class into the same cluster.... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.OverfitDiagnosis |
Overfit Diagnosis |
Detects and visualizes overfit regions in an ML model by comparing performance on training and test datasets.... |
['model', 'datasets'] |
{'features_columns': None, 'cut_off_percentage': 4} |
validmind.model_validation.sklearn.ClusterPerformanceMetrics |
Cluster Performance Metrics |
Evaluates the performance of clustering machine learning models using multiple established metrics.... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.PermutationFeatureImportance |
Permutation Feature Importance |
Assesses the significance of each feature in a model by evaluating the impact on model performance when feature... |
['model', 'dataset'] |
{'fontsize': None, 'figure_height': 1000} |
validmind.model_validation.sklearn.FowlkesMallowsScore |
Fowlkes Mallows Score |
Evaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.MinimumROCAUCScore |
Minimum ROC AUC Score |
Validates model by checking if the ROC AUC score meets or surpasses a specified threshold.... |
['model', 'dataset'] |
{'min_threshold': 0.5} |
validmind.model_validation.sklearn.ClusterCosineSimilarity |
Cluster Cosine Similarity |
Measures the intra-cluster similarity of a clustering model using cosine similarity.... |
['model', 'dataset'] |
{} |
validmind.model_validation.sklearn.PrecisionRecallCurve |
Precision Recall Curve |
Evaluates the precision-recall trade-off for binary classification models and visualizes the Precision-Recall curve.... |
['model', 'dataset'] |
{} |
validmind.model_validation.sklearn.ClassifierPerformance |
Classifier Performance |
Evaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,... |
['model', 'dataset'] |
{} |
validmind.model_validation.sklearn.VMeasure |
V Measure |
Evaluates homogeneity and completeness of a clustering model using the V Measure Score.... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.MinimumF1Score |
Minimum F1 Score |
Evaluates if the model's F1 score on the validation set meets a predefined minimum threshold.... |
['model', 'dataset'] |
{'min_threshold': 0.5} |
validmind.model_validation.sklearn.ROCCurve |
ROC Curve |
Evaluates binary classification model performance by generating and plotting the Receiver Operating Characteristic... |
['model', 'dataset'] |
{} |
validmind.model_validation.sklearn.RegressionR2Square |
Regression R2 Square |
Measures the overall goodness-of-fit of a... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.RegressionErrors |
Regression Errors |
Measures the performance of a regression model. It gauges the model's accuracy... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.ClusterPerformance |
Cluster Performance |
Evaluates and compares a clustering model's performance on training and testing datasets using multiple defined... |
['model', 'datasets'] |
{} |
validmind.model_validation.sklearn.TrainingTestDegradation |
Training Test Degradation |
Tests if model performance degradation between training and test datasets exceeds a predefined threshold.... |
['model', 'datasets'] |
{'metrics': ['accuracy', 'precision', 'recall', 'f1'], 'max_threshold': 0.1} |
validmind.model_validation.sklearn.HyperParametersTuning |
Hyper Parameters Tuning |
Performs an exhaustive grid search to identify optimal hyperparameters for the model, improving performance.... |
['model', 'dataset'] |
{'param_grid': None, 'scoring': None} |
validmind.model_validation.sklearn.KMeansClustersOptimization |
K Means Clusters Optimization |
Optimizes the number of clusters in K-means models using Elbow and Silhouette methods.... |
['model', 'dataset'] |
{'n_clusters': None} |
validmind.model_validation.sklearn.ModelsPerformanceComparison |
Models Performance Comparison |
Evaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,... |
['dataset', 'models'] |
{} |
validmind.model_validation.sklearn.WeakspotsDiagnosis |
Weakspots Diagnosis |
Identifies and visualizes weak spots in a machine learning model's performance across various sections of the... |
['model', 'datasets'] |
{'features_columns': None, 'thresholds': {'accuracy': 0.75, 'precision': 0.5, 'recall': 0.5, 'f1': 0.7}} |
validmind.model_validation.sklearn.PopulationStabilityIndex |
Population Stability Index |
Evaluates the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across... |
['model', 'datasets'] |
{'num_bins': 10, 'mode': 'fixed'} |
validmind.model_validation.sklearn.MinimumAccuracy |
Minimum Accuracy |
Checks if the model's prediction accuracy meets or surpasses a specified threshold.... |
['model', 'dataset'] |
{'min_threshold': 0.7} |
validmind.model_validation.statsmodels.RegressionModelsCoeffs |
Regression Models Coeffs |
Compares feature importance by evaluating and contrasting coefficients of different regression models.... |
['models'] |
{} |
validmind.model_validation.statsmodels.BoxPierce |
Box Pierce |
Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.RegressionCoeffsPlot |
Regression Coeffs Plot |
Visualizes regression coefficients with 95% confidence intervals to assess predictor variables' impact on response... |
['models'] |
{} |
validmind.model_validation.statsmodels.RegressionModelSensitivityPlot |
Regression Model Sensitivity Plot |
Tests the sensitivity of a regression model to variations in independent variables by applying shocks and... |
['models', 'datasets'] |
{'transformation': None, 'shocks': [0.1]} |
validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels |
Regression Model Forecast Plot Levels |
Compares and visualizes forecasted and actual values of regression models on both raw and transformed datasets.... |
['models', 'datasets'] |
{'transformation': None} |
validmind.model_validation.statsmodels.ScorecardHistogram |
Scorecard Histogram |
Creates histograms of credit scores, from both default and non-default instances, generated by a credit-risk model.... |
['datasets'] |
{'title': 'Histogram of Scores', 'score_column': 'score'} |
validmind.model_validation.statsmodels.LJungBox |
Ljung Box |
Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.JarqueBera |
Jarque Bera |
Assesses normality of dataset features in an ML model using the Jarque-Bera test.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.KolmogorovSmirnov |
Kolmogorov Smirnov |
Executes a feature-wise Kolmogorov-Smirnov test to evaluate alignment with normal distribution in datasets.... |
['dataset'] |
{'dist': 'norm'} |
validmind.model_validation.statsmodels.ShapiroWilk |
Shapiro Wilk |
Evaluates feature-wise normality of training data using the Shapiro-Wilk test.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.CumulativePredictionProbabilities |
Cumulative Prediction Probabilities |
Visualizes cumulative probabilities of positive and negative classes for both training and testing in logistic... |
['model', 'datasets'] |
{'title': 'Cumulative Probabilities'} |
validmind.model_validation.statsmodels.RegressionFeatureSignificance |
Regression Feature Significance |
Assesses and visualizes the statistical significance of features in a set of regression models.... |
['models'] |
{'fontsize': 10, 'p_threshold': 0.05} |
validmind.model_validation.statsmodels.RegressionModelSummary |
Regression Model Summary |
Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE.... |
['model', 'dataset'] |
{} |
validmind.model_validation.statsmodels.Lilliefors |
Lilliefors |
Assesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.RunsTest |
Runs Test |
Executes Runs Test on ML model to detect non-random patterns in output data sequence.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.RegressionPermutationFeatureImportance |
Regression Permutation Feature Importance |
Assesses the significance of each feature in a model by evaluating the impact on model performance when feature... |
['model', 'dataset'] |
{'fontsize': 12, 'figure_height': 500} |
validmind.model_validation.statsmodels.PredictionProbabilitiesHistogram |
Prediction Probabilities Histogram |
Generates and visualizes histograms of the Probability of Default predictions for both positive and negative... |
['model', 'datasets'] |
{'title': 'Histogram of Predictive Probabilities'} |
validmind.model_validation.statsmodels.AutoARIMA |
Auto ARIMA |
Evaluates ARIMA models for time-series forecasting, ranking them using Bayesian and Akaike Information Criteria.... |
['dataset'] |
{} |
validmind.model_validation.statsmodels.GINITable |
GINI Table |
Evaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets.... |
['model', 'datasets'] |
{} |
validmind.model_validation.statsmodels.RegressionModelForecastPlot |
Regression Model Forecast Plot |
Generates plots to visually compare the forecasted outcomes of one or more regression models against actual... |
['models', 'datasets'] |
{'start_date': None, 'end_date': None} |
validmind.model_validation.statsmodels.DurbinWatsonTest |
Durbin Watson Test |
Assesses autocorrelation in time series data features using the Durbin-Watson statistic.... |
['dataset'] |
{} |
validmind.data_validation.MissingValuesRisk |
Missing Values Risk |
Assesses and quantifies the risk related to missing values in a dataset used for training an ML model.... |
['dataset'] |
{} |
validmind.data_validation.IQROutliersTable |
IQR Outliers Table |
Determines and summarizes outliers in numerical features using Interquartile Range method.... |
['dataset'] |
{'features': None, 'threshold': 1.5} |
validmind.data_validation.BivariateFeaturesBarPlots |
Bivariate Features Bar Plots |
Generates visual bar plots to analyze the relationship between paired features within categorical data in the model.... |
['dataset'] |
{'features_pairs': None} |
validmind.data_validation.Skewness |
Skewness |
Evaluates the skewness of numerical data in a machine learning model and checks if it falls below a set maximum... |
['dataset'] |
{'max_threshold': 1} |
validmind.data_validation.Duplicates |
Duplicates |
Tests dataset for duplicate entries, ensuring model reliability via data quality verification.... |
['dataset'] |
{'min_threshold': 1} |
validmind.data_validation.MissingValuesBarPlot |
Missing Values Bar Plot |
Creates a bar plot showcasing the percentage of missing values in each column of the dataset with risk... |
['dataset'] |
{'threshold': 80, 'fig_height': 600} |
validmind.data_validation.DatasetDescription |
Dataset Description |
Provides comprehensive analysis and statistical summaries of each field in a machine learning model's dataset.... |
['dataset'] |
{} |
validmind.data_validation.ZivotAndrewsArch |
Zivot Andrews Arch |
Evaluates the order of integration and stationarity of time series data using Zivot-Andrews unit root test.... |
['dataset'] |
{} |
validmind.data_validation.ScatterPlot |
Scatter Plot |
Creates a scatter plot matrix to visually analyze feature relationships, patterns, and outliers in a dataset.... |
['dataset'] |
{} |
validmind.data_validation.TimeSeriesOutliers |
Time Series Outliers |
Identifies and visualizes outliers in time-series data using z-score method.... |
['dataset'] |
{'zscore_threshold': 3} |
validmind.data_validation.TabularCategoricalBarPlots |
Tabular Categorical Bar Plots |
Generates and visualizes bar plots for each category in categorical features to evaluate dataset's composition.... |
['dataset'] |
{} |
validmind.data_validation.AutoStationarity |
Auto Stationarity |
Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.... |
['dataset'] |
{'max_order': 5, 'threshold': 0.05} |
validmind.data_validation.DescriptiveStatistics |
Descriptive Statistics |
Performs a detailed descriptive statistical analysis of both numerical and categorical data within a model's... |
['dataset'] |
{} |
validmind.data_validation.ANOVAOneWayTable |
ANOVA One Way Table |
Applies one-way ANOVA (Analysis of Variance) to identify statistically significant numerical features in the... |
['dataset'] |
{'features': None, 'p_threshold': 0.05} |
validmind.data_validation.TargetRateBarPlots |
Target Rate Bar Plots |
Generates bar plots visualizing the default rates of categorical features for a classification machine learning... |
['dataset'] |
{'default_column': None, 'columns': None} |
validmind.data_validation.PearsonCorrelationMatrix |
Pearson Correlation Matrix |
Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map.... |
['dataset'] |
{} |
validmind.data_validation.FeatureTargetCorrelationPlot |
Feature Target Correlation Plot |
Visualizes the correlation between input features and model's target output in a color-coded horizontal bar plot.... |
['dataset'] |
{'features': None, 'fig_height': 600} |
validmind.data_validation.TabularNumericalHistograms |
Tabular Numerical Histograms |
Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and... |
['dataset'] |
{} |
validmind.data_validation.IsolationForestOutliers |
Isolation Forest Outliers |
Detects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots.... |
['dataset'] |
{'random_state': 0, 'contamination': 0.1, 'features_columns': None} |
validmind.data_validation.ChiSquaredFeaturesTable |
Chi Squared Features Table |
Executes Chi-Squared test for each categorical feature against a target column to assess significant association.... |
['dataset'] |
{'cat_features': None, 'p_threshold': 0.05} |
validmind.data_validation.HighCardinality |
High Cardinality |
Assesses the number of unique values in categorical columns to detect high cardinality and potential overfitting.... |
['dataset'] |
{'num_threshold': 100, 'percent_threshold': 0.1, 'threshold_type': 'percent'} |
validmind.data_validation.MissingValues |
Missing Values |
Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.... |
['dataset'] |
{'min_threshold': 1} |
validmind.data_validation.PhillipsPerronArch |
Phillips Perron Arch |
Executes Phillips-Perron test to assess the stationarity of time series data in each ML model feature.... |
['dataset'] |
{} |
validmind.data_validation.RollingStatsPlot |
Rolling Stats Plot |
This test evaluates the stationarity of time series data by plotting its rolling mean and standard deviation.... |
['dataset'] |
{'window_size': 12} |
validmind.data_validation.TabularDescriptionTables |
Tabular Description Tables |
Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.... |
['dataset'] |
{} |
validmind.data_validation.AutoMA |
Auto MA |
Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on... |
['dataset'] |
{'max_ma_order': 3} |
validmind.data_validation.UniqueRows |
Unique Rows |
Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold.... |
['dataset'] |
{'min_percent_threshold': 1} |
validmind.data_validation.TooManyZeroValues |
Too Many Zero Values |
Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold... |
['dataset'] |
{'max_percent_threshold': 0.03} |
validmind.data_validation.HighPearsonCorrelation |
High Pearson Correlation |
Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity.... |
['dataset'] |
{'max_threshold': 0.3} |
validmind.data_validation.ACFandPACFPlot |
ACF and PACF Plot |
Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to... |
['dataset'] |
{} |
validmind.data_validation.BivariateHistograms |
Bivariate Histograms |
Generates bivariate histograms for paired features, aiding in visual inspection of categorical variables'... |
['dataset'] |
{'features_pairs': None, 'target_filter': None} |
validmind.data_validation.WOEBinTable |
WOE Bin Table |
Calculates and assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature in an ML model.... |
['dataset'] |
{'breaks_adj': None} |
validmind.data_validation.HeatmapFeatureCorrelations |
Heatmap Feature Correlations |
Creates a heatmap to visually represent correlation patterns between pairs of numerical features in a dataset.... |
['dataset'] |
{'declutter': None, 'fontsize': None, 'num_features': None} |
validmind.data_validation.TimeSeriesFrequency |
Time Series Frequency |
Evaluates consistency of time series data frequency and generates a frequency plot.... |
['dataset'] |
{} |
validmind.data_validation.DatasetSplit |
Dataset Split |
Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML... |
['datasets'] |
{} |
validmind.data_validation.SpreadPlot |
Spread Plot |
Visualizes the spread relationship between pairs of time-series variables in a dataset, thereby aiding in... |
['dataset'] |
{} |
validmind.data_validation.TimeSeriesLinePlot |
Time Series Line Plot |
Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time.... |
['dataset'] |
{} |
validmind.data_validation.KPSS |
KPSS |
Executes KPSS unit root test to validate stationarity of time-series data in machine learning model.... |
['dataset'] |
{} |
validmind.data_validation.AutoSeasonality |
Auto Seasonality |
Automatically identifies and quantifies optimal seasonality in time series data to improve forecasting model... |
['dataset'] |
{'min_period': 1, 'max_period': 4} |
validmind.data_validation.BivariateScatterPlots |
Bivariate Scatter Plots |
Generates bivariate scatterplots to visually inspect relationships between pairs of predictor variables in machine... |
['dataset'] |
{'selected_columns': None} |
validmind.data_validation.EngleGrangerCoint |
Engle Granger Coint |
Validates co-integration in pairs of time series data using the Engle-Granger test and classifies them as... |
['dataset'] |
{'threshold': 0.05} |
validmind.data_validation.TimeSeriesMissingValues |
Time Series Missing Values |
Validates time-series data quality by confirming the count of missing values is below a certain threshold.... |
['dataset'] |
{'min_threshold': 1} |
validmind.data_validation.TimeSeriesHistogram |
Time Series Histogram |
Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines.... |
['dataset'] |
{} |
validmind.data_validation.LaggedCorrelationHeatmap |
Lagged Correlation Heatmap |
Assesses and visualizes correlation between target variable and lagged independent variables in a time-series... |
['dataset'] |
{} |
validmind.data_validation.SeasonalDecompose |
Seasonal Decompose |
Decomposes dataset features into observed, trend, seasonal, and residual components to identify patterns and... |
['dataset'] |
{'seasonal_model': 'additive'} |
validmind.data_validation.WOEBinPlots |
WOE Bin Plots |
Generates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power... |
['dataset'] |
{'breaks_adj': None, 'fig_height': 600, 'fig_width': 500} |
validmind.data_validation.ClassImbalance |
Class Imbalance |
Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model.... |
['dataset'] |
{'min_percent_threshold': 10} |
validmind.data_validation.IQROutliersBarPlot |
IQR Outliers Bar Plot |
Visualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method.... |
['dataset'] |
{'threshold': 1.5, 'num_features': None, 'fig_width': 800} |
validmind.data_validation.DFGLSArch |
DFGLS Arch |
Executes the Dickey-Fuller GLS metric to determine the order of integration and check stationarity in time series data.... |
['dataset'] |
{} |
validmind.data_validation.AutoAR |
Auto AR |
Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.... |
['dataset'] |
{'max_ar_order': 3} |
validmind.data_validation.TabularDateTimeHistograms |
Tabular Date Time Histograms |
Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime data.... |
['dataset'] |
{} |
validmind.data_validation.ADF |
ADF |
Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.... |
['dataset'] |
{} |
validmind.data_validation.nlp.Toxicity |
Toxicity |
Analyzes the toxicity of text data within a dataset using a pre-trained toxicity model.... |
['dataset'] |
{} |
validmind.data_validation.nlp.PolarityAndSubjectivity |
Polarity And Subjectivity |
Analyzes the polarity and subjectivity of text data within a dataset.... |
['dataset'] |
{} |
validmind.data_validation.nlp.Punctuations |
Punctuations |
Analyzes and visualizes the frequency distribution of punctuation usage in a given text dataset.... |
['dataset'] |
{} |
validmind.data_validation.nlp.Sentiment |
Sentiment |
Analyzes the sentiment of text data within a dataset using the VADER sentiment analysis tool.... |
['dataset'] |
{} |
validmind.data_validation.nlp.CommonWords |
Common Words |
Identifies and visualizes the 40 most frequent non-stopwords in a specified text column within a dataset.... |
['dataset'] |
{} |
validmind.data_validation.nlp.Hashtags |
Hashtags |
Assesses hashtag frequency in a text column, highlighting usage trends and potential dataset bias or spam.... |
['dataset'] |
{'top_hashtags': 25} |
validmind.data_validation.nlp.LanguageDetection |
Language Detection |
Detects the language of each text entry in a dataset and visualizes the distribution of languages... |
['dataset'] |
{} |
validmind.data_validation.nlp.Mentions |
Mentions |
Calculates and visualizes frequencies of '@' prefixed mentions in a text-based dataset for NLP model analysis.... |
['dataset'] |
{'top_mentions': 25} |
validmind.data_validation.nlp.TextDescription |
Text Description |
Performs comprehensive textual analysis on a dataset using NLTK, evaluating various parameters and generating... |
['dataset'] |
{'unwanted_tokens': {'dollar', ' ', 'ms', '``', 'mrs', 'mr', "s'", 'dr', "'s", "''", 'us', 's'}, 'num_top_words': 3, 'lang': 'english'} |
validmind.data_validation.nlp.StopWords |
Stop Words |
Evaluates and visualizes the frequency of English stop words in a text dataset against a defined threshold.... |
['dataset'] |
{'min_percent_threshold': 0.5, 'num_words': 25} |