# Source code for abs_text_attack.main

"""
The console application for constructing attacks is launched from here.
# For CI/CD:
# 1. Implement a CLI that accepts paths to a config file
# 2. Add operating modes (test/report/etc.)
# 3. Support multiple output formats (json/csv/html)
# The hierarchical config should include:
# - General CI/CD settings (success thresholds, reports)
# - Run settings (logging, multithreading)
# - A registry of attacks with their parameters
"""

import os
import pickle

from sympy import vectorize

from config import LazyLoader
import argparse

from src.model_loader import ModelLoader

# Handles for heavy modules, wrapped in config.LazyLoader.
# NOTE(review): presumably LazyLoader defers the real import until the first
# attribute access — confirm against config.LazyLoader.
attacks = LazyLoader("abs_text_attack.attacks")
sklearn_ensemble = LazyLoader("sklearn.ensemble")
sklearn_linear_model = LazyLoader("sklearn.linear_model")
sklearn_svm = LazyLoader("sklearn.svm")
sklearn_naive_bayes = LazyLoader("sklearn.naive_bayes")
sklearn_feature_extraction_text = LazyLoader("sklearn.feature_extraction.text")
core_interfaces = LazyLoader("abs_text_attack.core.interfaces")
targets_custom_target = LazyLoader("abs_text_attack.targets.custom_target")

# Interactive-session state shared by the menu functions below.
# DATASET: tuple describing the chosen dataset (set by dataset_menu / run_cli).
DATASET = None
# TO_TRAIN: whether the chosen model still has to be trained (set by model_menu / run_cli).
TO_TRAIN = None
# MODEL: wrapped model object (set by model_menu / run_cli).
MODEL = None
# ATTACKS: attack classes selected for the run (set by attacks_menu / run_cli).
ATTACKS = []

def get_model_from_config(model_dict):
    """Build a wrapped model from the ``model`` section of a config.

    Args:
        model_dict: Mapping with at least a ``"framework"`` key, either
            ``"sklearn"`` or ``"hf"``. The whole mapping is forwarded to
            ``ModelLoader().load_model``.

    Returns:
        ``core_interfaces.SklearnModel`` or ``core_interfaces.HFModel``,
        constructed from the unpacked result of ``load_model``.

    Raises:
        KeyError: If ``"framework"`` is missing from ``model_dict``.
        ValueError: If the framework is not one of the supported values.
    """
    framework = model_dict["framework"]
    if framework == "sklearn":
        return core_interfaces.SklearnModel(*ModelLoader().load_model(model_dict))
    if framework == "hf":
        return core_interfaces.HFModel(*ModelLoader().load_model(model_dict))
    # Fail loudly: silently returning None here would only surface later as
    # an obscure error when the target is built around a missing model.
    raise ValueError(
        f"Unsupported model framework {framework!r}; expected 'sklearn' or 'hf'."
    )
def get_dataset_from_config(dataset_dict):
    """Convert the ``dataset`` section of a config into a dataset tuple.

    Args:
        dataset_dict: Mapping with a ``"type"`` key. For ``"huggingface"``
            it must also hold ``"name"`` and ``"split"``; for ``"csv"`` it
            must hold ``"path"``, ``"text_column"`` and ``"label_column"``.

    Returns:
        ``(name, split)`` for a HuggingFace dataset, or
        ``(path, text_column, label_column)`` for a CSV dataset.

    Raises:
        KeyError: If a required key is missing.
        ValueError: If ``"type"`` is not a supported dataset type.
    """
    dataset_type = dataset_dict['type']
    if dataset_type == "huggingface":
        return (dataset_dict['name'], dataset_dict['split'])
    if dataset_type == "csv":
        return (
            dataset_dict['path'],
            dataset_dict['text_column'],
            dataset_dict['label_column'],
        )
    # Fail loudly instead of handing None to the caller.
    raise ValueError(
        f"Unsupported dataset type {dataset_type!r}; expected 'huggingface' or 'csv'."
    )
# Config attack name -> attribute name on the lazily loaded ``attacks`` module.
# Attribute names (not the classes themselves) are stored so that building this
# table does not touch the ``attacks`` module.
_ATTACK_CLASS_NAMES = {
    "textfooler": "TextFooler",
    "pwws": "PWWS",
    "textbugger": "TextBugger",
    "pruthi": "Pruthi",
    "deepwordbug": "DeepWordBug",
}


def get_attacks_and_params_from_config(attacks_list):
    """Resolve the ``attacks`` section of a config into classes and parameters.

    Args:
        attacks_list: Iterable of mappings, each with a ``"name"`` key (one of
            ``textfooler``, ``pwws``, ``textbugger``, ``pruthi``,
            ``deepwordbug``) and a ``"params"`` key.

    Returns:
        A ``(chosen_attacks, params)`` pair of parallel lists: the attack
        classes taken from the ``attacks`` module and their ``"params"``
        values, in config order.

    Raises:
        KeyError: If an entry lacks ``"name"`` or ``"params"``.
        ValueError: If an entry names an unknown attack.
    """
    chosen_attacks = []
    params = []
    for entry in attacks_list:
        name = entry['name']
        class_name = _ATTACK_CLASS_NAMES.get(name) if isinstance(name, str) else None
        if class_name is None:
            # A misspelled attack must not be skipped silently: the run would
            # otherwise report results for fewer attacks than were requested.
            supported = ", ".join(sorted(_ATTACK_CLASS_NAMES))
            raise ValueError(
                f"Unknown attack {name!r}; supported attacks: {supported}."
            )
        chosen_attacks.append(getattr(attacks, class_name))
        params.append(entry['params'])
    return chosen_attacks, params
def clear_screen():
    """Clear the terminal using the platform's shell command."""
    # Windows shells use ``cls``; everything else is given ``clear``.
    if os.name == "nt":
        command = "cls"
    else:
        command = "clear"
    os.system(command)
def dataset_menu():
    """Interactively choose a dataset and store it in the global ``DATASET``.

    A CSV choice stores ``(path, "text", "label")``; a HuggingFace choice
    stores ``(name, split)``. The menu is redrawn until the user picks "Back".
    """
    global DATASET
    while True:
        clear_screen()
        print("Choose your desired dataset:")
        print()
        print("1. Upload from file. (csv)")
        print("2. Upload from HuggingFace.")
        print("3. Back.")
        print()
        choice = input("Select one of the option: ")
        if choice == "1":
            # The column names are fixed to "text" and "label" for CSV input.
            DATASET = (input("Please type dataset path."), "text", "label")
            print(f"You have selected {DATASET} dataset.")
            input("Press Enter to continue...")
        elif choice == "2":
            parts = input(
                "Please type dataset name and split type (train/val/test). Split them with whitespace:"
            ).split()
            if len(parts) == 2:
                DATASET = tuple(parts)
                print(f"You have selected {DATASET[0]} dataset, {DATASET[1]} split.")
            else:
                # Keep the previous selection: a tuple of any other length
                # would crash the confirmation message above or no longer be
                # a (name, split) pair.
                print("Expected exactly two values: dataset name and split. Please try again.")
            input("Press Enter to continue...")
        elif choice == "3":
            break
        else:
            print("Wrong choice. Please try again.")
            input("Press Enter to continue...")
def _load_pickled_sklearn_model():
    """Prompt for model/vectorizer pickle paths and load both.

    Returns:
        A ``core_interfaces.SklearnModel`` wrapping the two loaded objects,
        or ``None`` when the input or the files could not be used (a message
        is printed in that case).
    """
    paths = input(
        "Please, specify paths to model and vectorizer/tokenizer. Split them with whitespace:"
    ).split()
    if len(paths) != 2:
        print("Expected exactly two paths: model and vectorizer/tokenizer.")
        return None
    path_model, path_vectorizer = paths
    try:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load pickles that come from a trusted source.
        with open(path_model, 'rb') as file:
            loaded_model = pickle.load(file)
        with open(path_vectorizer, 'rb') as file:
            loaded_vectorizer = pickle.load(file)
    except (OSError, pickle.UnpicklingError, EOFError,
            AttributeError, ImportError, IndexError) as exc:
        print(f"Could not load the pickled objects: {exc}")
        return None
    return core_interfaces.SklearnModel(loaded_model, loaded_vectorizer)


def model_menu():
    """Interactively choose a model; store it in ``MODEL`` and ``TO_TRAIN``.

    Option 1 loads a pickled sklearn model and vectorizer (``TO_TRAIN`` is
    set to False). Options 3-7 create an untrained sklearn estimator paired
    with a ``TfidfVectorizer(max_features=5000)`` (``TO_TRAIN`` is set to
    True). Option 2 is a placeholder that only redraws the menu. The menu is
    redrawn until the user picks "Back".
    """
    global MODEL
    global TO_TRAIN

    # Menu key -> (display name, estimator factory). Factories are lambdas so
    # that the lazily loaded sklearn modules are only touched on selection.
    trainable = {
        "3": ("Random Forest", lambda: sklearn_ensemble.RandomForestClassifier()),
        "4": ("Logistic Regression", lambda: sklearn_linear_model.LogisticRegression()),
        "5": ("SVC", lambda: sklearn_svm.SVC()),
        "6": ("Gradient Boosting", lambda: sklearn_ensemble.GradientBoostingClassifier()),
        "7": ("Naive Bayes", lambda: sklearn_naive_bayes.MultinomialNB()),
    }

    while True:
        clear_screen()
        print("Choose your desired model:")
        print()
        print("1. Upload from .pkl file sklearn model.")
        print("2. Upload and train from HuggingFace. (Not implemented yet)")
        print("3. Create and train Random Forest.")
        print("4. Create and train Logistic Regression.")
        print("5. Create and train SVC.")
        print("6. Create and train Gradient Boosting.")
        print("7. Create and train Naive Bayes.")
        print("8. Back.")
        print()
        choice = input("Select one of the option: ")
        if choice == "1":
            # Pickle only for now, because the vectorizer can be of any kind.
            loaded = _load_pickled_sklearn_model()
            if loaded is not None:
                MODEL = loaded
                TO_TRAIN = False
                print("Nice choice!.")
            input("Press Enter to continue...")
        elif choice == "2":
            # Not implemented yet: just redraw the menu.
            continue
        elif choice in trainable:
            label, make_estimator = trainable[choice]
            MODEL = core_interfaces.SklearnModel(
                make_estimator(),
                sklearn_feature_extraction_text.TfidfVectorizer(max_features=5000),
            )
            TO_TRAIN = True
            print(f"You have selected {label} model.")
            input("Press Enter to continue...")
        elif choice == "8":
            break
        else:
            print("Wrong choice. Please try again.")
            input("Press Enter to continue...")
def attacks_menu():
    """Interactively choose attacks and store them in the global ``ATTACKS``.

    The selection is reset every time the menu is entered. Attack classes are
    taken from the ``attacks`` module and kept in the order they were first
    selected, without duplicates. The menu is redrawn until the user picks
    "Back".
    """
    global ATTACKS
    # A list rather than a set, so the order of the chosen attacks is the
    # order of selection instead of an arbitrary set iteration order.
    ATTACKS = []

    # Menu key -> attribute name on the ``attacks`` module.
    options = {
        "1": "TextFooler",
        "2": "PWWS",
        "3": "Pruthi",
        "4": "DeepWordBug",
        "5": "TextBugger",
    }

    while True:
        clear_screen()
        print("Choose your desired attacks:")
        print()
        print("1. Select TextFooler.")
        print("2. Select PWWS.")
        print("3. Select Pruthi.")
        print("4. Select DeepWordBug.")
        print("5. Select TextBugger.")
        print("6. Back.")
        print(f"Currently selected {ATTACKS}.")
        print()
        choice = input("Select one of the option: ")
        if choice == "6":
            break
        class_name = options.get(choice)
        if class_name is None:
            print("Wrong choice. Please try again.")
            input("Press Enter to continue...")
            continue
        attack_cls = getattr(attacks, class_name)
        if attack_cls not in ATTACKS:
            ATTACKS.append(attack_cls)
def run_cli():
    """Run the interactive main menu until a target is built or the user exits.

    Options 1-3 open the dataset, model and attack sub-menus. Option 4 builds
    a ``CustomTarget`` from the current global selection and prints the result
    of ``build()``. Option 5 does the same from a JSON config file, which also
    overwrites the global selection. Option 6 exits.
    """
    global ATTACKS
    global DATASET
    global MODEL
    global TO_TRAIN
    while True:
        clear_screen()
        # NOTE(review): main_menu is not defined in the visible part of this
        # module — confirm that it exists, otherwise this call raises NameError.
        main_menu()
        choice = input("Select one of the option: ")
        if choice == "1":
            dataset_menu()
        elif choice == "2":
            model_menu()
        elif choice == "3":
            attacks_menu()
        elif choice == "4":
            # Minimal sanity check: refuse to build a target from unset
            # selections instead of failing somewhere inside CustomTarget.
            missing = []
            if DATASET is None:
                missing.append("dataset")
            if MODEL is None:
                missing.append("model")
            if not ATTACKS:
                missing.append("attacks")
            if missing:
                print(f"Cannot build the attack yet. Please select: {', '.join(missing)}.")
                input("Press Enter to continue...")
                continue
            target = targets_custom_target.CustomTarget(ATTACKS, [DATASET], [MODEL], TO_TRAIN)
            print(target.build())
            break
        elif choice == "5":
            import json
            path = input("Please, specify path to JSON config.")
            try:
                with open(path, "r", encoding="utf-8") as f:
                    config = json.load(f)
            except (OSError, ValueError) as exc:
                # ValueError covers json.JSONDecodeError and decoding errors.
                print(f"Could not read config: {exc}")
                input("Press Enter to continue...")
                continue
            # NOTE(review): main() reads config['model']['mode'] for the same
            # purpose — confirm which key the config schema actually defines.
            TO_TRAIN = config['model']['to_train']
            MODEL = get_model_from_config(config['model'])
            DATASET = get_dataset_from_config(config['dataset'])
            ATTACKS, attack_params = get_attacks_and_params_from_config(config['attacks'])
            target = targets_custom_target.CustomTarget(ATTACKS, [DATASET], [MODEL], TO_TRAIN, attack_params)
            print(target.build())
            break
        elif choice == "6":
            print("Exiting program...")
            break
        else:
            print("Wrong choice. Please try again.")
            input("Press Enter to continue...")
def main(argv=None):
    """Entry point: run from a JSON config if given, otherwise start the menu.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) ``argparse`` reads ``sys.argv[1:]``, which keeps existing
            callers and console entry points working unchanged.

    With ``-c/--config PATH`` the config is loaded, a ``CustomTarget`` is
    built from it and the result of ``build()`` is printed. Without it the
    interactive ``run_cli()`` menu is started.
    """
    parser = argparse.ArgumentParser(description="A program to execute attacks on user models.")
    parser.add_argument('-c', '--config', type=str, help="Path to the config file")
    args = parser.parse_args(argv)

    if not args.config:
        run_cli()
        return

    import json
    with open(args.config, "r", encoding="utf-8") as f:
        config = json.load(f)
    # NOTE(review): run_cli() reads config['model']['to_train'] for the same
    # purpose — confirm which key the config schema actually defines.
    to_train = config['model']['mode']
    model = get_model_from_config(config['model'])
    dataset = get_dataset_from_config(config['dataset'])
    # Local name chosen so it does not shadow the module-level ``attacks`` loader.
    chosen_attacks, attack_params = get_attacks_and_params_from_config(config['attacks'])
    target = targets_custom_target.CustomTarget(chosen_attacks, [dataset], [model], to_train, attack_params)
    print(target.build())
# Run the application only when this file is executed as a script, not on import.
if __name__ == "__main__": main()