## Imports and setup

In [1]:
import os, sys
import re
from six.moves.cPickle import load
import argparse
from time import time, strftime
from inspect import getmembers, isfunction
from imp import load_source
import numpy as np
from pandas import DataFrame, Series
import json
from subprocess import check_output
import pickle
import pandas as pd
# Note: We import this *before* any import of TF to avoid weird issues
# see- https://github.com/tensorflow/models/issues/523
# Is a bit of a hack but seems harmless as long as SpaCy is installed
import spacy

### Note: No Snorkel imports until after the $SNORKELDB env var is set!

# Names of the available experiments: every entry of experiments/ whose name
# does not end in ".py" or ".pyc" (i.e. skip loose Python source/bytecode).
expts = [
    entry
    for entry in os.listdir('experiments/')
    if not re.match(r'.*\.pyc?$', entry)
]

from utils_mderoche import ArgumentEmulator


# Emulated command-line arguments for this notebook run.
# start_at=7 means the stage cells guarded by `args.start_at <= 5` and
# `args.start_at <= 6` are skipped and only the final-evaluation stage runs.
args = ArgumentEmulator(
    verbose=True,
    exp="fda",
    disc_model_search_space=1,
    start_at=7,
    db_name="_snorkel_fda_temp",
)

# Split ids passed as `split=` when loading dev / test data below.
DEV_SPLIT, TEST_SPLIT = 1, 2



In [2]:

if args.verbose:
    print(args)

# Build the DB connection string and export it as $SNORKELDB.
# This has to happen before any Snorkel module is imported.
if args.db_name is None:
    DB_NAME = "snorkel_" + args.exp
else:
    DB_NAME = args.db_name

if args.postgres:
    DB_TYPE = "postgres"
else:
    # Non-Postgres runs use SQLite, whose database name gets a ".db" suffix.
    DB_TYPE = "sqlite"
    DB_NAME += ".db"

if args.db_port:
    DB_ADDR = "localhost:{0}".format(args.db_port)
else:
    DB_ADDR = ""

os.environ['SNORKELDB'] = '{0}://{1}/{2}'.format(DB_TYPE, DB_ADDR, DB_NAME)
print("$SNORKELDB = {0}".format(os.environ['SNORKELDB']))

# All Snorkel imports here, after SNORKELDB has been set
from snorkel.annotations import (
 LabelAnnotator, load_label_matrix, load_gold_labels, load_marginals
)
from snorkel.learning.structure import DependencySelector
from snorkel.learning import GenerativeModel, RandomSearch
from snorkel.models.meta import SnorkelBase, snorkel_engine
from snorkel.models import Document, Sentence
from snorkel import SnorkelSession

from utils import *

############################################################################
### [0] Start from clean slate: Clear the DB
############################################################################
# Only runs when starting from stage 0. For SQLite the database is the file
# DB_NAME in the working directory, so "clearing" means deleting that file and
# re-creating the schema. Postgres clearing is not implemented.
if args.start_at == 0:
    if args.verbose > 0:
        print("Reseting DB...")
    if args.postgres:
        raise NotImplementedError("TODO: DB clearing for Postgres.")

    # Delete the DB file directly instead of shelling out to `rm`, and only
    # tolerate filesystem errors (the old bare `except` hid everything,
    # including KeyboardInterrupt).
    try:
        os.remove(DB_NAME)
    except OSError:
        # Still best effort: a missing file is the normal case on a first
        # run. Only speak up if the file is there but could not be removed.
        if os.path.exists(DB_NAME):
            print("Warning: could not remove {0}".format(DB_NAME))
    SnorkelBase.metadata.create_all(snorkel_engine)

# Start Snorkel session
sess = SnorkelSession()

# UDFs only get more than one thread when the backend is Postgres
if args.postgres:
    UDF_THREADS = args.n_threads
else:
    UDF_THREADS = 1

# Load the experiment-specific preprocess.py as a module
exp_dir = os.path.join('experiments', args.exp)
preprocess = load_source('preprocess', os.path.join(exp_dir, 'preprocess.py'))

# Candidate subclass defined by the experiment
C = preprocess.C

# Configuration: start from the global config, then let the experiment's own
# config.py (if it exists) override entries via a recursive merge.
from config import config
local_config_path = os.path.join(exp_dir, 'config.py')
if os.path.exists(local_config_path):
    local_config = load_source('local_config', local_config_path)
    config = recursive_merge_dicts(config, local_config.config)

if args.verbose > 0:
    print(config)



$SNORKELDB = sqlite:///_snorkel_fda_temp.db


 import parser


{'gen-init-params': {'lf_propensity': True, 'seed': 123}, 'deps-thresh': 0.01, 'disc-params-range': {'dim': [50, 100], 'dropout': [0.1, 0.25, 0.5], 'lr': [0.0002, 1e-05, 0.0001, 0.0005, 5e-05], 'l2_penalty': [0.01, 0.001, 0.0001], 'rebalance': [0.1, 0.2, 0.05, 0.4], 'l1_penalty': [0.001, 0.0001, 1e-05]}, 'featurizer-init-params': {}, 'disc-eval-batch-size': 32, 'disc-params-default': {'dropout': 0.5, 'batch_size': 64, 'n_epochs': 750, 'beta': 0.9, 'max_sentence_length': 100, 'dim': 50, 'print_freq': 50, 'lr': 0.0002, 'l2_penalty': 0.0001, 'rebalance': 0.1, 'l1_penalty': 0.0002, 'allchecks': False}, 'gen-params-default': {'epochs': 50, 'reg_param': 0.1, 'decay': 0.95}, 'disc-model-class': , 'disc-init-params': {'seed': 123, 'n_threads': 4}, 'featurizer-class': , 'gen-params-range': {'step_size': [0.01, 0.001, 0.0001, 1e-05], 'reg_param': [0.0, 0.01, 0.1, 0.5], 'LF_acc_prior_weight_default': [0.5, 1.0, 1.5]}}


In [3]:
############################################################################
### [5] Fit generative model
############################################################################
# Name under which the generative model is saved / reloaded.
GEN_MODEL_NAME = 'G_final_{0}'.format(args.exp)

# Stage outputs default to None so later cells can tell what still has to be
# loaded when this stage is skipped (args.start_at > 5).
L_train, L_dev, L_test = None, None, None
Y_dev, Y_test = None, None
gen_model = None

if args.start_at <= 5:

    # Load the label matrices if starting here
    if L_train is None or L_dev is None or L_test is None:
        with PrintTimer("[5.0] Loading label matrices..."):
            # Optionally subsample the training set here
            if args.training_docs > 0:
                cids_query = get_training_cids_query(
                    sess,
                    preprocess.CONTEXT_HIERARCHY,
                    C,
                    preprocess.CANDIDATE_CONTEXT,
                    args.training_docs,
                    training_docs_shuffle=args.training_docs_shuffle,
                    verbose=args.verbose,
                    training_splits=args.training_splits
                )
                L_train = load_label_matrix(sess, cids_query=cids_query)
            else:
                L_train = load_label_matrix(sess, split=0)
            # NOTE: a second, unconditional `load_label_matrix(sess, split=0)`
            # used to follow here; it was either a redundant DB load or it
            # silently discarded the subsampled matrix, so it is gone.
            assert L_train.nnz > 0
            L_dev = load_label_matrix(sess, split=DEV_SPLIT)
            assert L_dev.nnz > 0
            L_test = load_label_matrix(sess, split=TEST_SPLIT)
            assert L_test.nnz > 0
            if args.verbose > 0:
                print("Using L_train: {0}".format(repr(L_train)))
                print("Using L_dev: {0}".format(repr(L_dev)))
                print("Using L_test: {0}".format(repr(L_test)))

    # Select dependencies to model
    with PrintTimer("[5.1] Selecting dependencies..."):
        if args.deps:
            ds = DependencySelector()
            # Seed right before selection so the result is repeatable
            np.random.seed(args.rand_seed)
            deps = ds.select(L_train, threshold=config['deps-thresh'])
            if args.verbose > 0:
                print("Selected {0} dependencies.".format(len(deps)))
        else:
            deps = ()
            if args.verbose > 0:
                print("Skipping.")

    # Run grid search to select best generative model
    with PrintTimer("[5.2] Searching over & training generative models"):
        # Load dev and test labels
        if Y_dev is None:
            Y_dev = load_gold_labels(sess, annotator_name='gold', split=DEV_SPLIT)
        assert Y_dev.nonzero()[0].shape[0] > 0
        if Y_test is None:
            Y_test = load_gold_labels(sess, annotator_name='gold', split=TEST_SPLIT)
        assert Y_test.nonzero()[0].shape[0] > 0

        # Pass in the dependencies via default params. Work on a copy so the
        # shared `config` dict is not mutated when the cell is re-run.
        gen_params_default = dict(config['gen-params-default'])
        gen_params_default['deps'] = deps

        # Train generative model with grid search if applicable
        gen_model = train_model(
            GenerativeModel,
            L_train,
            X_dev=L_dev,
            Y_dev=Y_dev,
            # NOTE(review): hard-coded; overrides args.gen_model_search_space
            search_size=2,
            search_params=config['gen-params-range'],
            rand_seed=args.rand_seed,
            n_threads=args.n_threads,
            verbose=(args.verbose > 0),
            params_default=gen_params_default,
            model_init_params=config['gen-init-params'],
            model_name=GEN_MODEL_NAME,
            save_dir=args.save_dir,
            beta=args.gen_f_beta
        )

        # Save training marginals (read back later via load_marginals)
        gen_model.save_marginals(sess, L_train, training=True)

    # Score generative model on test set
    print("\n### Gen. model (DP) score on test set:")
    _ = gen_model.error_analysis(sess, L_test, Y_test, display=True)

    # Stop after this stage if only a single stage was requested
    if args.one_only:
        sys.exit(0)



Select the saved discriminative model (checkpoint name) to load for the final evaluation:

In [None]:
# Name of the discriminative model; used below as `model_name` both when
# training (stage 6) and when reloading a saved model (stage 7.0).
# NOTE(review): this cell shows no execution count, yet the later cells rely on
# the name being defined - make sure it is run before them on a fresh kernel.
DISC_MODEL_NAME = "SparseLogisticRegression_fullchk_pr3__epoch_439"

In [4]:
############################################################################
### [6] Fit discriminative model
############################################################################
# This stage only runs when args.start_at <= 6. It loads the training
# candidates and their marginals plus the dev candidates and gold labels,
# builds the model inputs (F_train / F_dev) and trains the discriminative model.
# DISC_MODEL_NAME is expected to be defined already (see the cell above).

# DISC_MODEL_NAME = 'D_final_{0}'.format(args.exp)

# Stage outputs default to None so the evaluation cell can detect what still
# has to be loaded when this stage is skipped. F_test is never filled in here.
X_train, X_dev, Y_train, disc_model = None, None, None, None
F_train,F_dev,F_test,featurizer = None,None,None,None
if args.start_at <= 6:
 # Load data: candidates, dev labels, training marginals
 with PrintTimer("[6.0] Loading data"):
 # Optionally subsample the training set here
 if args.training_docs > 0:
 cids_query = get_training_cids_query(
 sess,
 preprocess.CONTEXT_HIERARCHY,
 C,
 preprocess.CANDIDATE_CONTEXT,
 args.training_docs,
 training_docs_shuffle=args.training_docs_shuffle,
 verbose=args.verbose,
 training_splits=args.training_splits
 )
 X_train = sess.query(C)\
 .filter(C.id == cids_query.subquery().c.id)\
 .order_by(C.id)\
 .all()
 Y_train = load_marginals(sess, cids_query=cids_query)
 else:
 # No subsampling: all split-0 candidates, ordered by id
 X_train = sess.query(C).filter(C.split==0).order_by(C.id).all()
 Y_train = load_marginals(sess, split=0)
 assert len(X_train) > 0
 assert Y_train.nonzero()[0].shape[0] > 0

 X_dev = sess.query(C).filter(C.split == DEV_SPLIT).order_by(C.id).all()
 assert len(X_dev) > 0
 # Y_dev may already be in memory from the generative-model stage
 if Y_dev is None:
 Y_dev = load_gold_labels(sess,annotator_name='gold',split=DEV_SPLIT)
 assert Y_dev.nonzero()[0].shape[0] > 0
 if args.verbose > 0:
 print("Loaded X_train: {0}".format(len(X_train)))
 print("Loaded Y_train: {0}".format(Y_train.shape))
 print("Loaded X_dev: {0}".format(len(X_dev)))
 print("Loaded Y_dev: {0}".format(Y_dev.shape))

 # Model classes whose `representation` flag is falsy are fed feature
 # matrices from the configured featurizer; otherwise the candidate lists
 # themselves are the model input.
 if not config['disc-model-class'].representation:
 featurizer = config['featurizer-class'](**config.get('featurizer-init-params',dict()))
 with PrintTimer("[6.0.1] Computing Features"):
 if args.recompute_feats:
 F_train = featurizer.apply(split=0)
 F_dev = featurizer.apply_existing(split=DEV_SPLIT)
 else:
 F_train = featurizer.load_matrix(sess,split=0)
 F_dev = featurizer.load_matrix(sess,split=DEV_SPLIT)

 else:
 F_train = X_train
 F_dev = X_dev
 # Run grid search to select best discriminative model
 with PrintTimer("[6.1] Searching over & training end disc. models"):
 disc_model = train_model(
 config['disc-model-class'],
 F_train,
 Y_train=Y_train,
 X_dev=F_dev,
 Y_dev=Y_dev,
 cardinality=C.cardinality,
 search_size=args.disc_model_search_space,
 search_params=config['disc-params-range'],
 rand_seed=args.rand_seed,
 n_threads=args.n_threads,
 verbose=(args.verbose > 0),
 params_default=config['disc-params-default'],
 model_init_params=config['disc-init-params'],
 model_name=DISC_MODEL_NAME,
 save_dir=args.save_dir,
 eval_batch_size=config['disc-eval-batch-size']
 )

 # Stop after this stage if only a single stage was requested
 if args.one_only:
 sys.exit(0)



In [5]:
 # [7] Final evaluation. Everything from here to the end of the cell belongs to
 # this stage; models and data that earlier stages left as None are (re)loaded.
 if args.start_at <= 7:
 with PrintTimer("[7.0] Loading all data for final evaluation"):
 # TODO: Reload models if needed
 # Rebuild the discriminative model and restore it from the saved
 # model named DISC_MODEL_NAME under args.save_dir
 if disc_model is None:
 disc_model = config['disc-model-class'](
 cardinality=C.cardinality,
 **config['disc-init-params'])
 disc_model.load(model_name=DISC_MODEL_NAME,
 save_dir=args.save_dir)

 # Same for the generative model
 if gen_model is None:
 gen_model = GenerativeModel(**config['gen-init-params'])
 gen_model.load(model_name=GEN_MODEL_NAME, save_dir=args.save_dir)
 # A featurizer is only needed when the model's `representation` flag is falsy
 if not disc_model.representation and (featurizer is None):
 featurizer = config['featurizer-class'](**config.get('featurizer-init-params', dict()))


 # TODO: Handle logistic regression as well!
 X_test = sess.query(C).filter(C.split == TEST_SPLIT).order_by(C.id).all()
 if Y_test is None:
 Y_test = load_gold_labels(sess, annotator_name='gold', split=TEST_SPLIT)
 assert Y_test.nonzero()[0].shape[0] > 0
 if L_test is None:
 L_test = load_label_matrix(sess, split=TEST_SPLIT)
 assert L_test.nnz > 0

 # Model input for the test split: feature matrix or the candidates themselves
 if F_test is None:
 if not disc_model.representation:
 # NOTE(review): there is no `else` between the next two assignments, so
 # load_matrix appears to run whether or not features were recomputed
 # (the apply_existing result is then overwritten) - confirm intended.
 if args.recompute_feats:
 F_test = featurizer.apply_existing(split=TEST_SPLIT)
 F_test = featurizer.load_matrix(sess,split=TEST_SPLIT)
 else:
 F_test = X_test

 if args.verbose > 0:
 print("Loaded X_test: {0}".format(len(X_test)))
 print("Loaded Y_test: {0}".format(Y_test.shape))
 print("Loaded F_test: representation is {}".format(disc_model.representation))

 # [7.1] Optional: run error analysis of both models on the dev split and
 # pickle the candidate ids of every bucket into a dated report directory.
 if args.custom_error_analysis:
     with PrintTimer("[7.1] Custom error analysis exportation"):
         # Lazily load whatever dev data the earlier stages did not leave in memory
         if X_dev is None:
             X_dev = sess.query(C).filter(C.split == DEV_SPLIT).order_by(C.id).all()
         if L_dev is None:
             L_dev = load_label_matrix(sess, split=DEV_SPLIT)
         # Check the matrix that was just loaded (this used to test L_test)
         assert L_dev.nnz > 0
         if Y_dev is None:
             Y_dev = load_gold_labels(sess, annotator_name='gold', split=DEV_SPLIT)
         if F_dev is None:
             if not disc_model.representation:
                 F_dev = featurizer.load_matrix(sess, split=DEV_SPLIT)
             else:
                 F_dev = X_dev

         # <reports_dir>/<YYYY_MM_DD>/<exp>_<HH_MM_SS>_custom.pkl
         custom_report_dir = os.path.join(args.reports_dir, strftime("%Y_%m_%d"))
         custom_report_name = '{0}_{1}_custom.pkl'.format(args.exp, strftime("%H_%M_%S"))
         if not os.path.exists(custom_report_dir):
             os.makedirs(custom_report_dir)

         custom_report = dict()

         # print() with a single argument behaves the same on Python 2 and 3
         print("Generative Model")
         tp, fp, tn, fn = gen_model.error_analysis(sess, L_dev, Y_dev)
         for key, cand_list in zip(['tp', 'fp', 'tn', 'fn'], [tp, fp, tn, fn]):
             custom_report[key] = [cand.id for cand in cand_list]

         print("Discriminative Model")
         tpd, fpd, tnd, fnd = disc_model.error_analysis(sess, F_dev, Y_dev)
         for key, cand_list in zip(['tpd', 'fpd', 'tnd', 'fnd'], [tpd, fpd, tnd, fnd]):
             custom_report[key] = [cand.id for cand in cand_list]

         # Report maps bucket name -> list of candidate ids
         with open(os.path.join(custom_report_dir, custom_report_name), 'wb') as f:
             pickle.dump(custom_report, f)


 # Optional: export the discriminative model's predictions for every candidate
 # of the train, dev and test splits to a ';'-separated CSV file.
 if args.export_pred:
     # Lazily load whatever the earlier stages did not leave in memory
     if X_train is None:
         X_train = sess.query(C).filter(C.split == 0).order_by(C.id).all()

     if F_train is None:
         if not disc_model.representation:
             # NOTE(review): as in the original, the stored matrix is loaded
             # whether or not features were recomputed first - confirm intended.
             if args.recompute_feats:
                 F_train = featurizer.apply_existing(split=0)
             F_train = featurizer.load_matrix(sess, split=0)
         else:
             F_train = X_train

     if X_dev is None:
         X_dev = sess.query(C).filter(C.split == DEV_SPLIT).order_by(C.id).all()

     if F_dev is None:
         if not disc_model.representation:
             if args.recompute_feats:
                 F_dev = featurizer.apply_existing(split=DEV_SPLIT)
             F_dev = featurizer.load_matrix(sess, split=DEV_SPLIT)
         else:
             F_dev = X_dev

     pred_train = disc_model.predictions(F_train, batch_size=int(len(X_train)/400)+1)
     pred_dev = disc_model.predictions(F_dev, batch_size=int(len(X_dev) / 400) + 1)
     pred_test = disc_model.predictions(F_test)

     # One row per candidate: character span of each context, position of the
     # parent and name of its document, and the predicted label.
     list_dic_res = list()
     all_cands = X_train + X_dev + X_test
     all_preds = list(pred_train) + list(pred_dev) + list(pred_test)
     for cand, pr in zip(all_cands, all_preds):
         dico_info = dict()
         for i, c in enumerate(cand.get_contexts()):
             dico_info["cand_" + str(i) + "_start"] = c.char_start
             dico_info["cand_" + str(i) + "_end"] = c.char_end

         parent = cand.get_parent()
         dico_info["sentence_pos"] = parent.position
         dico_info["doc_id"] = parent.document.name
         dico_info["prediction"] = pr
         list_dic_res.append(dico_info)

     custom_report_dir = os.path.join(args.reports_dir, strftime("%Y_%m_%d"))
     custom_report_name = '{0}_{1}_preds.csv'.format(args.exp, strftime("%H_%M_%S"))
     # The dated directory was previously only created by the custom error
     # analysis branch, so exporting without it failed on a missing directory.
     if not os.path.exists(custom_report_dir):
         os.makedirs(custom_report_dir)
     pd.DataFrame(list_dic_res).to_csv(
         os.path.join(custom_report_dir, custom_report_name), sep=";")


 # Collected results, keyed by method name: 'CS', 'MV', 'Gen', 'DP', ...
 scores = dict()

 with PrintTimer("[7.2] Evaluating heuristic baselines"):
     # Candidate-set baseline (all-ones marginals) - binary tasks only
     if C.cardinality == 2:
         cs_test = np.ones(Y_test.shape[0])
         scores['CS'] = score_marginals(cs_test, Y_test)
     elif args.verbose > 0:
         print("Candidate-set not applicable for categorical tasks.")

     # Majority vote of the labeling functions on the test set
     mv_test = majority_vote_marginals(L_test, cardinality=C.cardinality)
     scores['MV'] = score_marginals(mv_test, Y_test)

 with PrintTimer("[7.3] Evaluating generative model"):
     # Generative model marginals on the test label matrix
     # TODO: Make sure this is the same as scuba.utils score function!!!
     np.random.seed(args.rand_seed)
     gen_test = gen_model.marginals(L_test)
     scores['Gen'] = score_marginals(gen_test, Y_test)

 with PrintTimer("[7.4] Evaluate full DP pipeline (disc. model)"):
     # Discriminative model trained on the generative model's predictions
     # TODO: Make sure this is the same as scuba.utils score function!!!
     np.random.seed(args.rand_seed)
     dp_test = disc_model.marginals(
         F_test, batch_size=config['disc-eval-batch-size'])
     scores['DP'] = score_marginals(dp_test, Y_test)

 # Optional distant-supervision baseline: train the discriminative model class
 # on soft majority-vote labels of the LFs and score it on the test set as
 # scores['DS-MV'].
 # NOTE(review): this rebinds `disc_model`, so anything after this block sees
 # the baseline model rather than the DP model.
 # NOTE(review): the timer label "[7.4]" repeats the label of the previous step.
 if args.ds_tests:
 with PrintTimer("[7.4] Evaluating distant supervision baseline"):
 # Score discriminative model trained on LF majority vote (hard)
 # Load data
 if L_train is None:
 # Optionally subsample the training set here
 if args.training_docs > 0:
 cids_query = get_training_cids_query(
 sess,
 preprocess.CONTEXT_HIERARCHY,
 C,
 preprocess.CANDIDATE_CONTEXT,
 args.training_docs,
 training_docs_shuffle=args.training_docs_shuffle,
 verbose=args.verbose,
 training_splits=args.training_splits
 )
 L_train = load_label_matrix(sess, cids_query=cids_query)
 else:
 L_train = load_label_matrix(sess, split=0)
 assert L_train.nnz > 0
 if X_train is None:
 # Optionally subsample the training set here
 if args.training_docs > 0:
 cids_query = get_training_cids_query(
 sess,
 preprocess.CONTEXT_HIERARCHY,
 C,
 preprocess.CANDIDATE_CONTEXT,
 args.training_docs,
 training_docs_shuffle=args.training_docs_shuffle,
 verbose=args.verbose,
 training_splits=args.training_splits
 )
 X_train = sess.query(C)\
 .filter(C.id == cids_query.subquery().c.id)\
 .order_by(C.id)\
 .all()
 else:
 X_train = sess.query(C).filter(C.split == 0)\
 .order_by(C.id).all()
 assert len(X_train) > 0
 if X_dev is None:
 X_dev = sess.query(C).filter(C.split == DEV_SPLIT)\
 .order_by(C.id).all()
 assert len(X_dev) > 0
 if Y_dev is None:
 Y_dev = load_gold_labels(sess,annotator_name='gold',split=DEV_SPLIT)
 assert Y_dev.nonzero()[0].shape[0] > 0

 # Compute soft ([0,1]) majority vote training marginals
 Y_train_mv = majority_vote_marginals(L_train,
 cardinality=C.cardinality)

 # Train discriminative model with MV training labels
 # NOTE(review): trains and scores on the candidate lists (X_train / X_dev /
 # X_test), whereas the DP model above uses F_train / F_dev / F_test; for a
 # model class without `representation` this may need the feature matrices.
 disc_model = train_model(
 config['disc-model-class'],
 X_train,
 Y_train=Y_train_mv,
 X_dev=X_dev,
 Y_dev=Y_dev,
 cardinality=C.cardinality,
 search_size=args.disc_model_search_space,
 search_params=config['disc-params-range'],
 rand_seed=args.rand_seed,
 n_threads=args.n_threads,
 verbose=(args.verbose > 0),
 params_default=config['disc-params-default'],
 model_init_params=config['disc-init-params'],
 model_name=DISC_MODEL_NAME + "_ds",
 save_dir=args.save_dir,
 eval_batch_size=config['disc-eval-batch-size']
 )
 np.random.seed(args.rand_seed)
 scores['DS-MV'] = score_marginals(disc_model.marginals(X_test,
 batch_size=config['disc-eval-batch-size']), Y_test)

 # Optional fully-supervised baseline: train the discriminative model class on
 # the gold labels of the training candidates and score it on the test set as
 # scores['Sup'].
 # NOTE(review): like the distant-supervision baseline, this rebinds
 # `disc_model` and works on the candidate lists rather than F_* inputs.
 if args.supervised_tests:
 with PrintTimer("[7.5] Evaluating fully-supervised baseline"):
 # Load data
 if X_train is None:
 # Optionally subsample the training set here
 if args.training_docs > 0:
 cids_query = get_training_cids_query(
 sess,
 preprocess.CONTEXT_HIERARCHY,
 C,
 preprocess.CANDIDATE_CONTEXT,
 args.training_docs,
 training_docs_shuffle=args.training_docs_shuffle,
 verbose=args.verbose,
 training_splits=args.training_splits
 )
 X_train = sess.query(C)\
 .filter(C.id == cids_query.subquery().c.id)\
 .order_by(C.id)\
 .all()
 else:
 X_train = sess.query(C).filter(C.split == 0)\
 .order_by(C.id).all()
 assert len(X_train) > 0
 if X_dev is None:
 X_dev = sess.query(C).filter(C.split == DEV_SPLIT)\
 .order_by(C.id).all()
 assert len(X_dev) > 0
 if Y_dev is None:
 Y_dev = load_gold_labels(sess,annotator_name='gold',split=DEV_SPLIT)
 assert Y_dev.nonzero()[0].shape[0] > 0

 # Load ground-truth training set labels
 # Note we load in {0,1} not {-1,1} format for binary
 # Optionally subsample the training set here
 # NOTE(review): the cids query is rebuilt here even if X_train was already
 # in memory; presumably both selections match - verify when shuffling.
 if args.training_docs > 0:
 cids_query = get_training_cids_query(
 sess,
 preprocess.CONTEXT_HIERARCHY,
 C,
 preprocess.CANDIDATE_CONTEXT,
 args.training_docs,
 training_docs_shuffle=args.training_docs_shuffle,
 verbose=args.verbose,
 training_splits=args.training_splits
 )
 Y_train_gt = load_gold_labels(sess, cids_query=cids_query,
 zero_one=(C.cardinality == 2), load_as_array=True,
 annotator_name='gold')
 else:
 Y_train_gt = load_gold_labels(sess, split=0,
 zero_one=(C.cardinality == 2), load_as_array=True,
 annotator_name='gold')
 assert Y_train_gt.nonzero()[0].shape[0] > 0

 # If categorical, convert to one-hot
 if C.cardinality > 2:
 Y_train_gt = labels_to_one_hot(Y_train_gt, C.cardinality)

 # Train discriminative model and score
 disc_model = train_model(
 config['disc-model-class'],
 X_train,
 Y_train=Y_train_gt,
 X_dev=X_dev,
 Y_dev=Y_dev,
 cardinality=C.cardinality,
 search_size=args.disc_model_search_space,
 search_params=config['disc-params-range'],
 rand_seed=args.rand_seed,
 n_threads=args.n_threads,
 verbose=(args.verbose > 0),
 params_default=config['disc-params-default'],
 model_init_params=config['disc-init-params'],
 model_name=DISC_MODEL_NAME + "_supervised",
 save_dir=args.save_dir,
 eval_batch_size=config['disc-eval-batch-size']
 )
 # Re-seed before scoring, as for the other methods
 np.random.seed(args.rand_seed)
 scores['Sup'] = score_marginals(disc_model.marginals(X_test,
 batch_size=config['disc-eval-batch-size']), Y_test)

 # Print and save final score report.
 # Each scores[k] is indexed positionally: (accuracy, coverage) for
 # categorical tasks, (precision, recall, F1, coverage) for binary ones.
 ks = list(scores.keys())
 if C.cardinality > 2:
     cols = ['Accuracy', 'Coverage']
 else:
     cols = ['Precision', 'Recall', 'F1 Score', 'Coverage']
 # One Series per metric; the metric's position in `cols` is its index in
 # the score tuple.
 d = dict(
     (col, Series(data=[scores[k][i] for k in ks], index=ks))
     for i, col in enumerate(cols)
 )
 df = DataFrame(data=d, index=ks)
 print(df)

 # Assemble the report, to be saved as a json file
 df_scores = df.to_dict()
 row_str = print_latex_table_row(args.exp, scores,
                                 cardinality=C.cardinality)
 if args.verbose > 0:
     print(row_str)
 report = {
     'scores': df_scores,
     'row-string': row_str,
     'scuba-commit': git_commit_hash(),
     # Requires $SNORKELHOME to be set (KeyError otherwise)
     'snorkel-commit': git_commit_hash(path=os.environ['SNORKELHOME']),
     'args': vars(args)
 }

 # Save to <reports_dir>/<YYYY_MM_DD>/<exp>_<HH_MM_SS>.json
 report_dir = os.path.join(args.reports_dir, strftime("%Y_%m_%d"))
 report_name = '{0}_{1}.json'.format(args.exp, strftime("%H_%M_%S"))
 if not os.path.exists(report_dir):
     os.makedirs(report_dir)
 # json.dump writes text, so open in text mode ('wb' breaks on Python 3)
 with open(os.path.join(report_dir, report_name), 'w') as f:
     json.dump(report, f, indent=2)


### [7.0] Loading all data for final evaluation
INFO:tensorflow:Restoring parameters from checkpoints/SparseLogisticRegression_fullchk_pr3__epoch_439/SparseLogisticRegression_fullchk_pr3__epoch_439-439
[SparseLogisticRegression] Loaded model 
[GenerativeModel] Model loaded.
Loaded X_test: 1232
Loaded Y_test: (1232, 1)
Loaded F_test: representation is False
### Done in 13.6s.

### [7.2] Evaluating heuristic baselines
### Done in 0.0s.

### [7.3] Evaluating generative model
### Done in 0.3s.

### [7.4] Evaluate full DP pipeline (disc. model)
### Done in 0.1s.

 Coverage F1 Score Precision Recall
CS 1.000000 0.079501 0.041396 1.000000
MV 0.727273 0.338462 0.785714 0.215686
Gen 0.731331 0.338462 0.785714 0.215686
DP 1.000000 0.552632 0.840000 0.411765
fda & 4.1 & 100.0 & 8.0 & 78.6 & 21.6 & 33.8 & 78.6 & 21.6 & 33.8 & - & - & - & 84.0 & 41.2 & 55.3 & - & - & -\\
