import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import dabl

# load the Titanic training set; Survived is the binary target
train_data = pd.read_csv('./Titanic/train.csv')
X = train_data.drop(["Survived", "PassengerId"], axis=1)
y = train_data.Survived

# fit a baseline classifier; verbose=1 logs every candidate model and its scores
model = dabl.SimpleClassifier(random_state=0, verbose=1)
model.fit(X, y)
Running DummyClassifier()
accuracy: 0.616 average_precision: 0.384 roc_auc: 0.500 recall_macro: 0.500 f1_macro: 0.381
=== new best DummyClassifier() (using recall_macro):
accuracy: 0.616 average_precision: 0.384 roc_auc: 0.500 recall_macro: 0.500 f1_macro: 0.381

Running GaussianNB()
accuracy: 0.424 average_precision: 0.737 roc_auc: 0.827 recall_macro: 0.529 f1_macro: 0.354
=== new best GaussianNB() (using recall_macro):
accuracy: 0.424 average_precision: 0.737 roc_auc: 0.827 recall_macro: 0.529 f1_macro: 0.354

Running MultinomialNB()
accuracy: 0.763 average_precision: 0.783 roc_auc: 0.831 recall_macro: 0.742 f1_macro: 0.746
=== new best MultinomialNB() (using recall_macro):
accuracy: 0.763 average_precision: 0.783 roc_auc: 0.831 recall_macro: 0.742 f1_macro: 0.746

Running DecisionTreeClassifier(class_weight='balanced', max_depth=1)
accuracy: 0.787 average_precision: 0.632 roc_auc: 0.767 recall_macro: 0.767 f1_macro: 0.771
=== new best DecisionTreeClassifier(class_weight='balanced', max_depth=1) (using recall_macro):
accuracy: 0.787 average_precision: 0.632 roc_auc: 0.767 recall_macro: 0.767 f1_macro: 0.771

Running DecisionTreeClassifier(class_weight='balanced', max_depth=5)
accuracy: 0.788 average_precision: 0.795 roc_auc: 0.838 recall_macro: 0.790 f1_macro: 0.781
=== new best DecisionTreeClassifier(class_weight='balanced', max_depth=5) (using recall_macro):
accuracy: 0.788 average_precision: 0.795 roc_auc: 0.838 recall_macro: 0.790 f1_macro: 0.781

Running DecisionTreeClassifier(class_weight='balanced', min_impurity_decrease=0.01)
accuracy: 0.800 average_precision: 0.788 roc_auc: 0.848 recall_macro: 0.787 f1_macro: 0.788

Running LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000)
accuracy: 0.783 average_precision: 0.819 roc_auc: 0.851 recall_macro: 0.775 f1_macro: 0.772

Running LogisticRegression(class_weight='balanced', max_iter=1000)
accuracy: 0.788 average_precision: 0.816 roc_auc: 0.851 recall_macro: 0.782 f1_macro: 0.778

Best model:
DecisionTreeClassifier(class_weight='balanced', max_depth=5)
Best Scores:
accuracy: 0.788 average_precision: 0.795 roc_auc: 0.838 recall_macro: 0.790 f1_macro: 0.781
SimpleClassifier(random_state=0)
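Because SimpleClassifier follows the scikit-learn estimator interface, the fitted search object can be used for prediction directly; preprocessing of the raw columns happens inside the model. A minimal sketch, simply reusing the first few training rows for illustration:

preds = model.predict(X.head())
print(preds)  # 0/1 Survived predictions for the first five passengers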
# housing data; median_house_value is the continuous regression target
data = pd.read_csv('housing.csv')
X_housing = data.drop(['median_house_value'], axis=1)
y_housing = data.median_house_value

# use separate names so the Titanic X, y, and model above stay usable below
reg = dabl.SimpleRegressor(random_state=0, verbose=1)
reg.fit(X_housing, y_housing)
Running DummyRegressor()
r2: -0.000 neg_mean_squared_error: -13316097479.195
=== new best DummyRegressor() (using r2):
r2: -0.000 neg_mean_squared_error: -13316097479.195

Running DecisionTreeRegressor(max_depth=1)
r2: 0.307 neg_mean_squared_error: -9229835649.944
=== new best DecisionTreeRegressor(max_depth=1) (using r2):
r2: 0.307 neg_mean_squared_error: -9229835649.944

Running DecisionTreeRegressor(max_depth=5)
r2: 0.624 neg_mean_squared_error: -5008235435.627
=== new best DecisionTreeRegressor(max_depth=5) (using r2):
r2: 0.624 neg_mean_squared_error: -5008235435.627

Running Ridge(alpha=10)
r2: 0.648 neg_mean_squared_error: -4692624257.386
=== new best Ridge(alpha=10) (using r2):
r2: 0.648 neg_mean_squared_error: -4692624257.386

Running Lasso(alpha=10)
r2: 0.648 neg_mean_squared_error: -4690203117.089
=== new best Lasso(alpha=10) (using r2):
r2: 0.648 neg_mean_squared_error: -4690203117.089

Best model:
Lasso(alpha=10)
Best Scores:
r2: 0.648 neg_mean_squared_error: -4690203117.089
SimpleRegressor(random_state=0)
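The fitted regressor works the same way; a minimal sketch, assuming SimpleRegressor exposes the standard scikit-learn predict and score methods (score being R^2 under that convention):

print(reg.predict(X_housing.head()))    # predicted median house values
print(reg.score(X_housing, y_housing))  # R^2 on the training data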
# clean the data: detect column types and apply the corresponding conversions
titanic_clean = dabl.clean(X, verbose=0)
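clean returns a new DataFrame rather than modifying X in place, with the detected types reflected in the column dtypes; inspecting the result is plain pandas:

print(titanic_clean.dtypes)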
# detect the semantic type of each column
types = dabl.detect_types(X)
types
|          | continuous | dirty_float | low_card_int | categorical | date  | free_string | useless |
|----------|------------|-------------|--------------|-------------|-------|-------------|---------|
| Pclass   | False      | False       | False        | True        | False | False       | False   |
| Name     | False      | False       | False        | False       | False | True        | False   |
| Sex      | False      | False       | False        | True        | False | False       | False   |
| Age      | True       | False       | False        | False       | False | False       | False   |
| SibSp    | False      | False       | True         | False       | False | False       | False   |
| Parch    | False      | False       | True         | False       | False | False       | False   |
| Ticket   | False      | False       | False        | False       | False | True        | False   |
| Fare     | True       | False       | False        | False       | False | False       | False   |
| Cabin    | False      | False       | False        | False       | False | True        | False   |
| Embarked | False      | False       | False        | True        | False | False       | False   |
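Since detect_types returns a boolean DataFrame indexed by column name, it composes naturally with pandas indexing; the selection idiom below is plain pandas, not a dabl API. A minimal sketch picking out only the columns flagged as continuous:

continuous_cols = types.index[types.continuous]
print(X[continuous_cols].head())  # Age and Fare, per the table above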
# plot the target distribution and the most informative feature interactions
dabl.plot(train_data, 'Survived')
Target looks like classification
Linear Discriminant Analysis training set score: 0.589
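dabl.plot draws its figures with matplotlib, so in a plain script an explicit plt.show() may be needed to see them; the same function also handles regression targets. A minimal sketch against the housing data loaded above:

import matplotlib.pyplot as plt

dabl.plot(data, 'median_house_value')
plt.show()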
# explain the best model found by the classifier search (here a depth-5 decision tree)
dabl.explain(model)
Depth: 5
Number of leaves: 27
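The scores printed during the search come from dabl's internal validation, so a held-out split gives an independent check on the chosen model. A minimal sketch using scikit-learn's train_test_split, assuming SimpleClassifier exposes the standard score method:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=0)

holdout_model = dabl.SimpleClassifier(random_state=0)
holdout_model.fit(X_train, y_train)
print(holdout_model.score(X_test, y_test))  # accuracy on unseen rows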