# Load the breast cancer dataset: full feature matrix, feature names, labels.
data = load_breast_cancer()
X, y = data.data, data.target
X_names = data.feature_names
df = pd.DataFrame(data=X, columns=X_names)

# Two-column subset (mean radius, mean texture) taken from the labelled frame.
features = ['mean radius', 'mean texture']
Xf = df.loc[:, features].values
yf = data.target

# Standardized copy of the two-column subset; `ss` keeps the fitted scaler.
ss = StandardScaler()
Xs = ss.fit_transform(Xf)
# Each classifier below is fit on the full feature matrix X and then scored on
# that same data, so the confusion matrices are in-sample.

# classification tree
dt_class = tree.DecisionTreeClassifier(max_depth=5, random_state=1693)
y_pred = dt_class.fit(X, y).predict(X)
dt_cm = confusion_matrix(y, y_pred)

# naive bayes
nb_class = GaussianNB()
y_pred = nb_class.fit(X, y).predict(X)
cm = confusion_matrix(y, y_pred)

# random forest
rf_class = RandomForestClassifier(n_estimators=1000, max_depth=5, random_state=1693)
y_pred = rf_class.fit(X, y).predict(X)
rf_cm = confusion_matrix(y, y_pred)
Classification Tree: 0 false negatives (FN)
Naive Bayes: 10 false negatives (FN)
Random Forest: 0 false negatives (FN)
def validation(X, y, k, model, scaler=None):
    """Estimate training-fold and held-out-fold accuracy with shuffled k-fold CV.

    A two-step pipeline (scaler, then ``model``) is refit on every training
    fold and scored both on that training fold and on the held-out fold.

    Args:
        X: Feature matrix; it is indexed as ``X[rows, :]``, so it must
            support that form of indexing (e.g. a 2-D NumPy array).
        y: Label vector, indexed as ``y[rows]``.
        k: Number of folds passed to ``KFold``.
        model: Classifier used as the final pipeline step.
        scaler: Optional preprocessing step placed before ``model``. When
            omitted, the module-level ``scale`` object is used, which is what
            this function always did before the parameter existed.

    Returns:
        A tuple ``(mean training-fold accuracy, mean held-out-fold accuracy)``
        averaged over the ``k`` folds.
    """
    if scaler is None:
        # Backward-compatible fallback to the global the original relied on;
        # pass ``scaler`` explicitly to avoid the hidden dependency.
        scaler = scale

    pipe = Pipeline([('scale', scaler), ('Classifier', model)])
    # Fixed seed so the fold assignment is reproducible between runs.
    kf = KFold(n_splits=k, shuffle=True, random_state=1693)

    PA_IV = []  # accuracy on each training fold (internal validation)
    PA_EV = []  # accuracy on each held-out fold (external validation)
    for idxtrain, idxtest in kf.split(X):
        X_train, y_train = X[idxtrain, :], y[idxtrain]
        X_test, y_test = X[idxtest, :], y[idxtest]
        pipe.fit(X_train, y_train)
        PA_IV.append(accuracy_score(y_train, pipe.predict(X_train)))
        PA_EV.append(accuracy_score(y_test, pipe.predict(X_test)))
    return np.mean(PA_IV), np.mean(PA_EV)
# NOTE(review): validation returns a (training-fold, held-out-fold) pair, while
# each recorded output below is a single number - confirm which one it is.

# Gaussian naive Bayes, 10-fold cross-validation on the two-column subset.
scale = StandardScaler()
model = GaussianNB()
validation(Xf, yf, k=10, model=model) # -> 0.8805137844611528

# Random forest, same folds and features.
scale = StandardScaler()
model = RandomForestClassifier(n_estimators=100, max_depth=7, random_state=1693)
validation(Xf, yf, k=10, model=model) # -> 0.8822368421052632
# create model: small feed-forward binary classifier on the two scaled features
model = Sequential()
# Only the first layer declares the input width; each later layer takes its
# input size from the previous layer's output (16, then 8, then 4), so the
# repeated input_dim=2 on the hidden layers was incorrect and has been removed.
model.add(Dense(16, kernel_initializer='random_normal', input_dim=2, activation='relu'))
model.add(Dense(8, kernel_initializer='random_normal', activation='relu'))
model.add(Dense(4, kernel_initializer='random_normal', activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# metrics is passed as a list: newer Keras releases reject a bare string here.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the model
# With shuffle=False the rows are not shuffled between epochs;
# validation_split=0.25 holds out a quarter of the samples for validation.
model.fit(Xs, yf, epochs=150, verbose=0, validation_split=0.25, batch_size=10, shuffle=False)
# find accuracy
# NOTE(review): this evaluates on the same Xs/yf that were passed to fit, so the
# score includes training samples; the "Test Data" label in the recorded output
# looks inaccurate - confirm and evaluate on a held-out set if that is intended.
_, accuracy = model.evaluate(Xs, yf) # -> Accuracy on the Test Data: 88.92794251441956