Note
Click here to download the full example code
In many real-world examples, there are several ways to extract features from a dataset, and it is often beneficial to combine a few of them to obtain good performance. This example shows how to use FeatureUnion to combine features obtained by PCA and univariate selection.
Combining features using this transformer has the benefit that it allows cross validation and grid searches over the whole process.
The combination used in this example is not particularly helpful on this dataset and is only used to illustrate the usage of FeatureUnion.
Out:
Combined space has 3 features Fitting 5 folds for each of 18 candidates, totalling 90 fits [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1, score=0.8666666666666667, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1, score=0.9, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1, score=0.8666666666666667, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10 [CV] 
features__pca__n_components=1, features__univ_select__k=1, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10, score=0.9, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=1, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1 
[CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=1, features__univ_select__k=2, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, 
svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1, score=0.8666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10, score=0.9, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, 
svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=1, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, 
svm__C=1 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10, score=0.9666666666666667, total= 0.1s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10, score=1.0, total= 0.1s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10, score=0.9, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=2, features__univ_select__k=2, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1 [CV] 
features__pca__n_components=3, features__univ_select__k=1, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=1, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1 [CV] 
features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1, score=0.9333333333333333, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=0.1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=1, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10, score=1.0, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=3, 
features__univ_select__k=2, svm__C=10, score=0.9, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10, score=0.9666666666666667, total= 0.0s [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10 [CV] features__pca__n_components=3, features__univ_select__k=2, svm__C=10, score=1.0, total= 0.0s Pipeline(memory=None, steps=[('features', FeatureUnion(n_jobs=None, transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=3, random_state=None, svd_solver='auto', tol=0.0, whiten=False)), ('univ_select', SelectKBest(k=1, score_func=<function f_classif at 0x7f5f3718f378>))], transfor...r', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False))])
# Author: Andreas Mueller <[email protected]>
#
# License: BSD 3 clause

# Example script: concatenate PCA components with univariately selected
# features via FeatureUnion, then grid-search the whole pipeline
# (feature extraction + linear SVM) in one go.
from __future__ import print_function

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.svm import SVC

iris = load_iris()

X, y = iris.data, iris.target

# This dataset is way too high-dimensional. Better do PCA:
pca = PCA(n_components=2)

# Maybe some original features were good, too?
selection = SelectKBest(k=1)

# Build estimator from PCA and univariate selection. The step names
# ("pca", "univ_select") are reused below as prefixes in the parameter grid.
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

# Use combined features to transform dataset. The selector is supervised,
# so ``y`` has to be passed to ``fit``.
X_features = combined_features.fit(X, y).transform(X)
print("Combined space has", X_features.shape[1], "features")

svm = SVC(kernel="linear")

# Do grid search over k, n_components and C. Nested parameters are addressed
# as <pipeline step>__<union member>__<parameter>.
pipeline = Pipeline([("features", combined_features), ("svm", svm)])

# 3 * 2 * 3 = 18 candidates; with cv=5 this amounts to 90 fits.
param_grid = dict(features__pca__n_components=[1, 2, 3],
                  features__univ_select__k=[1, 2],
                  svm__C=[0.1, 1, 10])

# verbose=10 makes the search print one line per fold and candidate.
grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=5, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)
Total running time of the script: ( 0 minutes 1.232 seconds)
Gallery generated by Sphinx-Gallery
© 2007–2018 The scikit-learn developers
Licensed under the 3-clause BSD License.
http://scikit-learn.org/stable/auto_examples/compose/plot_feature_union.html