{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n===========================================\nBag-of-SFA Symbols in Vector Space (BOSSVS)\n===========================================\n\nThis example shows how the BOSSVS algorithm transforms a dataset\nconsisting of time series and their corresponding labels into a\ndocument-term matrix using tf-idf statistics. Each class is represented\nas a tfidf vector. For an unlabeled time series, the predicted label is\nthe label of the tfidf vector giving the highest cosine similarity with\nthe tf vector of the unlabeled time series. BOSSVS algorithm is\nimplemented as :class:`pyts.classification.BOSSVS`.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\nimport matplotlib.pyplot as plt\nfrom pyts.classification import BOSSVS\nfrom pyts.datasets import load_gunpoint\n\n# Toy dataset\nX_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)\n\n# BOSSVS transformation\nbossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)\nbossvs.fit(X_train, y_train)\ntfidf = bossvs.tfidf_\nvocabulary_length = len(bossvs.vocabulary_)\nX_new = bossvs.decision_function(X_test)\n\n# Visualize the transformation\nplt.figure(figsize=(14, 5))\nwidth = 0.4\n\nplt.subplot(121)\nplt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0],\n        width=width, label='Class 1')\nplt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1],\n        width=width, label='Class 2')\nplt.xticks(np.arange(vocabulary_length),\n           np.vectorize(bossvs.vocabulary_.get)(np.arange(vocabulary_length)),\n           fontsize=14)\nplt.ylim((0, 7))\nplt.xlabel(\"Words\", fontsize=14)\nplt.ylabel(\"tf-idf\", fontsize=14)\nplt.title(\"tf-idf vector for each class (training set)\", fontsize=15)\nplt.legend(loc='best')\n\nplt.subplot(122)\nn_samples_plot = 8\nplt.bar(np.arange(n_samples_plot) - width / 2, X_new[:n_samples_plot, 0],\n        width=width, label='Class 1')\nplt.bar(np.arange(n_samples_plot) + width / 2, X_new[:n_samples_plot, 1],\n        width=width, label='Class 2')\nplt.xticks(np.arange(n_samples_plot), y_test[:n_samples_plot], fontsize=14)\nplt.ylim((0, 1.2))\nplt.xlabel(\"True label\", fontsize=14)\nplt.ylabel(\"Cosine similarity\", fontsize=14)\nplt.title((\"Cosine similarity between tf-idf vectors for each class\\n\"\n           \"and tf vectors for each sample (test set)\"), fontsize=15)\nplt.legend(loc='best')\n\nplt.suptitle(\"BOSSVS\", y=0.95, fontsize=22)\nplt.tight_layout()\nplt.subplots_adjust(top=0.75)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}