{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Bag of Words\n\n\nThis example shows how you can transform a discretized time series\n(i.e. a time series represented as a sequence of letters) into a bag\nof words using :class:`pyts.bag_of_words.BagOfWords`.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\nimport matplotlib.pyplot as plt\nfrom pyts.bag_of_words import BagOfWords\n\n# Parameters\nn_samples, n_timestamps = 100, 48\nn_bins = 4\n\n# Toy dataset\nrng = np.random.RandomState(42)\nalphabet = np.array(['a', 'b', 'c', 'd'])\nX_ordinal = rng.randint(n_bins, size=(n_samples, n_timestamps))\nX_alphabet = alphabet[X_ordinal]\n\n# Bag-of-words transformation\nbow = BagOfWords(window_size=2, numerosity_reduction=False)\nX_bow = bow.transform(X_alphabet)\nwords = np.asarray(X_bow[0].split(' '))\ndifferent_words_idx = np.r_[True, words[1:] != words[:-1]]\n\n# Show the results\nplt.figure(figsize=(16, 7))\nplt.suptitle('Transforming a discretized time series into a bag of words',\n             fontsize=20, y=0.9)\n\nplt.subplot(121)\nplt.plot(X_ordinal[0], 'o', scalex=0.2)\nplt.yticks(np.arange(4), alphabet)\nplt.xticks([], [])\nplt.yticks(fontsize=16)\nplt.title('Without numerosity reduction', fontsize=16)\n\nfor i, word in enumerate(words):\n    plt.text(i, - 0.4 - (i % 5) / 4, word, fontsize=17, color='C0')\n\nplt.subplot(122)\nplt.plot(X_ordinal[0], 'o')\nplt.yticks(np.arange(4), alphabet)\nplt.xticks([], [])\nplt.yticks(fontsize=16)\nplt.title('With numerosity reduction', fontsize=16)\n\nfor i, (word, different_word) in enumerate(zip(words, different_words_idx)):\n    if different_word:\n        plt.text(i, - 0.4 - (i % 5) / 4, word, fontsize=17, color='C0')\n    else:\n        plt.text(i, - 0.4 - (i % 5) / 4, word, fontsize=17, color='C0',\n                 alpha=0.2)\n\nplt.tight_layout()\nplt.subplots_adjust(bottom=0.3, top=0.8)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}