[
  {
    "path": "README.md",
    "content": "# LSTM_learn\na implement of LSTM using Keras for time series prediction regression problem\n### Data\nthe data were from internet, this data was using for predict the number of people in a airline company, we use LSTM network to solve this problem\n### Denpensies\nfor the implemention of code, we using Keras to establish LSTM network,  as well as using numpy, pandas, so before you runing this tutorial, it is strongly recommended you install Anaconda which is a package inclueded them all.\n### open source protocol\nMIT\n### contact\nauthor: jinfagang19@163.com\nCentral South University, Mr. Jin\n"
  },
  {
    "path": "international-airline-passengers.csv",
    "content": "time,passengers\r\n\"1949-01\",112\r\n\"1949-02\",118\r\n\"1949-03\",132\r\n\"1949-04\",129\r\n\"1949-05\",121\r\n\"1949-06\",135\r\n\"1949-07\",148\r\n\"1949-08\",148\r\n\"1949-09\",136\r\n\"1949-10\",119\r\n\"1949-11\",104\r\n\"1949-12\",118\r\n\"1950-01\",115\r\n\"1950-02\",126\r\n\"1950-03\",141\r\n\"1950-04\",135\r\n\"1950-05\",125\r\n\"1950-06\",149\r\n\"1950-07\",170\r\n\"1950-08\",170\r\n\"1950-09\",158\r\n\"1950-10\",133\r\n\"1950-11\",114\r\n\"1950-12\",140\r\n\"1951-01\",145\r\n\"1951-02\",150\r\n\"1951-03\",178\r\n\"1951-04\",163\r\n\"1951-05\",172\r\n\"1951-06\",178\r\n\"1951-07\",199\r\n\"1951-08\",199\r\n\"1951-09\",184\r\n\"1951-10\",162\r\n\"1951-11\",146\r\n\"1951-12\",166\r\n\"1952-01\",171\r\n\"1952-02\",180\r\n\"1952-03\",193\r\n\"1952-04\",181\r\n\"1952-05\",183\r\n\"1952-06\",218\r\n\"1952-07\",230\r\n\"1952-08\",242\r\n\"1952-09\",209\r\n\"1952-10\",191\r\n\"1952-11\",172\r\n\"1952-12\",194\r\n\"1953-01\",196\r\n\"1953-02\",196\r\n\"1953-03\",236\r\n\"1953-04\",235\r\n\"1953-05\",229\r\n\"1953-06\",243\r\n\"1953-07\",264\r\n\"1953-08\",272\r\n\"1953-09\",237\r\n\"1953-10\",211\r\n\"1953-11\",180\r\n\"1953-12\",201\r\n\"1954-01\",204\r\n\"1954-02\",188\r\n\"1954-03\",235\r\n\"1954-04\",227\r\n\"1954-05\",234\r\n\"1954-06\",264\r\n\"1954-07\",302\r\n\"1954-08\",293\r\n\"1954-09\",259\r\n\"1954-10\",229\r\n\"1954-11\",203\r\n\"1954-12\",229\r\n\"1955-01\",242\r\n\"1955-02\",233\r\n\"1955-03\",267\r\n\"1955-04\",269\r\n\"1955-05\",270\r\n\"1955-06\",315\r\n\"1955-07\",364\r\n\"1955-08\",347\r\n\"1955-09\",312\r\n\"1955-10\",274\r\n\"1955-11\",237\r\n\"1955-12\",278\r\n\"1956-01\",284\r\n\"1956-02\",277\r\n\"1956-03\",317\r\n\"1956-04\",313\r\n\"1956-05\",318\r\n\"1956-06\",374\r\n\"1956-07\",413\r\n\"1956-08\",405\r\n\"1956-09\",355\r\n\"1956-10\",306\r\n\"1956-11\",271\r\n\"1956-12\",306\r\n\"1957-01\",315\r\n\"1957-02\",301\r\n\"1957-03\",356\r\n\"1957-04\",348\r\n\"1957-05\",355\r\n\"1957-06\",422\r\n\"1957-07\",465\r\n\"1957-08\",467\r\n\"1957-09\",404\r\n\"1957-10\",347\r\n\"1957-11\",305\r\n\"1957-12\",336\r\n\"1958-01\",340\r\n\"1958-02\",318\r\n\"1958-03\",362\r\n\"1958-04\",348\r\n\"1958-05\",363\r\n\"1958-06\",435\r\n\"1958-07\",491\r\n\"1958-08\",505\r\n\"1958-09\",404\r\n\"1958-10\",359\r\n\"1958-11\",310\r\n\"1958-12\",337\r\n\"1959-01\",360\r\n\"1959-02\",342\r\n\"1959-03\",406\r\n\"1959-04\",396\r\n\"1959-05\",420\r\n\"1959-06\",472\r\n\"1959-07\",548\r\n\"1959-08\",559\r\n\"1959-09\",463\r\n\"1959-10\",407\r\n\"1959-11\",362\r\n\"1959-12\",405\r\n\"1960-01\",417\r\n\"1960-02\",391\r\n\"1960-03\",419\r\n\"1960-04\",461\r\n\"1960-05\",472\r\n\"1960-06\",535\r\n\"1960-07\",622\r\n\"1960-08\",606\r\n\"1960-09\",508\r\n\"1960-10\",461\r\n\"1960-11\",390\r\n\"1960-12\",432"
  },
  {
    "path": "lstm_airline_predict.py",
    "content": "import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.preprocessing import MinMaxScaler\nfrom keras.models import Sequential\nfrom keras.layers import LSTM, Dense, Activation\n\n\ndef load_data(file_name, sequence_length=10, split=0.8):\n    df = pd.read_csv(file_name, sep=',', usecols=[1])\n    data_all = np.array(df).astype(float)\n    scaler = MinMaxScaler()\n    data_all = scaler.fit_transform(data_all)\n    data = []\n    for i in range(len(data_all) - sequence_length - 1):\n        data.append(data_all[i: i + sequence_length + 1])\n    reshaped_data = np.array(data).astype('float64')\n    np.random.shuffle(reshaped_data)\n    # 对x进行统一归一化，而y则不归一化\n    x = reshaped_data[:, :-1]\n    y = reshaped_data[:, -1]\n    split_boundary = int(reshaped_data.shape[0] * split)\n    train_x = x[: split_boundary]\n    test_x = x[split_boundary:]\n\n    train_y = y[: split_boundary]\n    test_y = y[split_boundary:]\n\n    return train_x, train_y, test_x, test_y, scaler\n\n\ndef build_model():\n    # input_dim是输入的train_x的最后一个维度，train_x的维度为(n_samples, time_steps, input_dim)\n    model = Sequential()\n    model.add(LSTM(input_dim=1, output_dim=50, return_sequences=True))\n    print(model.layers)\n    model.add(LSTM(100, return_sequences=False))\n    model.add(Dense(output_dim=1))\n    model.add(Activation('linear'))\n\n    model.compile(loss='mse', optimizer='rmsprop')\n    return model\n\n\ndef train_model(train_x, train_y, test_x, test_y):\n    model = build_model()\n\n    try:\n        model.fit(train_x, train_y, batch_size=512, nb_epoch=30, validation_split=0.1)\n        predict = model.predict(test_x)\n        predict = np.reshape(predict, (predict.size, ))\n    except KeyboardInterrupt:\n        print(predict)\n        print(test_y)\n    print(predict)\n    print(test_y)\n    try:\n        fig = plt.figure(1)\n        plt.plot(predict, 'r:')\n        plt.plot(test_y, 'g-')\n        plt.legend(['predict', 'true'])\n    except Exception as e:\n        print(e)\n    return predict, test_y\n\n\nif __name__ == '__main__':\n    train_x, train_y, test_x, test_y, scaler = load_data('international-airline-passengers.csv')\n    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))\n    test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))\n    predict_y, test_y = train_model(train_x, train_y, test_x, test_y)\n    predict_y = scaler.inverse_transform([[i] for i in predict_y])\n    test_y = scaler.inverse_transform(test_y)\n    fig2 = plt.figure(2)\n    plt.plot(predict_y, 'g:')\n    plt.plot(test_y, 'r-')\n    plt.show()\n\n"
  },
  {
    "path": "test.csv",
    "content": "1,2\n2,3\n3,4\n5,6\n7,8"
  },
  {
    "path": "test.py",
    "content": "import datetime as dt\nimport matplotlib.pyplot as plt\nimport matplotlib.dates as mdates\nimport numpy as np\nimport pandas as pd\n\ndf = pd.read_csv('test.csv', sep=',')\nprint(df.head(5))"
  }
]