[
  {
    "path": "DepressionCollected/Classification/AudioModelChecking.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nimport numpy as np\nimport pandas as pd\nimport wave\nimport re\nimport os\nimport tensorflow.compat.v1 as tf\nimport random\nimport itertools\nfrom audio_gru_whole import AudioBiLSTM\n\nfrom sklearn.preprocessing import StandardScaler\nimport pickle\n\nclass BiLSTM(nn.Module):\n    def __init__(self, rnn_layers, dropout, num_classes, audio_hidden_dims, audio_embed_size):\n        super(BiLSTM, self).__init__()\n\n        self.lstm_net_audio = nn.GRU(audio_embed_size, audio_hidden_dims,\n                                num_layers=rnn_layers, dropout=dropout, batch_first=True)\n\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(dropout),\n            nn.Linear(audio_hidden_dims, audio_hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(dropout),\n            nn.Linear(audio_hidden_dims, num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1)\n        )\n\n    def forward(self, x):\n        x, _ = self.lstm_net_audio(x)\n        # x = self.bn(x)\n        x = x.sum(dim=1)\n        out = self.fc_audio(x)\n        return out\n\n# prefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\n# audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/Audio/whole_samples_clf_avid256.npz'))['arr_0'], axis=2)\n# audio_targets = np.load(os.path.join(prefix, 'Features/Audio/whole_labels_clf_avid256.npz'))['arr_0']\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']\n\naudio_dep_idxs = np.where(audio_targets == 1)[0]\naudio_non_idxs = np.where(audio_targets == 0)[0]\n\ndef standard_confusion_matrix(y_test, y_test_pred):\n    \"\"\"\n    Make confusion matrix with format:\n                  -----------\n                  | TP | FP |\n                  -----------\n                  | FN | TN |\n                  -----------\n    Parameters\n    ----------\n    y_true : ndarray - 1D\n    y_pred : ndarray - 1D\n\n    Returns\n    -------\n    ndarray - 2D\n    \"\"\"\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    Evaluation metrics for network performance.\n    \"\"\"\n    # y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n    y_test_pred = y_test_pred_proba\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred)\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\nconfig = {\n    'num_classes': 2,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'embedding_size': 256,\n    'batch_size': 4,\n    'epochs': 100,\n    'learning_rate': 1e-5,\n    'hidden_dims': 256,\n    'bidirectional': False,\n    'cuda': False\n}\n\n# audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Classification/Audio/BiLSTM_gru_vlad256_256_0.80.pt'))\n# audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Classification/Audio3/BiLSTM_gru_vlad256_256_0.89.pt'))\n# audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Classification/Audio2/BiLSTM_gru_vlad256_256_0.65.pt'))\n\n# model = 
BiLSTM(config['rnn_layers'], config['dropout'], config['num_classes'], \\\n#          config['hidden_dims'], config['embedding_size'])\n         \n# model_state_dict = {}\n# model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']\n# model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']\n# model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']\n# model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']\n\n# model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']\n# model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']\n# model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']\n# model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']\n\n# model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']\n# model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']\n# model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']\n# model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']\n# model_state_dict = audio_lstm_model.state_dict()\n# model.load_state_dict(model_state_dict, strict=False)\n\ndef evaluate(model, test_idxs):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)\n    # X_test = audio_features[test_dep_idxs+test_non_idxs]\n    # Y_test = audio_targets[test_dep_idxs+test_non_idxs]\n    X_test = audio_features[test_idxs]\n    Y_test = audio_targets[test_idxs]\n    global max_train_acc, max_acc,max_f1\n    for i in range(0, X_test.shape[0], config['batch_size']):\n        if i + config['batch_size'] > X_test.shape[0]:\n            x, y = X_test[i:], Y_test[i:]\n        else:\n            x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), Variable(torch.from_numpy(y))\n        with torch.no_grad():\n            output = model(x.squeeze(2))\n        pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))\n        \n    y_test_pred, conf_matrix = model_performance(Y_test, pred[config['batch_size']:])\n    print('Calculating additional test metrics...')\n    accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n    precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n    recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n    f1_score = 2 * (precision * recall) / (precision + recall)\n    print(\"Accuracy: {}\".format(accuracy))\n    print(\"Precision: {}\".format(precision))\n    print(\"Recall: {}\".format(recall))\n    print(\"F1-Score: {}\\n\".format(f1_score))\n    print('='*89)\n    return precision, recall, f1_score\n\n\n# evaluate(audio_features_test, fuse_targets_test, audio_lstm_model)\n# evaluate(model)\n\nidxs_paths = ['train_idxs_0.63_1.npy', 'train_idxs_0.65_2.npy', 
'train_idxs_0.60_3.npy']\naudio_model_paths = ['BiLSTM_gru_vlad256_256_0.67_1.pt', 'BiLSTM_gru_vlad256_256_0.67_2.pt', 'BiLSTM_gru_vlad256_256_0.63_3.pt']\nps, rs, fs = [], [], []\nfor fold in range(3):\n    train_idxs_tmp = np.load(os.path.join(prefix, 'Features/TextWhole/{}'.format(idxs_paths[fold])), allow_pickle=True)\n    test_idxs_tmp = list(set(list(audio_dep_idxs)+list(audio_non_idxs)) - set(train_idxs_tmp))\n    audio_lstm_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Audio/{}'.format(audio_model_paths[fold])))\n\n    train_idxs, test_idxs = [], []\n    # depression data augmentation (train keeps all 3! = 6 segment permutations)\n    for idx in train_idxs_tmp:\n        if idx in audio_dep_idxs:\n            feat = audio_features[idx]\n            count = 0\n            resample_idxs = [0,1,2,3,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n                    audio_targets = np.hstack((audio_targets, 1))\n                    train_idxs.append(len(audio_features)-1)\n                count += 1\n        else:\n            train_idxs.append(idx)\n\n    # test-time augmentation keeps only permutations [0, 1, 4, 5]\n    for idx in test_idxs_tmp:\n        if idx in audio_dep_idxs:\n            feat = audio_features[idx]\n            count = 0\n            # resample_idxs = random.sample(range(6), 4)\n            resample_idxs = [0,1,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n                    audio_targets = np.hstack((audio_targets, 1))\n                    test_idxs.append(len(audio_features)-1)\n                count += 1\n        else:\n            test_idxs.append(idx)\n    p, r, f = evaluate(audio_lstm_model, test_idxs)\n    ps.append(p)\n    rs.append(r)\n    fs.append(f)\nprint('precision: {} \\n recall: {} \\n f1 score: {}'.format(np.mean(ps), np.mean(rs), np.mean(fs)))\n
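\n# Hedged sanity sketch (added for illustration; not part of the original\n# pipeline). The augmentation above permutes the 3 segment rows\n# (positive/neutral/negative) of each depressed sample, yielding 3! = 6\n# orderings per sample; 'demo_feat' below is a hypothetical stand-in.\ndemo_feat = np.arange(6).reshape(3, 2)  # stand-in for one (3, 256) sample\ndemo_variants = [np.stack(p) for p in itertools.permutations(demo_feat, demo_feat.shape[0])]\nassert len(demo_variants) == 6  # 3! orderings\n"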
  },
  {
    "path": "DepressionCollected/Classification/AudioTraditionalClassifiers.py",
    "content": "from sklearn.model_selection import KFold\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.model_selection import train_test_split\n\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']\naudio_dep_idxs_tmp = np.where(audio_targets == 1)[0]\naudio_non_idxs = np.where(audio_targets == 0)[0]\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    Evaluation metrics for network performance.\n    \"\"\"\n#     y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n    y_test_pred = y_test_pred_proba\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred)\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\ndef standard_confusion_matrix(y_test, y_test_pred):\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\ntrain_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),\nnp.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.65_2.npy'), allow_pickle=True),\nnp.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]\nprecs, recs, f1s = [], [], []\nfor idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):\n    test_idxs_tmp = list(set(list(audio_dep_idxs_tmp)+list(audio_non_idxs)) - set(train_idxs_tmp))\n    train_idxs, test_idxs = [], []\n    # depression data augmentation\n    for idx in train_idxs_tmp:\n        if idx in audio_dep_idxs_tmp:\n            feat = audio_features[idx]\n            count = 0\n            resample_idxs = [0,1,2,3,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n                    audio_targets = np.hstack((audio_targets, 1))\n                    train_idxs.append(len(audio_features)-1)\n                count += 1\n        else:\n            train_idxs.append(idx)\n\n    for idx in test_idxs_tmp:\n        if idx in audio_dep_idxs_tmp:\n            feat = audio_features[idx]\n            count = 0\n            # resample_idxs = random.sample(range(6), 4)\n            resample_idxs = [0,1,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n                    audio_targets = np.hstack((audio_targets, 1))\n                    test_idxs.append(len(audio_features)-1)\n                count += 1\n        else:\n            test_idxs.append(idx)\n\n    X_train = audio_features[train_idxs]\n    Y_train = audio_targets[train_idxs]\n    X_test = audio_features[test_idxs]\n    Y_test = audio_targets[test_idxs]\n\n    # Decision Tree\n    # from sklearn import tree\n    # clf = tree.DecisionTreeClassifier(max_depth=20)\n\n    # svm\n    # from sklearn.svm import SVC\n    # clf = SVC(kernel='sigmoid')\n\n    # rf\n    from sklearn.ensemble import RandomForestClassifier\n    clf = RandomForestClassifier(n_estimators=50)\n\n    # 
lr\n    # from sklearn.linear_model import LogisticRegression\n    # clf = LogisticRegression(solver='newton-cg')\n\n    clf.fit([f.flatten() for f in X_train], Y_train)\n    pred = clf.predict([f.flatten() for f in X_test])\n    # clf.fit([f.sum(axis=0) for f in X_train], Y_train)\n    # pred = clf.predict([f.sum(axis=0) for f in X_test])\n\n    y_test_pred, conf_matrix = model_performance(Y_test, pred)\n\n    # custom evaluation metrics from the [[tp, fp], [fn, tn]] confusion matrix\n    print('Calculating additional test metrics...')\n    accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n    precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n    recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n    f1_score = 2 * (precision * recall) / (precision + recall)\n    print(\"Accuracy: {}\".format(accuracy))\n    print(\"Precision: {}\".format(precision))\n    print(\"Recall: {}\".format(recall))\n    print(\"F1-Score: {}\\n\".format(f1_score))\n    print('='*89)\n    precs.append(0 if np.isnan(precision) else precision)\n    recs.append(0 if np.isnan(recall) else recall)\n    f1s.append(0 if np.isnan(f1_score) else f1_score)\n    # precs.append(precision)\n    # recs.append(recall)\n    # f1s.append(f1_score)\nprint(np.mean(precs), np.mean(recs), np.mean(f1s))
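\n\n# Hedged illustration (added; not in the original script): each sample here is\n# a (3, 256) VLAD feature (one 256-d vector per interview segment), so\n# f.flatten() hands the classifier a single 768-d vector.\ndemo_feat = np.zeros((3, 256))\nassert demo_feat.flatten().shape == (768,)\n"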
  },
  {
    "path": "DepressionCollected/Classification/FuseModelChecking.py",
    "content": "from fuse_net_whole import fusion_net, config, model_performance\nimport os\nimport numpy as np\nimport torch\nfrom torch.autograd import Variable\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \"./\"))\nidxs_paths = ['train_idxs_0.63_1.npy', 'train_idxs_0.65_2.npy', 'train_idxs_0.60_3.npy']\ntext_model_paths = ['BiLSTM_128_0.67_1.pt', 'BiLSTM_128_0.66_2.pt', 'BiLSTM_128_0.66_3.pt']\naudio_model_paths = ['BiLSTM_gru_vlad256_256_0.63_1.pt', 'BiLSTM_gru_vlad256_256_0.65_2.pt', 'BiLSTM_gru_vlad256_256_0.60_3.pt']\nfuse_model_paths = ['fuse_0.69_1.pt', 'fuse_0.68_2.pt', 'fuse_0.62_3.pt']\ntext_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']\nfuse_features = [[audio_features[i], text_features[i]] for i in range(text_features.shape[0])]\nfuse_targets = text_targets\nfuse_dep_idxs = np.where(text_targets == 1)[0]\nfuse_non_idxs = np.where(text_targets == 0)[0]\n\ndef evaluate(model, test_idxs):\n    model.eval()\n    pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)\n    X_test = []\n    Y_test = []\n    for idx in test_idxs:\n        X_test.append(fuse_features[idx])\n        Y_test.append(fuse_targets[idx])\n    global max_train_acc, max_acc,max_f1\n    for i in range(0, len(X_test), config['batch_size']):\n        if i + config['batch_size'] > len(X_test):\n            x, y = X_test[i:], Y_test[i:]\n        else:\n            x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        text_feature, audio_feature = model.pretrained_feature(x)\n        with torch.no_grad():\n            # concat_x = torch.cat((audio_feature, text_feature), dim=1)\n            audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()\n            text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()\n            concat_x = torch.cat((text_feature, audio_feature), dim=1)\n            output = model(concat_x)\n        pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))\n        \n    y_test_pred, conf_matrix = model_performance(Y_test, pred[config['batch_size']:])\n    # custom evaluation metrics\n    print('Calculating additional test metrics...')\n    accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n    precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n    recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n    f1_score = 2 * (precision * recall) / (precision + recall)\n    print(\"Accuracy: {}\".format(accuracy))\n    print(\"Precision: {}\".format(precision))\n    print(\"Recall: {}\".format(recall))\n    print(\"F1-Score: {}\\n\".format(f1_score))\n    print('='*89)\n\n    return precision, recall, f1_score\n\nps, rs, fs = [], [], []\nfor fold in range(3):\n    train_idxs_tmp = np.load(os.path.join(prefix, 'Features/TextWhole/{}'.format(idxs_paths[fold])), allow_pickle=True)\n    test_idxs_tmp = list(set(list(fuse_dep_idxs)+list(fuse_non_idxs)) - 
set(train_idxs_tmp))\n    resample_idxs = list(range(6))\n    train_idxs, test_idxs = [], []\n    # depression data augmentation\n    for idx in train_idxs_tmp:\n        if idx in fuse_dep_idxs:\n            feat = fuse_features[idx]\n            audio_perm = itertools.permutations(feat[0], 3)\n            text_perm = itertools.permutations(feat[1], 3)\n            count = 0\n            for fuse_perm in zip(audio_perm, text_perm):\n                if count in resample_idxs:\n                    fuse_features.append(fuse_perm)\n                    fuse_targets = np.hstack((fuse_targets, 1))\n                    train_idxs.append(len(fuse_features)-1)\n                count += 1\n        else:\n            train_idxs.append(idx)\n\n    for idx in test_idxs_tmp:\n        if idx in fuse_dep_idxs:\n            feat = fuse_features[idx]\n            audio_perm = itertools.permutations(feat[0], 3)\n            text_perm = itertools.permutations(feat[1], 3)\n            count = 0\n            resample_idxs = [0,1,4,5]\n            for fuse_perm in zip(audio_perm, text_perm):\n                if count in resample_idxs:\n                    fuse_features.append(fuse_perm)\n                    fuse_targets = np.hstack((fuse_targets, 1))\n                    test_idxs.append(len(fuse_features)-1)\n                count += 1\n        else:\n            test_idxs.append(idx)\n    \n    fuse_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Fuse/{}'.format(fuse_model_paths[fold])))\n    p, r, f = evaluate(fuse_model, test_idxs)\n    ps.append(p)\n    rs.append(r)\n    fs.append(f)\nprint('precision: {} \\n recall: {} \\n f1 score: {}'.format(np.mean(ps), np.mean(rs), np.mean(fs)))\n
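\n# Hedged note (added for illustration; not in the original script):\n# zip(audio_perm, text_perm) walks both permutation generators in lockstep, so\n# each augmented sample reorders its audio and text segments identically.\ndemo_pairs = list(zip(itertools.permutations([0, 1, 2]), itertools.permutations(['a', 'b', 'c'])))\nassert demo_pairs[0] == ((0, 1, 2), ('a', 'b', 'c'))  # same ordering on both sides\n"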
  },
  {
    "path": "DepressionCollected/Classification/TextModelChecking.py",
    "content": "\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nimport numpy as np\nimport pandas as pd\nimport wave\nimport re\nimport os\nimport tensorflow.compat.v1 as tf\nimport random\nimport itertools\n\nfrom sklearn.preprocessing import StandardScaler\nimport pickle\n\n# prefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\n# text_features = np.load(os.path.join(prefix, 'Features/Text/whole_samples_clf_avg.npz'))['arr_0']\n# text_targets = np.load(os.path.join(prefix, 'Features/Text/whole_labels_clf_avg.npz'))['arr_0']\n\n# audio_dep_idxs = np.where(text_targets == 1)[0]\n# audio_non_idxs = np.where(text_targets == 0)[0]\n# # train_dep_idxs_tmp = np.load(os.path.join(prefix, 'Features/Text/train_dep_idxs_0.80.npy'), allow_pickle=True)\n# # train_non_idxs = list(np.load(os.path.join(prefix, 'Features/Text/train_non_idxs_0.80.npy'), allow_pickle=True))\n# # train_dep_idxs_tmp = np.load(os.path.join(prefix, 'Features/Text/train_dep_idxs_0.65_2.npy'), allow_pickle=True)\n# # train_non_idxs = list(np.load(os.path.join(prefix, 'Features/Text/train_non_idxs_0.65_2.npy'), allow_pickle=True))\n# train_dep_idxs_tmp = np.load(os.path.join(prefix, 'Features/Text/train_dep_idxs_0.89_3.npy'), allow_pickle=True)\n# train_non_idxs = list(np.load(os.path.join(prefix, 'Features/Text/train_non_idxs_0.89_3.npy'), allow_pickle=True))\n\n# test_dep_idxs_tmp = list(set(audio_dep_idxs) - set(train_dep_idxs_tmp))\n# test_non_idxs = list(set(audio_non_idxs) - set(train_non_idxs))\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\ntext_features = np.load(os.path.join(\n    prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(\n    prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']\ntext_dep_idxs_tmp = np.where(text_targets == 1)[0]\ntext_non_idxs = np.where(text_targets == 0)[0]\n\n\n\n\n# # training data augmentation\n# train_dep_idxs = []\n# for idx in train_dep_idxs_tmp:\n#     feat = text_features[idx]\n#     for i in itertools.permutations(feat, feat.shape[0]):\n#         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n#         text_targets = np.hstack((text_targets, 1))\n#         train_dep_idxs.append(len(text_features)-1)\n\n#         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n#         text_targets = np.hstack((text_targets, 1))\n#         train_dep_idxs.append(len(text_features)-1)\n\n# # test data augmentation\n# test_dep_idxs = []\n# for idx in test_dep_idxs_tmp:\n#     feat = text_features[idx]\n#     for i in itertools.permutations(feat, feat.shape[0]):\n#         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n#         text_targets = np.hstack((text_targets, 1))\n#         test_dep_idxs.append(len(text_features)-1)\n\ndef standard_confusion_matrix(y_test, y_test_pred):\n    \"\"\"\n    Make confusion matrix with format:\n                  -----------\n                  | TP | FP |\n                  -----------\n                  | FN | TN |\n                  -----------\n    Parameters\n    ----------\n    y_true : ndarray - 1D\n    y_pred : ndarray - 1D\n\n    Returns\n    -------\n    ndarray - 2D\n    \"\"\"\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    
Evaluation metrics for network performance.\n    \"\"\"\n    # y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n    y_test_pred = y_test_pred_proba\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred)\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\n\nclass TextBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(TextBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        self.init_weight()\n\n    def init_weight(self):\n        for name, param in self.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # two-layer stacked LSTM\n        self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=self.bidirectional)\n\n        # self.init_weight()\n\n        # fully connected (FC) output layers\n        # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1),\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n\n        # x : [len_seq, batch_size, embedding_dim]\n        x = x.permute(1, 0, 2)\n        output, (final_hidden_state, final_cell_state) = self.lstm_net(x)\n        # output : [batch_size, 
len_seq, n_hidden * 2]\n        output = output.permute(1, 0, 2)\n        # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n        final_hidden_state = final_hidden_state.permute(1, 0, 2)\n        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n        # atten_out = self.attention_net(output, final_hidden_state)\n        atten_out = self.attention_net_with_w(output, final_hidden_state)\n        return self.fc_out(atten_out)\n\nclass BiLSTM(nn.Module):\n    def __init__(self, rnn_layers, dropout, num_classes, text_hidden_dims, text_embed_size):\n        super(BiLSTM, self).__init__()\n\n        self.text_embed_size = text_embed_size\n        self.text_hidden_dims = text_hidden_dims\n        self.rnn_layers = rnn_layers\n        self.dropout = dropout\n        self.num_classes = num_classes\n\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.text_hidden_dims, self.text_hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n\n        # two-layer stacked LSTM\n        self.lstm_net = nn.LSTM(self.text_embed_size, self.text_hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=True)\n        # fully connected (FC) output layers\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.text_hidden_dims, self.text_hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.text_hidden_dims, self.num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1),\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x_text):\n        # x : [len_seq, batch_size, embedding_dim]\n        x_text = x_text.permute(1, 0, 2)\n        output, (final_hidden_state, _) = self.lstm_net(x_text)\n        # output : [batch_size, len_seq, n_hidden * 2]\n        output = output.permute(1, 0, 2)\n        # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n        final_hidden_state = final_hidden_state.permute(1, 0, 2)\n        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n        # atten_out = self.attention_net(output, final_hidden_state)\n        atten_out = self.attention_net_with_w(output, final_hidden_state)\n        text_feature = self.fc_out(atten_out)\n\n        return 
text_feature\n\ndef evaluate(model, test_idxs):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)\n    # X_test = text_features[test_dep_idxs+test_non_idxs]\n    # Y_test = text_targets[test_dep_idxs+test_non_idxs]\n    X_test = text_features[test_idxs]\n    Y_test = text_targets[test_idxs]\n    global max_train_acc, max_acc, max_f1\n    for i in range(0, X_test.shape[0], config['batch_size']):\n        if i + config['batch_size'] > X_test.shape[0]:\n            x, y = X_test[i:], Y_test[i:]\n        else:\n            x, y = X_test[i:(i+config['batch_size'])\n                          ], Y_test[i:(i+config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(\n            ),             Variable(torch.from_numpy(y)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(x).type(\n                torch.FloatTensor), requires_grad=True), Variable(torch.from_numpy(y))\n        with torch.no_grad():\n            output = model(x.squeeze(2))\n        pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))\n\n    y_test_pred, conf_matrix = model_performance(\n        Y_test, pred[config['batch_size']:])\n    print('Calculating additional test metrics...')\n    accuracy = float(conf_matrix[0][0] +\n                     conf_matrix[1][1]) / np.sum(conf_matrix)\n    precision = float(conf_matrix[0][0]) / \\\n        (conf_matrix[0][0] + conf_matrix[0][1])\n    recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n    f1_score = 2 * (precision * recall) / (precision + recall)\n    print(\"Accuracy: {}\".format(accuracy))\n    print(\"Precision: {}\".format(precision))\n    print(\"Recall: {}\".format(recall))\n    print(\"F1-Score: {}\\n\".format(f1_score))\n    print('='*89)\n    return precision, recall, f1_score\n\ntext_model_paths = ['BiLSTM_128_0.64_1.pt', 'BiLSTM_128_0.66_2.pt', 'BiLSTM_128_0.66_3.pt']\ntrain_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),\n                   np.load(os.path.join(\n                       prefix, 'Features/TextWhole/train_idxs_0.60_2.npy'), allow_pickle=True),\n                   np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]\nresample_idxs = [0, 1, 2, 3, 4, 5]\nfold = 1\nps, rs, fs = [], [], []\nfor idx_i, train_idxs_tmp in enumerate(train_idxs_tmps):\n    test_idxs_tmp = list(\n        set(list(text_dep_idxs_tmp)+list(text_non_idxs)) - set(train_idxs_tmp))\n    train_idxs, test_idxs = [], []\n    # depression data augmentation\n    for idx in train_idxs_tmp:\n        if idx in text_dep_idxs_tmp:\n            feat = text_features[idx]\n            count = 0\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    text_features = np.vstack(\n                        (text_features, np.expand_dims(list(i), 0)))\n                    text_targets = np.hstack((text_targets, 1))\n                    train_idxs.append(len(text_features)-1)\n                count += 1\n        else:\n            train_idxs.append(idx)\n\n    for idx in test_idxs_tmp:\n        if idx in text_dep_idxs_tmp:\n            feat = text_features[idx]\n            count = 0\n            # resample_idxs = random.sample(range(6), 4)\n            resample_idxs = [0,1,4,5]\n            for i in 
itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    text_features = np.vstack(\n                        (text_features, np.expand_dims(list(i), 0)))\n                    text_targets = np.hstack((text_targets, 1))\n                    test_idxs.append(len(text_features)-1)\n                count += 1\n        else:\n            test_idxs.append(idx)\n\n    config = {\n        'num_classes': 2,\n        'dropout': 0.5,\n        'rnn_layers': 2,\n        'embedding_size': 1024,\n        'batch_size': 4,\n        'epochs': 100,\n        'learning_rate': 2e-5,\n        'hidden_dims': 128,\n        'bidirectional': True,\n        'cuda': False,\n    }\n\n    text_lstm_model = torch.load(os.path.join(\n        prefix, 'Model/ClassificationWhole/Text/{}'.format(text_model_paths[idx_i])))\n\n    model = BiLSTM(config['rnn_layers'], config['dropout'], config['num_classes'],\n                   config['hidden_dims'], config['embedding_size'])\n\n    # model_state_dict = {}\n    # model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']\n    # model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']\n    # model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']\n    # model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']\n\n    # model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']\n    # model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']\n    # model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']\n    # model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']\n\n    # model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']\n    # model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']\n    # model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']\n    # model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']\n    # model_state_dict = text_lstm_model.state_dict()\n    # model.load_state_dict(model_state_dict)\n\n    # evaluate(text_features_test, fuse_targets_test, audio_lstm_model)\n    # evaluate(model, test_idxs)\n    \n    p, r, f = evaluate(text_lstm_model, test_idxs)\n    ps.append(p)\n    rs.append(r)\n    fs.append(f)\nprint('precision: {} \\n recall: {} \\n f1 score: {}'.format(np.mean(ps), np.mean(rs), np.mean(fs)))\n
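\n# Hedged sanity sketch (added for illustration; not part of the original\n# script): standard_confusion_matrix returns the [[tp, fp], [fn, tn]] layout\n# assumed by the metric formulas above. For y = [1, 1, 1, 0] and\n# y_pred = [1, 0, 1, 0]: tp = 2, fp = 0, fn = 1, tn = 1.\ndemo_cm = standard_confusion_matrix(np.array([1, 1, 1, 0]), np.array([1, 0, 1, 0]))\nassert (demo_cm == np.array([[2, 0], [1, 1]])).all()\n"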
  },
  {
    "path": "DepressionCollected/Classification/TextTraditionalClassifiers.py",
    "content": "from sklearn.model_selection import KFold\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.model_selection import train_test_split\n\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\ntext_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']\ntext_dep_idxs_tmp = np.where(text_targets == 1)[0]\ntext_non_idxs = np.where(text_targets == 0)[0]\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    Evaluation metrics for network performance.\n    \"\"\"\n#     y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n    y_test_pred = y_test_pred_proba\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred)\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\ndef standard_confusion_matrix(y_test, y_test_pred):\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\ntrain_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),\nnp.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.65_2.npy'), allow_pickle=True),\nnp.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]\nprecs, recs, f1s = [], [], []\n\nfor idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):\n    test_idxs_tmp = list(set(list(text_dep_idxs_tmp)+list(text_non_idxs)) - set(train_idxs_tmp))\n    train_idxs, test_idxs = [], []\n\n    # depression data augmentation\n    for idx in train_idxs_tmp:\n        if idx in text_dep_idxs_tmp:\n            feat = text_features[idx]\n            count = 0\n            resample_idxs = [0,1,2,3,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n                    text_targets = np.hstack((text_targets, 1))\n                    train_idxs.append(len(text_features)-1)\n                count += 1\n        else:\n            train_idxs.append(idx)\n\n    for idx in test_idxs_tmp:\n        if idx in text_dep_idxs_tmp:\n            feat = text_features[idx]\n            count = 0\n            # resample_idxs = random.sample(range(6), 4)\n            resample_idxs = [0,1,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n                    text_targets = np.hstack((text_targets, 1))\n                    test_idxs.append(len(text_features)-1)\n                count += 1\n        else:\n            test_idxs.append(idx)\n    # train_idxs = train_idxs_tmp\n    # test_idxs = test_idxs_tmp\n\n    X_train = text_features[train_idxs]\n    Y_train = text_targets[train_idxs]\n    X_test = text_features[test_idxs]\n    Y_test = text_targets[test_idxs]\n\n    # Decision Tree\n    from sklearn import tree\n    clf = tree.DecisionTreeClassifier(max_depth=20)\n\n    # svm\n    # from sklearn.svm import SVC\n    # clf = SVC(kernel='rbf', gamma='auto')\n\n    # rf\n    # from sklearn.ensemble import RandomForestClassifier\n    # clf = 
RandomForestClassifier(n_estimators=10, max_depth=20)\n\n    # lr\n    # from sklearn.linear_model import LogisticRegression\n    # clf = LogisticRegression()\n\n    clf.fit([f.flatten() for f in X_train], Y_train)\n    pred = clf.predict([f.flatten() for f in X_test])\n    # clf.fit([f.sum(axis=0) for f in X_train], Y_train)\n    # pred = clf.predict([f.sum(axis=0) for f in X_test])\n\n    y_test_pred, conf_matrix = model_performance(Y_test, pred)\n\n    # custom evaluation metrics\n    print('Calculating additional test metrics...')\n    accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n    precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n    recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n    f1_score = 2 * (precision * recall) / (precision + recall)\n    print(\"Accuracy: {}\".format(accuracy))\n    print(\"Precision: {}\".format(precision))\n    print(\"Recall: {}\".format(recall))\n    print(\"F1-Score: {}\\n\".format(f1_score))\n    print('='*89)\n    # precs.append(0 if np.isnan(precision) else precision)\n    # recs.append(0 if np.isnan(recall) else recall)\n    # f1s.append(0 if np.isnan(f1_score) else f1_score)\n    precs.append(precision)\n    recs.append(recall)\n    f1s.append(f1_score)\nprint(np.mean(precs), np.mean(recs), np.mean(f1s))"
  },
  {
    "path": "DepressionCollected/Classification/audio_features_whole.py",
    "content": "import os\nimport numpy as np\nimport pandas as pd\nimport wave\nimport librosa\nfrom python_speech_features import *\nimport sys\nimport pickle\nsys.path.append('/Users/linlin/Desktop/depression/classfication')\n\nimport tensorflow.compat.v1 as tf\n\nimport vggish.vggish_input as vggish_input\nimport vggish.vggish_params as vggish_params\nimport vggish.vggish_postprocess as vggish_postprocess\nimport vggish.vggish_slim as vggish_slim\n\nimport loupe_keras as lpk\n\nfrom allennlp.commands.elmo import ElmoEmbedder\n\ntf.enable_eager_execution()\n\nelmo = ElmoEmbedder()\n\nos.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\n\n# Paths to downloaded VGGish files.\ncheckpoint_path =os.path.join(os.getcwd(),  'vggish/vggish_model.ckpt')\npca_params_path = os.path.join(os.getcwd(), 'vggish/vggish_pca_params.npz')\n\ncluster_size = 16\n\nmin_len = 100\nmax_len = -1\n\ndef to_vggish_embedds(x, sr):\n    # x为输入的音频，sr为sample_rate\n    input_batch = vggish_input.waveform_to_examples(x, sr)\n    with tf.Graph().as_default(), tf.Session() as sess:\n      vggish_slim.define_vggish_slim()\n      vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)\n\n      features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)\n      embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)\n      [embedding_batch] = sess.run([embedding_tensor],\n                                   feed_dict={features_tensor: input_batch})\n\n    # Postprocess the results to produce whitened quantized embeddings.\n    pproc = vggish_postprocess.Postprocessor(pca_params_path)\n    postprocessed_batch = pproc.postprocess(embedding_batch)\n    \n    return tf.cast(postprocessed_batch, dtype='float32')\n\ndef wav2vlad(wave_data, sr):\n    global cluster_size\n    signal = wave_data\n    melspec = librosa.feature.melspectrogram(signal, n_mels=80,sr=sr).astype(np.float32).T\n    melspec = np.log(np.maximum(1e-6, melspec))\n    feature_size = melspec.shape[1]\n    max_samples = melspec.shape[0]\n    output_dim = cluster_size * 16\n    feat = lpk.NetVLAD(feature_size=feature_size, max_samples=max_samples, \\\n                            cluster_size=cluster_size, output_dim=output_dim) \\\n                                (tf.convert_to_tensor(melspec))\n    with tf.Session() as sess:\n        init = tf.global_variables_initializer()\n        sess.run(init)\n        r = feat.numpy()\n    return r\n        \ndef extract_features(number, audio_features, targets, path):\n    global max_len, min_len\n    if not os.path.exists(os.path.join(prefix, '{1}/{0}/positive_out.wav'.format(number, path))):\n        return    \n    positive_file = wave.open(os.path.join(prefix, '{1}/{0}/positive_out.wav'.format(number, path)))\n    sr1 = positive_file.getframerate()\n    nframes1 = positive_file.getnframes()\n    wave_data1 = np.frombuffer(positive_file.readframes(nframes1), dtype=np.short).astype(np.float)\n    len1 = nframes1 / sr1\n\n    neutral_file = wave.open(os.path.join(prefix, '{1}/{0}/neutral_out.wav'.format(number, path)))\n    sr2 = neutral_file.getframerate()\n    nframes2 = neutral_file.getnframes()\n    wave_data2 = np.frombuffer(neutral_file.readframes(nframes2), dtype=np.short).astype(np.float)\n    len2 = nframes2 / sr2\n\n    negative_file = wave.open(os.path.join(prefix, '{1}/{0}/negative_out.wav'.format(number, path)))\n    sr3 = negative_file.getframerate()\n    nframes3 = negative_file.getnframes()\n    
    wave_data3 = np.frombuffer(negative_file.readframes(nframes3), dtype=np.short).astype(np.float64)\n    len3 = nframes3 / sr3\n\n    for l in [len1, len2, len3]:\n        if l > max_len:\n            max_len = l\n        if l < min_len:\n            min_len = l\n\n    with open(os.path.join(prefix, '{1}/{0}/new_label.txt'.format(number, path))) as fli:\n        target = float(fli.readline())\n    \n    if wave_data1.shape[0] < 1:\n        wave_data1 = np.array([1e-4]*sr1*5)\n    if wave_data2.shape[0] < 1:\n        wave_data2 = np.array([1e-4]*sr2*5)\n    if wave_data3.shape[0] < 1:\n        wave_data3 = np.array([1e-4]*sr3*5)\n    audio_features.append([wav2vlad(wave_data1, sr1), wav2vlad(wave_data2, sr2), \\\n        wav2vlad(wave_data3, sr3)])\n    # targets.append(1 if target >= 53 else 0)\n    targets.append(target)\n\n\naudio_features = []\naudio_targets = []\n\nfor index in range(114):\n    extract_features(index+1, audio_features, audio_targets, 'Data')\n\nfor index in range(114):\n    extract_features(index+1, audio_features, audio_targets, 'ValidationData')\n\n\nprint(\"Saving npz file locally...\")\nnp.savez(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_%d.npz' % (cluster_size*16)), audio_features)\nnp.savez(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_%d.npz' % (cluster_size*16)), audio_targets)\n\nprint(max_len, min_len)
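\n\n# Hedged reload sketch (added for illustration; not in the original script):\n# np.savez stores positional arrays under the default key 'arr_0', which is why\n# the companion training scripts read such archives back as, e.g.,\n#   np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0']\n# (cluster_size * 16 = 256 gives the _256 filename suffix used above).\n"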
  },
  {
    "path": "DepressionCollected/Classification/audio_gru_whole.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.metrics import mean_absolute_error, mean_squared_error\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import KFold\n\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']\naudio_dep_idxs_tmp = np.where(audio_targets == 1)[0]\naudio_non_idxs = np.where(audio_targets == 0)[0]\n\nclass AudioBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(AudioBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        # self.init_weight()\n\n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if not 'ln' in name:\n                if 'bias' in name:\n                    nn.init.constant_(param, 0.0)\n                elif 'weight' in name:\n                    nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True))\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # self.lstm_net_audio = nn.LSTM(self.embedding_size,\n        #                         self.hidden_dims,\n        #                         num_layers=self.rnn_layers,\n        #                         dropout=self.dropout,\n        #                         bidirectional=self.bidirectional,\n        #                         batch_first=True)\n        self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)\n\n        self.ln = nn.LayerNorm(self.embedding_size)\n\n        # FC层\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1)\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        #         h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = 
lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        # print(atten_w.shape, m.transpose(1, 2).shape)\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        x = self.ln(x)\n        x, _ = self.lstm_net_audio(x)\n        x = x.mean(dim=1)\n        out = self.fc_audio(x)\n        return out\n\nconfig = {\n    'num_classes': 2,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'embedding_size': 256,\n    'batch_size': 8,\n    'epochs': 170,\n    'learning_rate': 6e-6,\n    'hidden_dims': 256,\n    'bidirectional': False,\n    'cuda': False\n}\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n\ndef standard_confusion_matrix(y_test, y_test_pred):\n    \"\"\"\n    Make confusion matrix with format:\n                  -----------\n                  | TP | FP |\n                  -----------\n                  | FN | TN |\n                  -----------\n    Parameters\n    ----------\n    y_true : ndarray - 1D\n    y_pred : ndarray - 1D\n\n    Returns\n    -------\n    ndarray - 2D\n    \"\"\"\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test.cpu().numpy(), y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    Evaluation metrics for network performance.\n    \"\"\"\n    y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred.numpy())\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\ndef train(epoch, train_idxs):\n    global lr, train_acc\n    model.train()\n    batch_idx = 1\n    total_loss = 0\n    correct = 0\n    pred = np.array([])\n    X_train = audio_features[train_idxs]\n    Y_train = audio_targets[train_idxs]\n    for i in range(0, X_train.shape[0], config['batch_size']):\n        if i + config['batch_size'] > X_train.shape[0]:\n            x, y = X_train[i:], Y_train[i:]\n        else:\n            x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(\n                i + config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(y))\n\n        # zero the model's parameter gradients\n        optimizer.zero_grad()\n        output = model(x)\n        pred = output.data.max(1, keepdim=True)[1]\n        #print(pred.shape, y.shape)\n        correct += pred.eq(y.data.view_as(pred)).cpu().sum()\n        loss = criterion(output, y)\n        # backpropagate to compute gradients\n        loss.backward()\n        # update the network parameters from the gradients\n        optimizer.step()\n        batch_idx += 1\n        # loss.item() extracts the scalar value from the loss tensor\n        total_loss += loss.item()\n\n    train_acc = correct\n    print(\n        'Train Epoch: {:2d}\\t 
Learning rate: {:.4f}\\tLoss: {:.6f}\\t Accuracy: {}/{} ({:.0f}%)\\n '\n        .format(epoch + 1, config['learning_rate'], total_loss, correct,\n                X_train.shape[0], 100. * correct / X_train.shape[0]))\n\n\ndef evaluate(model, test_idxs, fold, train_idxs_tmp, train_idxs):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global max_f1, max_acc, min_mae, X_test_lens, max_prec, max_rec\n    pred = np.array([])\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(audio_features[test_idxs]).type(torch.FloatTensor), requires_grad=True).cuda(),\\\n                Variable(torch.from_numpy(audio_targets[test_idxs])).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(audio_features[test_idxs]).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(audio_targets[test_idxs])).type(torch.LongTensor)\n\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y)\n        total_loss += loss.item()\n        y_test_pred, conf_matrix = model_performance(y, output.cpu())\n        accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n        precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n        recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n        f1_score = 2 * (precision * recall) / (precision + recall)\n        print(\"Accuracy: {}\".format(accuracy))\n        print(\"Precision: {}\".format(precision))\n        print(\"Recall: {}\".format(recall))\n        print(\"F1-Score: {}\\n\".format(f1_score))\n        print('=' * 89)\n\n        if max_f1 <= f1_score and train_acc > len(train_idxs)*0.90  and f1_score > 0.5:\n            max_f1 = f1_score\n            max_acc = accuracy\n            max_rec = recall\n            max_prec = precision\n            mode ='gru'\n            save(model, os.path.join(prefix, 'Model/ClassificationWhole/Audio/BiLSTM_{}_vlad{}_{}_{:.2f}_{}'.format(mode, config['embedding_size'], config['hidden_dims'], max_f1, fold)))\n            np.save(os.path.join(prefix, 'Features/TextWhole/train_idxs_{:.2f}_{}.npy'.format(f1_score, fold)), train_idxs_tmp)\n            print('*' * 64)\n            print('model saved: f1: {}\\tacc: {}'.format(max_f1, max_acc))\n            print('*' * 64)\n\n    return total_loss\n\ndef get_param_group(model):\n    nd_list = []\n    param_list = []\n    for name, param in model.named_parameters():\n        if 'ln' in name:\n            nd_list.append(param)\n        else:\n            param_list.append(param)\n    return [{'params': param_list, 'weight_decay': 1e-5}, {'params': nd_list, 'weight_decay': 0}]\n\nif __name__ == '__main__':\n    # kf = KFold(n_splits=3, shuffle=True)\n    # fold = 1\n    # for train_idxs_tmp, test_idxs_tmp in kf.split(audio_features):\n    train_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),\n    np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_2.npy'), allow_pickle=True),\n    np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]\n    for idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):\n        fold = idx_idx + 1\n        # if idx_idx != 1:\n        #     continue\n        test_idxs_tmp = list(set(list(audio_dep_idxs_tmp)+list(audio_non_idxs)) - set(train_idxs_tmp))\n        train_idxs, test_idxs = [], []\n        resample_idxs = [0,1,2,3,4,5]\n        # 
depression data augmentation\n        for idx in train_idxs_tmp:\n            if idx in audio_dep_idxs_tmp:\n                feat = audio_features[idx]\n                count = 0\n                for i in itertools.permutations(feat, feat.shape[0]):\n                    if count in resample_idxs:\n                        audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n                        audio_targets = np.hstack((audio_targets, 1))\n                        train_idxs.append(len(audio_features)-1)\n                    count += 1\n            else:\n                train_idxs.append(idx)\n\n        for idx in test_idxs_tmp:\n            if idx in audio_dep_idxs_tmp:\n                feat = audio_features[idx]\n                count = 0\n                # resample_idxs = random.sample(range(6), 4)\n                resample_idxs = [0,1,4,5]\n                for i in itertools.permutations(feat, feat.shape[0]):\n                    if count in resample_idxs:\n                        audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n                        audio_targets = np.hstack((audio_targets, 1))\n                        test_idxs.append(len(audio_features)-1)\n                    count += 1\n            else:\n                test_idxs.append(idx)\n            # test_idxs.append(idx)\n\n        model = AudioBiLSTM(config)\n\n        if config['cuda']:\n            model = model.cuda()\n\n        param_group = get_param_group(model)\n        optimizer = optim.AdamW(param_group, lr=config['learning_rate'])\n        criterion = nn.CrossEntropyLoss()\n        # criterion = FocalLoss(class_num=2)\n        max_f1 = -1\n        max_acc = -1\n        max_rec = -1\n        max_prec = -1\n        train_acc = -1\n\n        for ep in range(1, config['epochs']):\n            train(ep, train_idxs)\n            tloss = evaluate(model, test_idxs, fold, train_idxs_tmp, train_idxs)\n        fold += 1"
  },
  {
    "path": "DepressionCollected/Classification/fuse_net_whole.py",
    "content": "\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nimport numpy as np\nimport pandas as pd\nimport wave\nimport librosa\nfrom python_speech_features import *\nimport re\nfrom allennlp.commands.elmo import ElmoEmbedder\nimport os\nimport tensorflow.compat.v1 as tf\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \"./\"))\n\ntext_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']\nfuse_features = [[audio_features[i], text_features[i]] for i in range(text_features.shape[0])]\nfuse_targets = text_targets\n\nfuse_dep_idxs = np.where(text_targets == 1)[0]\nfuse_non_idxs = np.where(text_targets == 0)[0]\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n    \ndef standard_confusion_matrix(y_test, y_test_pred):\n    \"\"\"\n    Make confusion matrix with format:\n                  -----------\n                  | TP | FP |\n                  -----------\n                  | FN | TN |\n                  -----------\n    Parameters\n    ----------\n    y_true : ndarray - 1D\n    y_pred : ndarray - 1D\n\n    Returns\n    -------\n    ndarray - 2D\n    \"\"\"\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    Evaluation metrics for network performance.\n    \"\"\"\n    # y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n    y_test_pred = y_test_pred_proba\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred)\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\nclass TextBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(TextBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        self.init_weight()\n        \n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # 双层lstm\n        self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                             
   bidirectional=self.bidirectional)\n        \n        # self.init_weight()\n        \n        # FC层\n        # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1),\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        \n        # x : [len_seq, batch_size, embedding_dim]\n        x = x.permute(1, 0, 2)\n        output, (final_hidden_state, final_cell_state) = self.lstm_net(x)\n        # output : [batch_size, len_seq, n_hidden * 2]\n        output = output.permute(1, 0, 2)\n        # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n        final_hidden_state = final_hidden_state.permute(1, 0, 2)\n        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n        # atten_out = self.attention_net(output, final_hidden_state)\n        atten_out = self.attention_net_with_w(output, final_hidden_state)\n        return self.fc_out(atten_out)\n\nclass AudioBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(AudioBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        # self.init_weight()\n\n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if not 'ln' in name:\n                if 'bias' in name:\n                    nn.init.constant_(param, 0.0)\n                elif 'weight' in name:\n                    nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True))\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # self.lstm_net_audio = 
nn.LSTM(self.embedding_size,\n        #                         self.hidden_dims,\n        #                         num_layers=self.rnn_layers,\n        #                         dropout=self.dropout,\n        #                         bidirectional=self.bidirectional,\n        #                         batch_first=True)\n        self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)\n\n        self.ln = nn.LayerNorm(self.embedding_size)\n\n        # FC层\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1)\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        #         h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n       # print(atten_w.shape, m.transpose(1, 2).shape)\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        x = self.ln(x)\n        x, _ = self.lstm_net_audio(x)\n        x = x.mean(dim=1)\n        out = self.fc_audio(x)\n        return out\n\nclass fusion_net(nn.Module):\n    def __init__(self, text_embed_size, text_hidden_dims, rnn_layers, dropout, num_classes, \\\n         audio_hidden_dims, audio_embed_size):\n        super(fusion_net, self).__init__()\n        self.text_embed_size = text_embed_size\n        self.audio_embed_size = audio_embed_size\n        self.text_hidden_dims = text_hidden_dims\n        self.audio_hidden_dims = audio_hidden_dims\n        self.rnn_layers = rnn_layers\n        self.dropout = dropout\n        self.num_classes = num_classes\n        \n        # ============================= TextBiLSTM =================================\n        \n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.text_hidden_dims, self.text_hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n\n        # 双层lstm\n        self.lstm_net = nn.LSTM(self.text_embed_size, self.text_hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=True)\n        # FC层\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.text_hidden_dims, 
self.text_hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout)\n        )\n        \n        # ============================= TextBiLSTM =================================\n\n        # ============================= AudioBiLSTM =============================\n\n        self.lstm_net_audio = nn.GRU(self.audio_embed_size,\n                                self.audio_hidden_dims,\n                                num_layers=self.rnn_layers,\n                                dropout=self.dropout,\n                                bidirectional=False,\n                                batch_first=True)\n\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.audio_hidden_dims, self.audio_hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout)\n        )\n\n        self.ln = nn.LayerNorm(self.audio_embed_size)\n        \n        # ============================= AudioBiLSTM =============================\n\n        # ============================= last fc layer =============================\n        # self.bn = nn.BatchNorm1d(self.text_hidden_dims + self.audio_hidden_dims)\n        # modal attention\n        self.modal_attn = nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.text_hidden_dims + self.audio_hidden_dims, bias=False)\n        self.fc_final = nn.Sequential(\n            nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.num_classes, bias=False),\n            # nn.ReLU(),\n            nn.Softmax(dim=1),\n            # nn.Sigmoid()\n        )\n        \n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n    \n    def pretrained_feature(self, x):\n        with torch.no_grad():\n            x_text = []\n            x_audio = []\n            for ele in x:\n                x_text.append(ele[1])\n                x_audio.append(ele[0])\n            x_text, x_audio = Variable(torch.tensor(x_text).type(torch.FloatTensor), requires_grad=False), Variable(torch.tensor(x_audio).type(torch.FloatTensor), requires_grad=False)\n            # ============================= TextBiLSTM =================================\n            # x : [len_seq, batch_size, embedding_dim]\n            x_text = x_text.permute(1, 0, 2)\n            output, (final_hidden_state, _) = self.lstm_net(x_text)\n            # output : [batch_size, len_seq, n_hidden * 2]\n            output = output.permute(1, 0, 2)\n            # 
final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n            final_hidden_state = final_hidden_state.permute(1, 0, 2)\n            # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n            # atten_out = self.attention_net(output, final_hidden_state)\n            atten_out = self.attention_net_with_w(output, final_hidden_state)\n            text_feature = self.fc_out(atten_out)\n\n            # ============================= TextBiLSTM =================================\n\n            # ============================= AudioBiLSTM =============================\n            x_audio = self.ln(x_audio)\n            x_audio, _ = self.lstm_net_audio(x_audio)\n            x_audio = x_audio.sum(dim=1)\n            audio_feature = self.fc_audio(x_audio)\n\n        # ============================= AudioBiLSTM =============================\n        return (text_feature, audio_feature)\n        \n    def forward(self, x): \n        # x = self.bn(x)\n        # modal_weights = torch.softmax(self.modal_attn(x), dim=1)\n        # modal_weights = self.modal_attn(x)\n        # x = (modal_weights * x)\n        output = self.fc_final(x)\n        return output\n    \nclass MyLoss(nn.Module):\n    def __init__(self):\n        super(MyLoss, self).__init__()\n        \n    def forward(self, text_feature, audio_feature, target, model):\n        weight = model.fc_final[0].weight\n        # bias = model.fc_final[0].bias\n        # print(weight, bias)\n        pred_text = F.linear(text_feature, weight[:, :config['text_hidden_dims']])\n        pred_audio = F.linear(audio_feature, weight[:, config['text_hidden_dims']:])\n        l = nn.CrossEntropyLoss()\n        target = torch.tensor(target)\n        # l = nn.BCEWithLogitsLoss()\n        # target = F.one_hot(target, num_classes=2).type(torch.FloatTensor)\n        # print('y: {}\\npred_audio: {}\\npred_text: {}\\n'.format(target, pred_audio.data.max(1, keepdim=True)[1], pred_text.data.max(1, keepdim=True)[1]))\n        # return l(pred_text, target) + l(pred_audio, target) + \\\n        #         config['lambda']*torch.norm(weight[:, :config['text_hidden_dims']]) + \\\n        #         config['lambda']*torch.norm(weight[:, config['text_hidden_dims']:])  \n        # a = F.softmax(pred_text, dim=1) + F.softmax(pred_audio, dim=1)\n        return l(pred_text, target) + l(pred_audio, target)\n    \n\nconfig = {\n    'num_classes': 2,\n    'dropout': 0.3,\n    'rnn_layers': 2,\n    'audio_embed_size': 256,\n    'text_embed_size': 1024,\n    'batch_size': 2,\n    'epochs': 100,\n    'learning_rate': 8e-6,\n    'audio_hidden_dims': 256,\n    'text_hidden_dims': 128,\n    'cuda': False,\n    'lambda': 1e-5,\n}\n\nmodel = fusion_net(config['text_embed_size'], config['text_hidden_dims'], config['rnn_layers'], \\\n    config['dropout'], config['num_classes'], config['audio_hidden_dims'], config['audio_embed_size'])\n\noptimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])\n# optimizer = optim.Adam(model.parameters())\n# criterion = nn.CrossEntropyLoss()\ncriterion = MyLoss()\n\ndef train(epoch, train_idxs):\n    global max_train_acc, train_acc\n    model.train()\n    batch_idx = 1\n    total_loss = 0\n    correct = 0\n    X_train = []\n    Y_train = []\n    for idx in train_idxs:\n        X_train.append(fuse_features[idx])\n        Y_train.append(fuse_targets[idx])\n    for i in range(0, len(X_train), config['batch_size']):\n        if i + config['batch_size'] > len(X_train):\n            x, y = X_train[i:], 
Y_train[i:]\n        else:\n            x, y = X_train[i:(i+config['batch_size'])], Y_train[i:(i+config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        # zero out the model's parameter gradients\n        optimizer.zero_grad()\n        text_feature, audio_feature = model.pretrained_feature(x)\n        # text_feature = torch.from_numpy(ss.fit_transform(text_feature.numpy()))\n        # audio_feature = torch.from_numpy(ss.fit_transform(audio_feature.numpy()))\n        # concat_x = torch.cat((audio_feature, text_feature), dim=1)\n        concat_x = torch.cat((text_feature, audio_feature), dim=1)\n        # dot_x = text_feature.mul(audio_feature)\n        # add_x = text_feature.add(audio_feature)\n        output = model(concat_x)\n        pred = output.data.max(1, keepdim=True)[1]\n        correct += pred.eq(torch.tensor(y).data.view_as(pred)).cpu().sum()\n        # loss = criterion(output, torch.tensor(y))\n        loss = criterion(text_feature, audio_feature, y, model)\n        # backward pass to compute gradients\n        loss.backward()\n        # update the network parameters from the gradients\n        optimizer.step()\n        batch_idx += 1\n        # loss.item() extracts the scalar value from the loss tensor\n        total_loss += loss.item()\n    cur_loss = total_loss\n    max_train_acc = correct\n    train_acc = correct\n    print('Train Epoch: {:2d}\t Learning rate: {:.4f}\tLoss: {:.6f}\t Accuracy: {}/{} ({:.0f}%)\n '.format(\n                epoch, config['learning_rate'], cur_loss/len(X_train), correct, len(X_train),\n        100. * correct / len(X_train)))\n\n\n
def evaluate(model, test_idxs, fold, train_idxs):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)\n    X_test = []\n    Y_test = []\n    for idx in test_idxs:\n        X_test.append(fuse_features[idx])\n        Y_test.append(fuse_targets[idx])\n    global max_train_acc, max_acc, max_f1\n    for i in range(0, len(X_test), config['batch_size']):\n        if i + config['batch_size'] > len(X_test):\n            x, y = X_test[i:], Y_test[i:]\n        else:\n            x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        text_feature, audio_feature = model.pretrained_feature(x)\n        with torch.no_grad():\n            # concat_x = torch.cat((audio_feature, text_feature), dim=1)\n            audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()\n            text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()\n            concat_x = torch.cat((text_feature, audio_feature), dim=1)\n            output = model(concat_x)\n        # loss = criterion(output, torch.tensor(y))\n        loss = criterion(text_feature, audio_feature, y, model)\n        pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))\n        total_loss += loss.item()\n        \n    y_test_pred, conf_matrix = model_performance(Y_test, pred[config['batch_size']:])\n    \n    print('\nTest set: Average loss: {:.4f}'.format(total_loss/len(X_test)))\n    # custom evaluation metrics\n    print('Calculating additional test metrics...')\n    accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n    precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n    recall = 
float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n    f1_score = 2 * (precision * recall) / (precision + recall)\n    print(\"Accuracy: {}\".format(accuracy))\n    print(\"Precision: {}\".format(precision))\n    print(\"Recall: {}\".format(recall))\n    print(\"F1-Score: {}\\n\".format(f1_score))\n    print('='*89)\n    \n    if max_f1 < f1_score and max_train_acc >= len(train_idxs)*0.9 and f1_score > 0.61:\n        max_f1 = f1_score\n        max_acc = accuracy\n        save(model, os.path.join(prefix, 'Model/ClassificationWhole/Fuse/fuse_{:.2f}_{}'.format(max_f1, fold)))\n        print('*'*64)\n        print('model saved: f1: {}\\tacc: {}'.format(max_f1, max_acc))\n        print('*'*64)\n    return total_loss\n\nif __name__ == '__main__':\n    idxs_paths = ['train_idxs_0.63_1.npy', 'train_idxs_0.65_2.npy', 'train_idxs_0.60_3.npy']\n    text_model_paths = ['BiLSTM_128_0.64_1.pt', 'BiLSTM_128_0.66_2.pt', 'BiLSTM_128_0.62_3.pt']\n    audio_model_paths = ['BiLSTM_gru_vlad256_256_0.67_1.pt', 'BiLSTM_gru_vlad256_256_0.67_2.pt', 'BiLSTM_gru_vlad256_256_0.63_3.pt']\n    for fold in range(1, 4):\n        # if fold != 2:\n        #     continue\n        train_idxs_tmp = np.load(os.path.join(prefix, 'Features/TextWhole/{}'.format(idxs_paths[fold-1])), allow_pickle=True)\n        test_idxs_tmp = list(set(list(fuse_dep_idxs)+list(fuse_non_idxs)) - set(train_idxs_tmp))\n        resample_idxs = list(range(6))\n\n        train_idxs, test_idxs = [], []\n        # depression data augmentation\n        for idx in train_idxs_tmp:\n            if idx in fuse_dep_idxs:\n                feat = fuse_features[idx]\n                audio_perm = itertools.permutations(feat[0], 3)\n                text_perm = itertools.permutations(feat[1], 3)\n                count = 0\n                for fuse_perm in zip(audio_perm, text_perm):\n                    if count in resample_idxs:\n                        fuse_features.append(fuse_perm)\n                        fuse_targets = np.hstack((fuse_targets, 1))\n                        train_idxs.append(len(fuse_features)-1)\n                    count += 1\n            else:\n                train_idxs.append(idx)\n\n        for idx in test_idxs_tmp:\n            if idx in fuse_dep_idxs:\n                feat = fuse_features[idx]\n                audio_perm = itertools.permutations(feat[0], 3)\n                text_perm = itertools.permutations(feat[1], 3)\n                count = 0\n                resample_idxs = [0,1,4,5]\n                for fuse_perm in zip(audio_perm, text_perm):\n                    if count in resample_idxs:\n                        fuse_features.append(fuse_perm)\n                        fuse_targets = np.hstack((fuse_targets, 1))\n                        test_idxs.append(len(fuse_features)-1)\n                    count += 1\n            else:\n                test_idxs.append(idx)\n\n        text_lstm_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Text/{}'.format(text_model_paths[fold-1])))\n        audio_lstm_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Audio/{}'.format(audio_model_paths[fold-1])))\n        model_state_dict = {}\n        model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']\n        model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']\n        model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']\n        
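# The remaining pretrained GRU, FC and LayerNorm parameters are copied below;\n        # every copied parameter is frozen later in this loop so that only the final\n        # fusion layer (fc_final) is trained.\n        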
model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']\n\n        model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']\n        model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']\n        model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']\n        model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']\n\n        model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']\n        model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']\n        model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']\n        model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']\n\n        model_state_dict['ln.weight'] = audio_lstm_model.state_dict()['ln.weight']\n        model_state_dict['ln.bias'] = audio_lstm_model.state_dict()['ln.bias']\n        model.load_state_dict(text_lstm_model.state_dict(), strict=False)\n        # model.load_state_dict(audio_lstm_model.state_dict(), strict=False)\n        model.load_state_dict(model_state_dict, strict=False)\n            \n        for param in model.parameters():\n            param.requires_grad = False\n\n        model.fc_final[0].weight.requires_grad = True\n        # model.fc_final[0].bias.requires_grad = True\n        # model.modal_attn.weight.requires_grad = True\n\n        max_f1 = -1\n        max_acc = -1\n        max_train_acc = -1\n\n        for ep in range(1, config['epochs']):\n            train(ep, train_idxs)\n            tloss = evaluate(model, test_idxs, fold, train_idxs)"
  },
  {
    "path": "DepressionCollected/Classification/text_bilstm_whole.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.metrics import mean_absolute_error, mean_squared_error\nfrom sklearn.model_selection import train_test_split\n\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\ntext_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']\ntext_dep_idxs_tmp = np.where(text_targets == 1)[0]\ntext_non_idxs = np.where(text_targets == 0)[0]\n\nclass TextBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(TextBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        self.init_weight()\n        \n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'ln' not in name:\n                if 'bias' in name:\n                    nn.init.constant_(param, 0.0)\n                elif 'weight' in name:\n                    nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # 双层lstm\n        self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=self.bidirectional)\n                \n        # FC层\n        # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)\n        self.fc_out = nn.Sequential(\n            # nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            # nn.ReLU(),\n            nn.Softmax(dim=1),\n        )\n\n        self.ln1 = nn.LayerNorm(self.embedding_size)\n        self.ln2 = nn.LayerNorm(self.hidden_dims)\n\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context 
[batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        # x : [len_seq, batch_size, embedding_dim]\n        x = x.permute(1, 0, 2)\n        # x = self.ln1(x)\n        output, (final_hidden_state, _) = self.lstm_net(x)\n        # output : [batch_size, len_seq, n_hidden * 2]\n        output = output.permute(1, 0, 2)\n        # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n        final_hidden_state = final_hidden_state.permute(1, 0, 2)\n        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n        # atten_out = self.attention_net(output, final_hidden_state)\n        atten_out = self.attention_net_with_w(output, final_hidden_state)\n        # atten_out = self.ln2(atten_out)\n        return self.fc_out(atten_out)\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n    \ndef standard_confusion_matrix(y_test, y_test_pred):\n    \"\"\"\n    Make confusion matrix with format:\n                  -----------\n                  | TP | FP |\n                  -----------\n                  | FN | TN |\n                  -----------\n    Parameters\n    ----------\n    y_true : ndarray - 1D\n    y_pred : ndarray - 1D\n\n    Returns\n    -------\n    ndarray - 2D\n    \"\"\"\n    [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)\n    return np.array([[tp, fp], [fn, tn]])\n\ndef model_performance(y_test, y_test_pred_proba):\n    \"\"\"\n    Evaluation metrics for network performance.\n    \"\"\"\n    y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]\n\n    # Computing confusion matrix for test dataset\n    conf_matrix = standard_confusion_matrix(y_test, y_test_pred)\n    print(\"Confusion Matrix:\")\n    print(conf_matrix)\n\n    return y_test_pred, conf_matrix\n\ndef train(epoch, train_idxs):\n    global lr, train_acc\n    model.train()\n    batch_idx = 1\n    total_loss = 0\n    correct = 0\n    X_train = text_features[train_idxs]\n    Y_train = text_targets[train_idxs]\n    for i in range(0, X_train.shape[0], config['batch_size']):\n        if i + config['batch_size'] > X_train.shape[0]:\n            x, y = X_train[i:], Y_train[i:]\n        else:\n            x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(\n                i + config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(y))\n\n        # 将模型的参数梯度设置为0\n        optimizer.zero_grad()\n        output = model(x)\n        pred = output.data.max(1, keepdim=True)[1]\n        #print(pred.shape, y.shape)\n        correct += pred.eq(y.data.view_as(pred)).cpu().sum()\n        loss = criterion(output, y)\n        # 后向传播调整参数\n        loss.backward()\n        # 根据梯度更新网络参数\n        optimizer.step()\n        batch_idx += 1\n        # loss.item()能够得到张量中的元素值\n        total_loss += loss.item()\n\n    train_acc = correct\n    print(\n        'Train Epoch: {:2d}\\t 
Learning rate: {:.4f}\\tLoss: {:.6f}\\t Accuracy: {}/{} ({:.0f}%)\\n '\n        .format(epoch + 1, config['learning_rate'], total_loss, correct,\n                X_train.shape[0], 100. * correct / X_train.shape[0]))\n\n\ndef evaluate(model, test_idxs, fold, train_idxs):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global max_f1, max_acc, min_mae, X_test_lens, max_prec, max_rec\n    pred = np.array([])\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(text_features[test_idxs]).type(torch.FloatTensor), requires_grad=True).cuda(),\\\n                Variable(torch.from_numpy(text_targets[test_idxs])).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(text_features[test_idxs]).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(text_targets[test_idxs])).type(torch.LongTensor)\n\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y)\n        total_loss += loss.item()\n        y_test_pred, conf_matrix = model_performance(y, output.cpu())\n        accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)\n        precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])\n        recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])\n        f1_score = 2 * (precision * recall) / (precision + recall)\n        print(\"Accuracy: {}\".format(accuracy))\n        print(\"Precision: {}\".format(precision))\n        print(\"Recall: {}\".format(recall))\n        print(\"F1-Score: {}\\n\".format(f1_score))\n        print('=' * 89)\n\n        if max_f1 <= f1_score and train_acc > len(train_idxs)*0.9 and f1_score > 0.5:\n            max_f1 = f1_score\n            max_acc = accuracy\n            max_rec = recall\n            max_prec = precision\n            save(model, os.path.join(prefix, 'Model/ClassificationWhole/Text/BiLSTM_{}_{:.2f}_{}'.format(config['hidden_dims'], max_f1, fold)))\n            print('*' * 64)\n            print('model saved: f1: {}\\tacc: {}'.format(max_f1, max_acc))\n            print('*' * 64)\n\n    return total_loss\n\ndef get_param_group(model):\n    nd_list = []\n    param_list = []\n    for name, param in model.named_parameters():\n        if 'ln' in name:\n            nd_list.append(param)\n        else:\n            param_list.append(param)\n    return [{'params': param_list, 'weight_decay': 1e-5}, {'params': nd_list, 'weight_decay': 0}]\n\nconfig = {\n    'num_classes': 2,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'embedding_size': 1024,\n    'batch_size': 4,\n    'epochs': 150,\n    'learning_rate': 1e-5,\n    'hidden_dims': 128,\n    'bidirectional': True,\n    'cuda': False,\n}\n\ntrain_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),\nnp.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_2.npy'), allow_pickle=True),\nnp.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]\nfold = 1\n\nfor idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):\n    # if idx_idx != 2:\n    #     continue\n    test_idxs_tmp = list(set(list(text_dep_idxs_tmp)+list(text_non_idxs)) - set(train_idxs_tmp))\n    train_idxs, test_idxs = [], []\n    # depression data augmentation\n    for idx in train_idxs_tmp:\n        if idx in text_dep_idxs_tmp:\n            feat = text_features[idx]\n            count = 0\n            resample_idxs = [0,1,2,3,4,5]\n            
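# Augmentation: each full-length permutation of the topic-level feature matrix\n            # becomes an extra positive sample; resample_idxs picks which of the 3! = 6\n            # orderings to keep.\n            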
for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n                    text_targets = np.hstack((text_targets, 1))\n                    train_idxs.append(len(text_features)-1)\n                count += 1\n        else:\n            train_idxs.append(idx)\n\n    for idx in test_idxs_tmp:\n        if idx in text_dep_idxs_tmp:\n            feat = text_features[idx]\n            count = 0\n            # resample_idxs = random.sample(range(6), 4)\n            resample_idxs = [0,1,4,5]\n            for i in itertools.permutations(feat, feat.shape[0]):\n                if count in resample_idxs:\n                    text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))\n                    text_targets = np.hstack((text_targets, 1))\n                    test_idxs.append(len(text_features)-1)\n                count += 1\n        else:\n            test_idxs.append(idx)\n\n    model = TextBiLSTM(config)\n\n    param_group = get_param_group(model)\n    optimizer = optim.AdamW(param_group, lr=config['learning_rate'])\n    criterion = nn.CrossEntropyLoss()\n    max_f1 = -1\n    max_acc = -1\n    max_rec = -1\n    max_prec = -1\n    train_acc = -1\n\n    for ep in range(1, config['epochs']):\n        train(ep, train_idxs)\n        tloss = evaluate(model, test_idxs, fold, train_idxs)\n    fold += 1"
  },
  {
    "path": "DepressionCollected/Classification/text_features_whole.py",
    "content": "import numpy as np\nimport pandas as pd\nimport wave\nimport librosa\nimport re\n# from allennlp.commands.elmo import ElmoEmbedder\nimport os\nprefix = os.path.abspath(os.path.join(os.getcwd(), \".\"))\nfrom elmoformanylangs import Embedder\nimport pkuseg\nimport thulac\n# from pyhanlp import HanLP\nimport jieba\n# seg = pkuseg.pkuseg()\n# thu1 = thulac.thulac(seg_only=True)\nelmo = Embedder('/Users/linlin/Desktop/SpeechRecognition/DepressionCode/ELMoForManyLangs/zhs.model')\n\ntopics = ['positive', 'neutral', 'negative']\nanswers = {}\ntext_features = []\ntext_targets = []\n\ndef extract_features(text_features, text_targets, path):\n    for index in range(114):\n        if os.path.isdir(os.path.join(prefix, path, str(index+1))):\n            answers[index+1] = []\n            for topic in topics:\n                with open(os.path.join(prefix, path, str(index+1), '%s.txt'%(topic)) ,'r') as f:\n                    lines = f.readlines()[0]\n                    # seg_text = seg.cut(lines) \n                    # seg_text = thu1.cut(lines)\n                    # seg_text_iter = HanLP.segment(lines) \n                    seg_text_iter = jieba.cut(lines, cut_all=False) \n                    answers[index+1].append([item for item in seg_text_iter])\n                    # answers[dir].append(seg_text)\n            with open(os.path.join(prefix, '{1}/{0}/new_label.txt'.format(index+1, path))) as fli:\n                target = float(fli.readline())\n            # text_targets.append(1 if target >= 53 else 0)\n            text_targets.append(target)\n            text_features.append([np.array(item).mean(axis=0) for item in elmo.sents2elmo(answers[index+1])])\n\nextract_features(text_features, text_targets, 'Data')\nextract_features(text_features, text_targets, 'ValidationData')\n\nprint(\"Saving npz file locally...\")\nnp.savez(os.path.join(prefix, 'Features/TextWhole/whole_samples_reg_avg.npz'), text_features)\nnp.savez(os.path.join(prefix, 'Features/TextWhole/whole_labels_reg_avg.npz'), text_targets)\n    "
  },
  {
    "path": "DepressionCollected/DAICFeatureExtarction/feature_extraction.py",
    "content": "import os\nimport sys\nsys.path.append('/Users/linlin/Desktop/DepressionCollected')\nfrom Classification.audio_features_whole import wav2vlad\n\nimport numpy as np\nimport pandas as pd\nimport wave\n\nprefix = os.getcwd()\ntrain_split_df = pd.read_csv(os.path.join(prefix, 'DAIC/train_split_Depression_AVEC2017.csv'))\ntest_split_df = pd.read_csv(os.path.join(prefix, 'DAIC/dev_split_Depression_AVEC2017.csv'))\ntrain_split_num = train_split_df[['Participant_ID']]['Participant_ID'].tolist()\ntest_split_num = test_split_df[['Participant_ID']]['Participant_ID'].tolist()\ntrain_split_clabel = train_split_df[['PHQ8_Binary']]['PHQ8_Binary'].tolist()\ntest_split_clabel = test_split_df[['PHQ8_Binary']]['PHQ8_Binary'].tolist()\ntrain_split_rlabel = train_split_df[['PHQ8_Score']]['PHQ8_Score'].tolist()\ntest_split_rlabel = test_split_df[['PHQ8_Score']]['PHQ8_Score'].tolist()\n\nwith open('./queries.txt') as f:\n    queries = f.readlines()\n\ndef identify_topics(sentence):\n    for query in queries:\n        query = query.strip('\\n')\n        sentence = sentence.strip('\\n')\n        if query == sentence:\n            return True\n    return False\n\ndef extract_features(number):\n    transcript = pd.read_csv(os.path.join(prefix, 'DAIC/{0}_P/{0}_TRANSCRIPT.csv'.format(number)), sep='\\t').fillna('')\n    \n    wavefile = wave.open(os.path.join(prefix, 'DAIC/{0}_P/{0}_AUDIO.wav'.format(number, 'r')))\n    sr = wavefile.getframerate()\n    nframes = wavefile.getnframes()\n    wave_data = np.frombuffer(wavefile.readframes(nframes), dtype=np.short)\n    \n    response = ''\n    start_time = 0\n    stop_time = 0\n    feats = []\n    signal = []\n\n    for t in transcript.itertuples():\n        # 问题开始\n        if getattr(t,'speaker') == 'Ellie' and (identify_topics(getattr(t,'value')) or 'i think i have asked everything' in getattr(t,'value')):\n            # 初始化\n            response = ''\n            if len(signal) == 0:\n                continue\n            feats.append(wav2vlad(signal, sr))\n            signal = []\n        elif getattr(t,'speaker') == 'Participant':\n            if 'scrubbed_entry' in getattr(t,'value'):\n                continue\n            start_time = int(getattr(t,'start_time')*sr)\n            stop_time = int(getattr(t,'stop_time')*sr)\n            response += (' ' + getattr(t,'value'))\n            signal = np.hstack((signal, wave_data[start_time:stop_time].astype(np.float)))\n    \n    print(np.shape(feats))\n    print('{}_P feature done'.format(number))\n    return feats\n    \n# training set\naudio_features_train = []\naudio_ctargets_train = []\naudio_rtargets_train = []\n\n# test set\naudio_features_test = []\naudio_ctargets_test = []\naudio_rtargets_test = []\n\n# training set\nfor index in range(len(train_split_num)):\n    feat = extract_features(train_split_num[index])\n    audio_features_train.append(feat)\n    audio_ctargets_train.append(train_split_clabel[index])\n    audio_rtargets_train.append(train_split_rlabel[index])\n    \nprint(\"Saving npz file locally...\")\nnp.savez(os.path.join(prefix, 'DAICCode/Features/train_samples_clf.npz'), audio_features_train)\nnp.savez(os.path.join(prefix, 'DAICCode/Features/train_samples_reg.npz'), audio_features_train)\nnp.savez(os.path.join(prefix, 'DAICCode/Features/train_labels_clf.npz'), audio_ctargets_train)\nnp.savez(os.path.join(prefix, 'DAICCode/Features/train_labels_reg.npz'), audio_rtargets_train)\n\n# test set\nfor index in range(len(test_split_num)):\n    feat = extract_features(test_split_num[index])\n  
  audio_features_test.append(feat)\n    audio_ctargets_test.append(test_split_clabel[index])\n    audio_rtargets_test.append(test_split_rlabel[index])\n\nprint(\"Saving npz file locally...\")\nnp.savez(os.path.join(prefix, 'DAICCode/Features/test_samples_clf.npz'), audio_features_test)\nnp.savez(os.path.join(prefix, 'DAICCode/Features/test_samples_reg.npz'), audio_features_test)\nnp.savez(os.path.join(prefix, 'DAICCode/Features/test_labels_clf.npz'), audio_ctargets_test)\nnp.savez(os.path.join(prefix, 'DAICCode/Features/test_labels_reg.npz'), audio_rtargets_test)\n"
  },
  {
    "path": "DepressionCollected/DAICFeatureExtarction/queries.txt",
    "content": "how are you doing today\nwhere are you from originally\nwhy'd you move to l_a\nhow do you like l_a\nwhat are some things you really like about l_a\nhow easy was it for you to get used to living in l_a\nwhat are some things you don't really like about l_a\nwhat'd you study at school\nare you still doing that\nwhat's your dream job\ndo you travel a lot\nwhy\nhow often do you go back to your hometown\ndo you consider yourself an introvert\nwhat do you do to relax\nhow are you at controlling your temper\nwhen was the last time you argued with someone and what was it about\nhow did you feel in that moment\ntell me more about that\nhow close are you to them\nhow do you know them\nwhat are some things you like to do for fun\nwho's someone that's been a positive influence in your life\ncan you tell me about that\nhow close are you to your family\nis there anything you regret\nwhat made you decide to do that\ncould you have done anything to avoid it\nwhat's one of your most memorable experiences\nwhat's it like for you living with them\nhow do you like your living situation\ndo you have roommates\nhow easy is it for you to get a good night's sleep\ndo you feel that way often\nwhat are you like when you don't sleep well\ndo you feel down\nhave you been diagnosed with depression\nhave you ever been diagnosed with p_t_s_d\nhave you ever served in the military\nwhen was the last time you felt really happy\nwhat do you think of today's kids\ncan you give me an example of that\nwhat do you do when you're annoyed\nwhen was the last time that happened\nhow would your best friend describe you\nwhere do you live\nhow hard is that\nwhat do you do now\nare you happy you did that\nwhat are some things that make you really mad\nwhat do you do to relax\nlike what\nare you still working in that\n<laughter> can you give me an example of that\ndo you feel down \nlike what \nhow do you cope with them\nhave you noticed any changes in your behavior or thoughts lately\ndo you have disturbing thoughts\nhow easy is it for you to get a good night sleep\nwhat do you enjoy about traveling\ni'd love to hear about one of your trips\nwhat advice would you give yourself ten or twenty years ago     \nwhat are some things you really like about l_a \nhow are you at controlling your temper \nhas that gotten you in trouble\ndo you find it easy to be a parent\nwhat's the hardest thing about being a parent\ntell me about your kids\nwhat's one of your most memorable experiences \nhow did you feel in that moment \nhave you ever served in the military \nhave you been diagnosed with depression \nhow would you best friend describe you \nwhat'd you study at school \nnice are you still doing that\nwhat are some things that make you really mad \ncould you have done anything to avoid it \ncould you say a little more about that \nwhen was the last time you argued with someone and what was it about \n<laughter> do you travel a lot\nwhen was the last time that happened \nhave you ever been diagnosed with p_t_s_d \nhow would your best friend describe you \nwhen was the last time you felt really happy\nhow did you decide to do that\nokay could you have done anything to avoid it\ndo you feel like therapy is useful\ndid you think you had a problem before you found out\nhow has seeing a therapist affected you\nwhat sort of changes have you noticed since you've been going to therapy\nwhy did you stop\nwho's someone that's been a positive influence in your life \nwhen did you move to l_a\nhow often do you go back to your home town\nwhat 
got you to seek help\nwhat were your symptoms\nyeah what do you enjoy about traveling\nokay what's the best thing about being a parent\nwhen was the last time you argued with someone and what was it about\n<laughter> could you say a little more about that\nhow long ago were you diagnosed\nso how are you doing today\ncould you say a little more about that\ndo you still go to therapy now\ndo you feel like therapy's useful\nhave you noticed any changes in your behavior or thoughts lately \ntell me about that\nwhat would you say are some of your best qualities\nwhat are some things that usually put you in a good mood\nwhat are you most proud of in your life\nhow does it compare to l_a\ntell me about something you did recently that you really enjoyed\nis going to a therapist helping you\nhow have you been feeling lately\nare they triggered by something\nwhat's the best thing about being a parent\nwhy'd you decide to enlist in the military\nhow old were you when you joined the military\nhow did serving in the military change you\nwhat did you do after the military\nwhen'd you move to l_a\nhow has seeing a therapist affected you \nwho's someone that's been a positive influence in  your life\nwhat are some things you like to do for fun who's someone that's been a positive influence in your life \nwhat was it about\ndo you think that maybe you're being a little hard on yourself\nso how are you doing today \nwhere are you from originally \nhow easy was it for you to get used to living in l_a \nwhat are some things you don't really like about l_a \nhow often to you go back to your home town \nwhy \nhow close are you to your family <asks do you travel a lot simultaneously>\ndo you travel a lot \nwhat do you enjoy about traveling \ni'd love to hear about one of your trips \ndo you consider yourself an introvert  \ncan you give me an example of that \nwhat do you do when you're annoyed \nwhat do you do to relax \nwhat's your dream job \nhow long ago were you diagnosed \nwhat got you to seek help \ndo you feel like therapy's useful \ndo you still go to therapy now \nwhat sort of changes have you noticed since you've been going to therapy \nhow have you been feeling lately \ntell me more about that \nwhat would you say are some of your best qualities \nwhat are some things that usually put you in a good mood  \nwhen was the last time you felt really happy \nwho's someone that's been a positive influence in your life \nhow do you know them \nhow close are you to them \nwhat are you most proud of in your life \nare you still doing that \ndo you consider yourself an introvert \ndo you feel that way often \nhow do you like your living situation \ndo you have roommates \nhow easy is it for you to get a good night's sleep \nwhat are you like when you don't sleep well \nwhat advice would you give yourself ten or twenty years ago \nhow close are you to your family \ntell me about something you did recently that you really enjoyed \nwhat are some things that usually put you in a good mood \nwhy why\nwhat made you decide to go and see someone\nokay so how are you doing today\nwhy'd you move to l_a \nhow often do you go back to your hometown \nhow did you decide to do that \nis there anything you regret \ncould you have done anything to avoid it  \nhow easy is it for you to get a good night's sleep  \ndo you find it easy to be a parent \nwhat's the best thing about being a parent \nwhat's the hardest thing about being a parent \nand please feel free to tell me anything you answers are totally confidential\nand 
please feel free to tell me anything you're answers are totally confidential\nwhat made you decide to do that \nwhat advice would you give yourself ten or twenty years ago  \nwhat do you think of today's kids \ntell me about that \nhow hard is that \ncan you tell me about that \nso how are you doing today  \nare you still working in that \nwhat are some things you like to do for fun \nthat's good where are you from originally \nwhen was the last time you argued with someone and what was it about  \nwhere do you live \ndid you think you had a problem before you found out \nwhat were your symptoms \nwhy did you stop \nokay so how are you doing today  \nwhat do you do now \nare you happy you did that \nare they triggered by something \nhow do you cope with them \nhas that gotten you in trouble \nwhat are you\nwhat are some things that make you really mad  \nhow has seeing a therapist affected you  \nyeah how hard is that\nmhm what are some things you don't really like about l_a\nmhm how did you decide to do that \nhow close are you to your family do you find it easy to be a parent \nthat's good what do you think of today's kids \nawesome how did you decide to do that \nuh huh uh huh uh huh is there anything you regret is there anything you regret\nhow old were you when you joined the military \ndid you ever see combat \nhow did serving in the military change you \nwhat did you do after the military \nhow easy was it for you to go back to civilian life \nis going to a therapist helping you   \nthat's good where are you from originally\ntell me about your kids \nyeah how hard is that \ndo you think that maybe you're being a little hard on yourself \ndo you consider yourself and introvert\nhow often do you go back to your home town \nhow_doingV (so how are you doing today)\nwhere_originally (where are you from originally)\nlike_about_LA (what are some things you really like about l_a)\ndont_like_LA (what are some things you don't really like about l_a)\nstudy (what did you study at school)\nstill_doing_X (are you still doing that)\nchange_directions (what made you decide to do that)\nhappy_didthat (are you happy you did that)\njob_virtually (i love my job you could almost say it's virtually made for me what's your dream job)\nshyoutgoing (do you consider yourself more shy or outgoing)\ntell_about_that (can you tell me about that)\nrelax_fishtank (sometimes when i'm feeling tense i turn on the fish tank screensaver hey i know it's not hawaii but it's the best i've got what do you do to relax)\ncontrol_temper (how are you at controlling your temper)\nlast_argument (when was the last time you argued with someone and what was it about)\nhard_decisionB (tell me about the hardest decision you've ever had to make)\nfamily_relationship (tell me about your relationship with your family)\nfeelguilty (what's something you feel guilty about)\ngive_example (can you give me an example of that)\ndescribe_felt (how did you feel in that moment)\nptsd_diagnosed (have you ever been diagnosed with p_t_s_d)\ndepression_diagnosed (have you been diagnosed with depression)\neasy_sleep (how easy is it for you to get a good night's sleep)\nfeel_down (do you feel down)\nbehavior_changes (have you noticed any changes in your behavior or thoughts lately)\nhappy_lasttime (tell me about the last time you felt really happy)\nself_change (what are some things you wish you could change about yourself)\nsymptoms_cope (how do you cope with them)\nregret (is there anything you regret)\nadvice_back (what advice would you give to 
yourself ten or twenty years ago)\nEllie17Dec2012_08 (what are you most proud of in your life)\ndifficult (how hard is that)\nBF_describe (how would your best friend describe you)\nideal_weekendC (tell me how you spend your ideal weekend)\nasked_everything (okay i think i have asked everything i need to)\ntravel_shoes (i'm sure you can tell by my shoes i'm not much of a world explorer do you travel a lot)\nlike_what (like what)\ntravel_trips (i'd love to hear about one of your trips)\nstill_working_on_X (are you still working in that)\ndream_job (what's your dream job)\nsituation_handled (tell me about a situation that you wish you had handled differently)\nwhy_enlist (why'd you decide to enlist in the military)\nold (how old were you when you joined the military)\ncombat (did you ever see combat)\nwhy2 (why)\neffectB (how did serving in the military change you)\nafter (what did you do after the military)\ncivilian_life (how easy was it for you to go back to civilian life)\nfeel_lately (how have you been feeling lately)\ntherapy_useful (do you feel like therapy is useful)\nwhy_seek_help (what got you to seek help)\ntherapy_going (do you still go to therapy now)\ntherapist_affect (how has seeing a therapist affected you)\nlanded_trouble (has that gotten you in trouble)\nwhen_LA (when did you move to l_a)\noften_backB (how often do you go back to your hometown)\ncompares_LA (how does it compare to l_a)\nwhy_LA (why did you move to l_a)\nadapted_LA (how easy was it for you to get used to living in l_a)\nhard_decision (how did you decide to do that)\neasy_parent (do you find it easy to be a parent)\nparent_hardest (what's the hardest thing about being a parent)\nparent_best (what's the best thing about being a parent)\nparent_differences (what are some ways that you're different as a parent than your parents)\nmilitary (have you ever served in the military)\ntoo_hard (do you think that maybe you're being a little hard on yourself)\nEllie17Dec2012_07 (what would you say are some of your best qualities)\nmemorableB (what's one of your most memorable experiences)\ntravel_changed (what do you enjoy about traveling)\nmemory_erase (tell me about an event or something that you wish you could erase from your memory)\nbouts_symptoms (when was the last time that happened)\nargument_about (what was it about)\navoid (could you have done anything to avoid it)\ntrigger (are they triggered by something)\nsleep_affects (what are you like when you don't sleep well)\nwhen_diagnosed (how long ago were you diagnosed)\ntherapy_changes (what sort of changes have you noticed since you've been going to therapy)\nfeelbadly (tell me about a time when someone made you feel really badly about yourself)\nmore (tell me more about that)\ndisturbing_thoughts (do you have disturbing thoughts)\nEllie17Dec2012_10 (tell me about something you did recently that you really enjoyed)\nEllie17Dec2012_09 (what are some things that usually put you in a good mood)\ndo_fun (what are some things you like to do for fun)\ninfluence_positive (who's someone that's been a positive influence in your life)\nhow_close (how close are you to them)\ntell_me_about (tell me about that)\nsuspect_problem (did you think you had a problem before you found out)\nsymptoms_what (what were your symptoms)\nhow_know (how do you know them)\ntherapist_useful (is going to a therapist helping you)\nstop_going (why did you stop)\nmad_makeyou (what are some things that make you really mad)\nwhere_live (where do you live)\nroommates (do you have 
roommates)\nliving_situation (how do you like your living situation)\nwhat_do_when_annoyed (what do you do when you are annoyed)\nelaborate (could you say a little more about that)\nfamily_roleB (how close are you to your family)\ntodays_kids (what do you think of today's kids)\ntell_me_moreV2 (can you tell me more about that)\nkids_elaborate (tell me about your kids)"
  },
  {
    "path": "DepressionCollected/Regression/AudioModelChecking.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.metrics import mean_absolute_error, mean_squared_error\nfrom sklearn.model_selection import train_test_split\n\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\n\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \"./\"))\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_256.npz'))['arr_0']\n\naudio_dep_idxs = np.where(audio_targets >= 53)[0]\naudio_non_idxs = np.where(audio_targets < 53)[0]\ndep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)\nnon_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)\n\nconfig = {\n    'num_classes': 1,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'embedding_size': 256,\n    'batch_size': 4,\n    'epochs': 100,\n    'learning_rate': 5e-5,\n    'hidden_dims': 256,\n    'bidirectional': False,\n    'cuda': False\n}\n\nclass AudioBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(AudioBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n\n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True))\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        self.lstm_net_audio = nn.GRU(self.embedding_size,\n                                self.hidden_dims,\n                                num_layers=self.rnn_layers,\n                                dropout=self.dropout,\n                                bidirectional=self.bidirectional,\n                                batch_first=True)\n        # self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,\n        #                         num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)\n\n        self.bn = nn.BatchNorm1d(3)\n\n        # FC层\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            nn.ReLU(),\n            # nn.Softmax(dim=1)\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h 
[batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        #         h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n       # print(atten_w.shape, m.transpose(1, 2).shape)\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        x, _ = self.lstm_net_audio(x)\n        # x = self.bn(x)\n        x = x.sum(dim=1)\n        out = self.fc_audio(x)\n        return out\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n \ndef evaluate(fold, model):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global min_mae, min_rmse, test_dep_idxs, test_non_idxs\n    pred = np.array([])\n    X_test = audio_features[list(test_dep_idxs)+list(test_non_idxs)]\n    Y_test = audio_targets[list(test_dep_idxs)+list(test_non_idxs)]\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\\\n                Variable(torch.from_numpy(Y_test)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)\n\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        total_loss += loss.item()\n        pred = output.flatten().detach().numpy()\n\n        mae = mean_absolute_error(Y_test, pred)\n        rmse = np.sqrt(mean_squared_error(Y_test, pred))\n\n        print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n        print('='*89)\nfold = 2\naudio_lstm_model = torch.load(os.path.join(prefix, 'Model/Regression/Audio%d/gru_vlad256_256_8.25.pt'%(fold+1)))\nmodel = AudioBiLSTM(config)\n# model_state_dict = {}\n# model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']\n# model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']\n# model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']\n# model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']\n\n# model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']\n# model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']\n# model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']\n# model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']\n\n# model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']\n# 
model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']\n# model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']\n# model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']\nmodel_state_dict = audio_lstm_model.state_dict()\nmodel.load_state_dict(model_state_dict, strict=True)\n\ntest_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]\ntest_non_idxs = non_idxs[fold*44:(fold+1)*44]\ntrain_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))\ntrain_non_idxs = list(set(non_idxs) - set(test_non_idxs))\n\n# training data augmentation: oversample the first 14 depressed training subjects\n# by appending every permutation of their feature segments\ntrain_dep_idxs = []\nfor (i, idx) in enumerate(train_dep_idxs_tmp):\n    feat = audio_features[idx]\n    if i < 14:\n        for perm in itertools.permutations(feat, feat.shape[0]):\n            audio_features = np.vstack((audio_features, np.expand_dims(list(perm), 0)))\n            audio_targets = np.hstack((audio_targets, audio_targets[idx]))\n            train_dep_idxs.append(len(audio_features)-1)\n    else:\n        train_dep_idxs.append(idx)\n\n# test data augmentation\n# test_dep_idxs = []\n# for idx in test_dep_idxs_tmp:\n#     feat = audio_features[idx]\n#     for i in itertools.permutations(feat, feat.shape[0]):\n#         audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n#         audio_targets = np.hstack((audio_targets, audio_targets[idx]))\n#         test_dep_idxs.append(len(audio_features)-1)\ntest_dep_idxs = test_dep_idxs_tmp\n\noptimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])\ncriterion = nn.SmoothL1Loss()\n# criterion = FocalLoss(class_num=2)\nevaluate(fold, model)\n"
  },
  {
    "path": "DepressionCollected/Regression/audio_bilstm_perm.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.metrics import mean_absolute_error, mean_squared_error\nfrom sklearn.model_selection import train_test_split\n\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \"./\"))\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_256.npz'))['arr_0']\n\n# audio_dep_idxs = np.where(audio_targets >= 53)[0]\n# audio_non_idxs = np.where(audio_targets < 53)[0]\n# dep_orders = random.sample(range(len(audio_dep_idxs)), len(audio_dep_idxs))\n# non_orders = random.sample(range(len(audio_non_idxs)), len(audio_non_idxs))\n# dep_idxs = audio_dep_idxs[dep_orders]\n# non_idxs = audio_non_idxs[non_orders]\n# np.save(os.path.join(prefix, 'Features/AudioWhole/dep_idxs'), dep_idxs)\n# np.save(os.path.join(prefix, 'Features/AudioWhole/non_idxs'), non_idxs)\ndep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)\nnon_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)\n\nconfig = {\n    'num_classes': 1,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'embedding_size': 256,\n    'batch_size': 2,\n    'epochs': 120,\n    'learning_rate': 1e-5,\n    'hidden_dims': 256,\n    'bidirectional': False,\n    'cuda': False\n}\n\nclass AudioBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(AudioBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n\n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True))\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        self.lstm_net_audio = nn.GRU(self.embedding_size,\n                                self.hidden_dims,\n                                num_layers=self.rnn_layers,\n                                dropout=self.dropout,\n                                bidirectional=self.bidirectional,\n                                batch_first=True)\n        # self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,\n        #                         num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)\n\n        self.bn = nn.BatchNorm1d(3)\n\n        # FC层\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            
nn.ReLU(),\n            # nn.Softmax(dim=1)\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        #         h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n       # print(atten_w.shape, m.transpose(1, 2).shape)\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        x, _ = self.lstm_net_audio(x)\n        # x = self.bn(x)\n        x = x.sum(dim=1)\n        out = self.fc_audio(x)\n        return out\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n \ndef train(epoch):\n    global lr, train_acc\n    model.train()\n    batch_idx = 1\n    total_loss = 0\n    correct = 0\n    pred = np.array([])\n    X_train = audio_features[train_dep_idxs+train_non_idxs]\n    Y_train = audio_targets[train_dep_idxs+train_non_idxs]\n    for i in range(0, X_train.shape[0], config['batch_size']):\n        if i + config['batch_size'] > X_train.shape[0]:\n            x, y = X_train[i:], Y_train[i:]\n        else:\n            x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(\n                i + config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(y)).type(torch.FloatTensor)\n\n        # zero the model's parameter gradients\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        # backpropagate to adjust the parameters\n        loss.backward()\n        # update the network parameters using the gradients\n        optimizer.step()\n        batch_idx += 1\n        # loss.item() returns the scalar value stored in the tensor\n        pred = np.hstack((pred, output.flatten().detach().numpy()))\n        total_loss += loss.item()\n    train_mae = mean_absolute_error(Y_train, pred)\n\n    print('Train Epoch: {:2d}\\t Learning rate: {:.4f}\\t Loss: {:.4f}\\t MAE: {:.4f}\\t RMSE: {:.4f}\\n '\n        .format(epoch + 1, config['learning_rate'], total_loss, train_mae, \\\n            np.sqrt(mean_squared_error(Y_train, pred))))\n    return train_mae\n\n\ndef evaluate(fold, model, train_mae):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global min_mae, min_rmse, test_dep_idxs, test_non_idxs\n    pred = np.array([])\n    X_test = audio_features[list(test_dep_idxs)+list(test_non_idxs)]\n    Y_test = 
audio_targets[list(test_dep_idxs)+list(test_non_idxs)]\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\\\n                Variable(torch.from_numpy(Y_test)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)\n\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        total_loss += loss.item()\n        pred = output.flatten().detach().numpy()\n\n        mae = mean_absolute_error(Y_test, pred)\n        rmse = np.sqrt(mean_squared_error(Y_test, pred))\n\n        print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n        print('='*89)\n\n        if mae <= min_mae and mae < 8.5 and train_mae < 13:\n            min_mae = mae\n            min_rmse = rmse\n            # mode = 'bi' if config['bidirectional'] else 'norm'\n            mode = 'gru'  # checkpoint filenames all use the gru prefix\n            save(model, os.path.join(prefix, 'Model/Regression/Audio{}/{}_vlad{}_{}_{:.2f}'.format(fold+1,mode, config['embedding_size'], config['hidden_dims'], min_mae)))\n            print('*' * 64)\n            print('model saved: mae: {}\\t rmse: {}'.format(min_mae, min_rmse))\n            print('*' * 64)\n\n    return total_loss\n\nfor fold in range(3):\n    test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]\n    test_non_idxs = non_idxs[fold*44:(fold+1)*44]\n    train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))\n    train_non_idxs = list(set(non_idxs) - set(test_non_idxs))\n\n    # training data augmentation: oversample the first 14 depressed training subjects\n    # by appending every permutation of their feature segments\n    train_dep_idxs = []\n    for (i, idx) in enumerate(train_dep_idxs_tmp):\n        feat = audio_features[idx]\n        if i < 14:\n            for perm in itertools.permutations(feat, feat.shape[0]):\n                audio_features = np.vstack((audio_features, np.expand_dims(list(perm), 0)))\n                audio_targets = np.hstack((audio_targets, audio_targets[idx]))\n                train_dep_idxs.append(len(audio_features)-1)\n        else:\n            train_dep_idxs.append(idx)\n\n    # test data augmentation\n    # test_dep_idxs = []\n    # for idx in test_dep_idxs_tmp:\n    #     feat = audio_features[idx]\n    #     for i in itertools.permutations(feat, feat.shape[0]):\n    #         audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))\n    #         audio_targets = np.hstack((audio_targets, audio_targets[idx]))\n    #         test_dep_idxs.append(len(audio_features)-1)\n    test_dep_idxs = test_dep_idxs_tmp\n\n\n    model = AudioBiLSTM(config)\n\n    if config['cuda']:\n        model = model.cuda()\n\n    # optimizer = optim.Adam(model.parameters())\n    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])\n    criterion = nn.L1Loss()\n    # criterion = FocalLoss(class_num=2)\n    min_mae = 100\n    min_rmse = 100\n    train_mae = 100\n\n\n    for ep in range(1, config['epochs']):\n        train_mae = train(ep)\n        tloss = evaluate(fold, model, train_mae)\n\n# ============== prep ==============\n# X_test = np.squeeze(np.load(os.path.join(prefix, 'Features/Audio/val_samples_reg_avid256.npz'))['arr_0'], axis=2)\n# Y_test = np.load(os.path.join(prefix, 'Features/Audio/val_labels_reg_avid256.npz'))['arr_0']\n# ============== prep ==============\n\n\n# ============== SVM ==============\n\n# from sklearn.svm import SVR\n# from sklearn.model_selection import 
KFold\n\n# X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = SVR(kernel='linear', gamma='auto')\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#     # X_train, X_test = X[train_index], X[test_index]\n#     # Y_train, Y_test = Y[train_index], Y[test_index]\n#     X_train, Y_train = X[train_index], Y[train_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n#     # break\n\n# print(np.mean(maes), np.mean(rmses))\n# ============== SVM ==============\n\n# # ============== DT ==============\n# from sklearn.tree import DecisionTreeRegressor\n# from sklearn.model_selection import KFold\n\n# X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = DecisionTreeRegressor(max_depth=100, random_state=0, criterion=\"mse\")\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#     # X_train, X_test = X[train_index], X[test_index]\n#     # Y_train, Y_test = Y[train_index], Y[test_index]\n#     X_train, Y_train = X[train_index], Y[train_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# # ============== DT ==============\n\n# # ============== RF ==============\n# from sklearn.ensemble import RandomForestRegressor\n# from sklearn.model_selection import KFold\n\n# X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = RandomForestRegressor(max_depth=100, random_state=0, criterion=\"mse\")\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#     # X_train, X_test = X[train_index], X[test_index]\n#     # Y_train, Y_test = Y[train_index], Y[test_index]\n#     X_train, Y_train = X[train_index], Y[train_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# # ============== RF ==============\n\n# ============== ada ==============\n# from sklearn.ensemble import AdaBoostRegressor\n# from sklearn.model_selection import KFold\n\n# X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = AdaBoostRegressor(n_estimators=50)\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#    
 # X_train, X_test = X[train_index], X[test_index]\n#     # Y_train, Y_test = Y[train_index], Y[test_index]\n#     X_train, Y_train = X[train_index], Y[train_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# ============== ada ==============\n"
  },
  {
    "path": "DepressionCollected/Regression/fuse_net.py",
    "content": "\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import mean_absolute_error, mean_squared_error\nimport numpy as np\nimport pandas as pd\nimport wave\nimport librosa\nfrom python_speech_features import *\nimport re\nfrom allennlp.commands.elmo import ElmoEmbedder\nimport os\nimport tensorflow.compat.v1 as tf\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \"./\"))\n\ntext_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_reg_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_reg_avg.npz'))['arr_0']\naudio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0'], axis=2)\naudio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_256.npz'))['arr_0']\nfuse_features = [[audio_features[i], text_features[i]] for i in range(text_features.shape[0])]\nfuse_targets = text_targets\n\nfuse_dep_idxs = np.where(text_targets >= 53)[0]\nfuse_non_idxs = np.where(text_targets < 53)[0]\ndep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)\nnon_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)\n\ntext_model_paths = ['Model/Regression/Text1/BiLSTM_128_7.75.pt', 'Model/Regression/Text2/BiLSTM_128_8.46.pt', 'Model/Regression/Text3/BiLSTM_128_8.01.pt']\naudio_model_paths = ['Model/Regression/Audio1/gru_vlad256_256_7.60.pt', 'Model/Regression/Audio2/gru_vlad256_256_8.38.pt', 'Model/Regression/Audio3/gru_vlad256_256_8.25.pt']\n\nconfig = {\n    'num_classes': 1,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'audio_embed_size': 256,\n    'text_embed_size': 1024,\n    'batch_size': 4,\n    'epochs': 150,\n    'learning_rate': 8e-5,\n    'audio_hidden_dims': 256,\n    'text_hidden_dims': 128,\n    'cuda': False,\n    'lambda': 1e-2,\n}\n\nclass TextBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(TextBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        self.init_weight()\n        \n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # 双层lstm\n        self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=self.bidirectional)\n        \n        # self.init_weight()\n        \n        # FC层\n        # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n     
       nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            nn.ReLU(),\n            # nn.Softmax(dim=1),\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        \n        # x : [len_seq, batch_size, embedding_dim]\n        x = x.permute(1, 0, 2)\n        output, (final_hidden_state, final_cell_state) = self.lstm_net(x)\n        # output : [batch_size, len_seq, n_hidden * 2]\n        output = output.permute(1, 0, 2)\n        # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n        final_hidden_state = final_hidden_state.permute(1, 0, 2)\n        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n        # atten_out = self.attention_net(output, final_hidden_state)\n        atten_out = self.attention_net_with_w(output, final_hidden_state)\n        return self.fc_out(atten_out)\n\nclass AudioBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(AudioBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n\n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True))\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        self.lstm_net_audio = nn.GRU(self.embedding_size,\n                                self.hidden_dims,\n                                num_layers=self.rnn_layers,\n                                dropout=self.dropout,\n                                bidirectional=self.bidirectional,\n                                batch_first=True)\n        # 
self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,\n        #                         num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)\n\n        self.bn = nn.BatchNorm1d(3)\n\n        # fully connected layers\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            nn.ReLU(),\n            # nn.Softmax(dim=1)\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        #         h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n       # print(atten_w.shape, m.transpose(1, 2).shape)\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        x, _ = self.lstm_net_audio(x)\n        # x = self.bn(x)\n        x = x.sum(dim=1)\n        out = self.fc_audio(x)\n        return out\n\nclass fusion_net(nn.Module):\n    def __init__(self, text_embed_size, text_hidden_dims, rnn_layers, dropout, num_classes, \\\n         audio_hidden_dims, audio_embed_size):\n        super(fusion_net, self).__init__()\n        self.text_embed_size = text_embed_size\n        self.audio_embed_size = audio_embed_size\n        self.text_hidden_dims = text_hidden_dims\n        self.audio_hidden_dims = audio_hidden_dims\n        self.rnn_layers = rnn_layers\n        self.dropout = dropout\n        self.num_classes = num_classes\n        \n        # ============================= TextBiLSTM =================================\n        \n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.text_hidden_dims, self.text_hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n\n        # stacked two-layer LSTM\n        self.lstm_net = nn.LSTM(self.text_embed_size, self.text_hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=True)\n        # fully connected layers\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.text_hidden_dims, self.text_hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout)\n        )\n        \n        # ============================= TextBiLSTM =================================\n\n        # ============================= AudioBiLSTM =============================\n\n        self.lstm_net_audio = nn.GRU(self.audio_embed_size,\n                           
     self.audio_hidden_dims,\n                                num_layers=self.rnn_layers,\n                                dropout=self.dropout,\n                                bidirectional=False,\n                                batch_first=True)\n\n        self.fc_audio = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.audio_hidden_dims, self.audio_hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout)\n        )\n        \n        # ============================= AudioBiLSTM =============================\n\n        # ============================= last fc layer =============================\n        # self.bn = nn.BatchNorm1d(self.text_hidden_dims + self.audio_hidden_dims)\n        # modal attention\n        self.modal_attn = nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.text_hidden_dims + self.audio_hidden_dims, bias=False)\n        self.fc_final = nn.Sequential(\n            nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.num_classes, bias=False),\n            nn.ReLU(),\n            # nn.Softmax(dim=1),\n            # nn.Sigmoid()\n        )\n        \n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n    \n    def pretrained_feature(self, x):\n        with torch.no_grad():\n            x_text = []\n            x_audio = []\n            for ele in x:\n                x_text.append(ele[1])\n                x_audio.append(ele[0])\n            x_text, x_audio = Variable(torch.tensor(x_text).type(torch.FloatTensor), requires_grad=False), Variable(torch.tensor(x_audio).type(torch.FloatTensor), requires_grad=False)\n            # ============================= TextBiLSTM =================================\n            # x : [len_seq, batch_size, embedding_dim]\n            x_text = x_text.permute(1, 0, 2)\n            output, (final_hidden_state, _) = self.lstm_net(x_text)\n            # output : [batch_size, len_seq, n_hidden * 2]\n            output = output.permute(1, 0, 2)\n            # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n            final_hidden_state = final_hidden_state.permute(1, 0, 2)\n            # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n            # atten_out = self.attention_net(output, final_hidden_state)\n            atten_out = self.attention_net_with_w(output, final_hidden_state)\n            text_feature = 
self.fc_out(atten_out)\n\n            # ============================= TextBiLSTM =================================\n\n            # ============================= AudioBiLSTM =============================\n\n            x_audio, _ = self.lstm_net_audio(x_audio)\n            x_audio = x_audio.sum(dim=1)\n            audio_feature = self.fc_audio(x_audio)\n\n        # ============================= AudioBiLSTM =============================\n        return (text_feature, audio_feature)\n        \n    def forward(self, x): \n        # x = self.bn(x)\n        modal_weights = torch.sigmoid(self.modal_attn(x))\n        # modal_weights = self.modal_attn(x)\n        x = (modal_weights * x)\n        output = self.fc_final(x)\n        return output\n    \nclass MyLoss(nn.Module):\n    def __init__(self):\n        super(MyLoss, self).__init__()\n        \n    def forward(self, text_feature, audio_feature, target, model):\n        weight = model.fc_final[0].weight\n        # bias = model.fc_final[0].bias\n        # print(weight, bias)\n        pred_text = F.linear(text_feature, weight[:, :config['text_hidden_dims']])\n        pred_audio = F.linear(audio_feature, weight[:, config['text_hidden_dims']:])\n        # l = nn.CrossEntropyLoss()\n        l = nn.SmoothL1Loss()\n        target = torch.tensor(target).view_as(pred_text).float()\n        return l(pred_text, target) + l(pred_audio, target)\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n\ndef train(model, epoch):\n    global max_train_acc, train_acc\n    model.train()\n    batch_idx = 1\n    total_loss = 0\n    correct = 0\n    pred = np.array([])\n    X_train = []\n    Y_train = []\n    for idx in train_dep_idxs+train_non_idxs:\n        X_train.append(fuse_features[idx])\n        Y_train.append(fuse_targets[idx])\n    for i in range(0, len(X_train), config['batch_size']):\n        if i + config['batch_size'] > len(X_train):\n            x, y = X_train[i:], Y_train[i:]\n        else:\n            x, y = X_train[i:(i+config['batch_size'])], Y_train[i:(i+config['batch_size'])]\n        # zero the model's parameter gradients\n        optimizer.zero_grad()\n        text_feature, audio_feature = model.pretrained_feature(x)\n        audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()\n        text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()\n        # concat_x = torch.cat((text_feature_norm, audio_feature_norm), dim=1)\n        concat_x = torch.cat((text_feature, audio_feature), dim=1)\n        output = model(concat_x)\n        # loss = criterion(output, torch.tensor(y).float())\n        loss = criterion(text_feature, audio_feature, y, model)\n        # backpropagate to adjust the parameters\n        loss.backward()\n        # update the network parameters using the gradients\n        optimizer.step()\n        batch_idx += 1\n        # loss.item() returns the scalar value stored in the tensor\n        pred = np.hstack((pred, output.flatten().detach().numpy()))\n        total_loss += loss.item()\n    train_mae = mean_absolute_error(Y_train, pred)\n    print('Train Epoch: {:2d}\\t Learning rate: {:.4f}\\t Loss: {:.4f}\\t MAE: {:.4f}\\t RMSE: {:.4f}\\n '\n        .format(epoch + 1, config['learning_rate'], total_loss, train_mae, \\\n            np.sqrt(mean_squared_error(Y_train, pred))))\n    return train_mae\n\ndef evaluate(model, fold, train_mae):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global min_mae, min_rmse, test_dep_idxs, test_non_idxs\n    pred = np.array([])\n    X_test = []\n    Y_test = []\n 
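   # added note: rebuild this fold's held-out test set from the reserved depressed/non-depressed indices\n 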
   for idx in list(test_dep_idxs)+list(test_non_idxs):\n        X_test.append(fuse_features[idx])\n        Y_test.append(fuse_targets[idx])\n    for i in range(0, len(X_test), config['batch_size']):\n        if i + config['batch_size'] > len(X_test):\n            x, y = X_test[i:], Y_test[i:]\n        else:\n            x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]\n        text_feature, audio_feature = model.pretrained_feature(x)\n        with torch.no_grad():\n            audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()\n            text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()\n            concat_x = torch.cat((text_feature, audio_feature), dim=1)\n            # concat_x = torch.cat((text_feature_norm, audio_feature_norm), dim=1)\n            output = model(concat_x)\n        # loss = criterion(output, torch.tensor(y).float())\n        loss = criterion(text_feature, audio_feature, y, model)\n        pred = np.hstack((pred, output.flatten().detach().numpy()))\n        total_loss += loss.item()\n        \n    mae = mean_absolute_error(Y_test, pred)\n    rmse = np.sqrt(mean_squared_error(Y_test, pred))\n\n    print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n    print('='*89)\n\n    if mae <= min_mae and mae < 8.2 and train_mae < 13:\n        min_mae = mae\n        min_rmse = rmse\n        save(model, os.path.join(prefix, 'Model/Regression/Fuse{}/fuse_{:.2f}'.format(fold+1, min_mae)))\n        print('*' * 64)\n        print('model saved: mae: {}\\t rmse: {}'.format(min_mae, min_rmse))\n        print('*' * 64)\n\n    return total_loss\n\ndef evaluate_audio(model):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global min_mae, min_rmse, test_dep_idxs, test_non_idxs\n    pred = np.array([])\n    X_test = []\n    Y_test = []\n    for idx in list(test_dep_idxs)+list(test_non_idxs):\n        X_test.append(fuse_features[idx][0])\n        Y_test.append(fuse_targets[idx])\n    X_test = np.array(X_test)\n    Y_test = np.array(Y_test)\n\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\\\n                Variable(torch.from_numpy(Y_test)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)\n\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        total_loss += loss.item()\n        pred = output.flatten().detach().numpy()\n\n        mae = mean_absolute_error(Y_test, pred)\n        rmse = np.sqrt(mean_squared_error(Y_test, pred))\n\n        print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n        print('='*89)\n\ndef evaluate_text(model):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global min_mae, min_rmse, test_dep_idxs, test_non_idxs\n    pred = np.array([])\n    X_test = []\n    Y_test = []\n    for idx in list(test_dep_idxs)+list(test_non_idxs):\n        X_test.append(fuse_features[idx][1])\n        Y_test.append(fuse_targets[idx])\n    X_test = np.array(X_test)\n    Y_test = np.array(Y_test)\n    criterion = nn.SmoothL1Loss()\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\\\n                
Variable(torch.from_numpy(Y_test)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)\n\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        total_loss += loss.item()\n        pred = output.flatten().detach().numpy()\n\n        mae = mean_absolute_error(Y_test, pred)\n        rmse = np.sqrt(mean_squared_error(Y_test, pred))\n\n        print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n        print('='*89)\n\nfor fold in range(3):\n    test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]\n    test_non_idxs = non_idxs[fold*44:(fold+1)*44]\n    train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))\n    train_non_idxs = list(set(non_idxs) - set(test_non_idxs))\n\n    train_dep_idxs = []\n    test_dep_idxs = []\n    # depression data augmentation\n    for (i, idx) in enumerate(train_dep_idxs_tmp):\n        feat = fuse_features[idx]\n        audio_perm = itertools.permutations(feat[0], 3)\n        text_perm = itertools.permutations(feat[1], 3)\n        if i < 14:\n            for fuse_perm in zip(audio_perm, text_perm):\n                fuse_features.append(fuse_perm)\n                fuse_targets = np.hstack((fuse_targets, fuse_targets[idx]))\n                train_dep_idxs.append(len(fuse_features)-1)\n        else:\n            train_dep_idxs.append(idx)\n\n    test_dep_idxs = test_dep_idxs_tmp\n\n    model = fusion_net(config['text_embed_size'], config['text_hidden_dims'], config['rnn_layers'], \\\n    config['dropout'], config['num_classes'], config['audio_hidden_dims'], config['audio_embed_size'])\n\n    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])\n    # optimizer = optim.Adam(model.parameters())\n    # criterion = nn.SmoothL1Loss()\n    criterion = MyLoss()\n\n    text_lstm_model = torch.load(os.path.join(prefix, text_model_paths[fold]))\n    audio_lstm_model = torch.load(os.path.join(prefix, audio_model_paths[fold]))\n    model_state_dict = {}\n    model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']\n    model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']\n    model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']\n    model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']\n\n    model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']\n    model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']\n    model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']\n    model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']\n\n    model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']\n    model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']\n    model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']\n    model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']\n    model.load_state_dict(text_lstm_model.state_dict(), strict=False)\n    # 
model.load_state_dict(audio_lstm_model.state_dict(), strict=False)\n    model.load_state_dict(model_state_dict, strict=False)\n    \n    for param in model.parameters():\n        param.requires_grad = True\n\n    model.fc_final[0].weight.requires_grad = True\n    # model.fc_final[0].bias.requires_grad = True\n    model.modal_attn.weight.requires_grad = True\n    min_mae = 100\n    min_rmse = 100\n    train_mae = 100\n\n    for ep in range(1, config['epochs']):\n        train_mae = train(model, ep)\n        tloss = evaluate(model, fold, train_mae)\n    # evaluate_audio(audio_lstm_model)\n    # evaluate_text(text_lstm_model)"
  },
  {
    "path": "DepressionCollected/Regression/text_bilstm_perm.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom torch.nn import functional as F\nimport torch.optim as optim\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.metrics import mean_absolute_error, mean_squared_error\nfrom sklearn.model_selection import train_test_split\n\nimport numpy as np\nimport pandas as pd\nimport os\nimport pickle\nimport random\nimport itertools\n\nprefix = os.path.abspath(os.path.join(os.getcwd(), \"../\"))\ntext_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_reg_avg.npz'))['arr_0']\ntext_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_reg_avg.npz'))['arr_0']\n\ndep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)\nnon_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)\n\nconfig = {\n    'num_classes': 1,\n    'dropout': 0.5,\n    'rnn_layers': 2,\n    'embedding_size': 1024,\n    'batch_size': 2,\n    'epochs': 110,\n    'learning_rate': 1e-5,\n    'hidden_dims': 128,\n    'bidirectional': True,\n    'cuda': False,\n}\n\nclass TextBiLSTM(nn.Module):\n    def __init__(self, config):\n        super(TextBiLSTM, self).__init__()\n        self.num_classes = config['num_classes']\n        self.learning_rate = config['learning_rate']\n        self.dropout = config['dropout']\n        self.hidden_dims = config['hidden_dims']\n        self.rnn_layers = config['rnn_layers']\n        self.embedding_size = config['embedding_size']\n        self.bidirectional = config['bidirectional']\n\n        self.build_model()\n        self.init_weight()\n        \n    def init_weight(net):\n        for name, param in net.named_parameters():\n            if 'bias' in name:\n                nn.init.constant_(param, 0.0)\n            elif 'weight' in name:\n                nn.init.xavier_uniform_(param)\n\n    def build_model(self):\n        # attention layer\n        self.attention_layer = nn.Sequential(\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(inplace=True)\n        )\n        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)\n\n        # 双层lstm\n        self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,\n                                num_layers=self.rnn_layers, dropout=self.dropout,\n                                bidirectional=self.bidirectional)\n        \n        # self.init_weight()\n        \n        # FC层\n        # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)\n        self.fc_out = nn.Sequential(\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.hidden_dims),\n            nn.ReLU(),\n            nn.Dropout(self.dropout),\n            nn.Linear(self.hidden_dims, self.num_classes),\n            nn.ReLU(),\n            # nn.Softmax(dim=1),\n        )\n\n    def attention_net_with_w(self, lstm_out, lstm_hidden):\n        '''\n        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]\n        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]\n        :return: [batch_size, n_hidden]\n        '''\n        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)\n        # h [batch_size, time_step, hidden_dims]\n        h = lstm_tmp_out[0] + lstm_tmp_out[1]\n        # h = lstm_out\n        # [batch_size, num_layers * num_directions, n_hidden]\n        lstm_hidden = torch.sum(lstm_hidden, dim=1)\n        # [batch_size, 1, n_hidden]\n        lstm_hidden = 
lstm_hidden.unsqueeze(1)\n        # atten_w [batch_size, 1, hidden_dims]\n        atten_w = self.attention_layer(lstm_hidden)\n        # m [batch_size, time_step, hidden_dims]\n        m = nn.Tanh()(h)\n        # atten_context [batch_size, 1, time_step]\n        atten_context = torch.bmm(atten_w, m.transpose(1, 2))\n        # softmax_w [batch_size, 1, time_step]\n        softmax_w = F.softmax(atten_context, dim=-1)\n        # context [batch_size, 1, hidden_dims]\n        context = torch.bmm(softmax_w, h)\n        result = context.squeeze(1)\n        return result\n\n    def forward(self, x):\n        \n        # x : [len_seq, batch_size, embedding_dim]\n        x = x.permute(1, 0, 2)\n        output, (final_hidden_state, final_cell_state) = self.lstm_net(x)\n        # output : [batch_size, len_seq, n_hidden * 2]\n        output = output.permute(1, 0, 2)\n        # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]\n        final_hidden_state = final_hidden_state.permute(1, 0, 2)\n        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)\n        # atten_out = self.attention_net(output, final_hidden_state)\n        atten_out = self.attention_net_with_w(output, final_hidden_state)\n        return self.fc_out(atten_out)\n\ndef save(model, filename):\n    save_filename = '{}.pt'.format(filename)\n    torch.save(model, save_filename)\n    print('Saved as %s' % save_filename)\n \ndef train(epoch):\n    global lr, train_acc\n    model.train()\n    batch_idx = 1\n    total_loss = 0\n    correct = 0\n    pred = np.array([])\n    X_train = text_features[train_dep_idxs+train_non_idxs]\n    Y_train = text_targets[train_dep_idxs+train_non_idxs]\n    for i in range(0, X_train.shape[0], config['batch_size']):\n        if i + config['batch_size'] > X_train.shape[0]:\n            x, y = X_train[i:], Y_train[i:]\n        else:\n            x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(\n                i + config['batch_size'])]\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \\\n                Variable(torch.from_numpy(y)).type(torch.FloatTensor)\n\n        # zero the model's parameter gradients\n        optimizer.zero_grad()\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        # backpropagate to adjust the parameters\n        loss.backward()\n        # update the network parameters using the gradients\n        optimizer.step()\n        batch_idx += 1\n        # loss.item() returns the scalar value stored in the tensor\n        pred = np.hstack((pred, output.flatten().detach().numpy()))\n        total_loss += loss.item()\n    train_mae = mean_absolute_error(Y_train, pred)\n\n    print('Train Epoch: {:2d}\\t Learning rate: {:.4f}\\t Loss: {:.4f}\\t MAE: {:.4f}\\t RMSE: {:.4f}\\n '\n        .format(epoch + 1, config['learning_rate'], total_loss, train_mae, \\\n            np.sqrt(mean_squared_error(Y_train, pred))))\n    return train_mae\n\n\ndef evaluate(fold, model, train_mae):\n    model.eval()\n    batch_idx = 1\n    total_loss = 0\n    global min_mae, min_rmse, test_dep_idxs, test_non_idxs\n    pred = np.array([])\n    X_test = text_features[list(test_dep_idxs)+list(test_non_idxs)]\n    Y_test = text_targets[list(test_dep_idxs)+list(test_non_idxs)]\n    with torch.no_grad():\n        if config['cuda']:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), 
\n                Variable(torch.from_numpy(Y_test).type(torch.FloatTensor)).cuda()\n        else:\n            x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor)), \\\n                Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)\n\n        output = model(x)\n        loss = criterion(output, y.view_as(output))\n        total_loss += loss.item()\n        pred = output.flatten().detach().cpu().numpy()\n\n        mae = mean_absolute_error(Y_test, pred)\n        rmse = np.sqrt(mean_squared_error(Y_test, pred))\n\n        print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n        print('='*89)\n\n        # checkpoint only if this beats the best MAE so far and both the\n        # train and test errors are within sanity bounds\n        if mae <= min_mae and mae < 8.5 and train_mae < 13:\n            min_mae = mae\n            min_rmse = rmse\n            save(model, os.path.join(prefix, 'Model/Regression/Text{}/BiLSTM_{}_{:.2f}'.format(fold+1, config['hidden_dims'], min_mae)))\n            print('*' * 64)\n            print('model saved: mae: {}\\t rmse: {}'.format(min_mae, min_rmse))\n            print('*' * 64)\n\n    return total_loss\n\nfor fold in range(3):\n    test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]\n    test_non_idxs = non_idxs[fold*44:(fold+1)*44]\n    # note: set difference does not preserve order, so which depressed samples\n    # fall in the first 14 (and get permutation-augmented below) is arbitrary\n    train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))\n    train_non_idxs = list(set(non_idxs) - set(test_non_idxs))\n\n    # training data augmentation: oversample the depressed class by appending\n    # every permutation of a sample's sentence features as a new sample\n    train_dep_idxs = []\n    for (i, idx) in enumerate(train_dep_idxs_tmp):\n        feat = text_features[idx]\n        if i < 14:\n            for perm in itertools.permutations(feat, feat.shape[0]):\n                text_features = np.vstack((text_features, np.expand_dims(list(perm), 0)))\n                text_targets = np.hstack((text_targets, text_targets[idx]))\n                train_dep_idxs.append(len(text_features) - 1)\n        else:\n            train_dep_idxs.append(idx)\n\n    # test data augmentation (disabled)\n    # test_dep_idxs = []\n    # for idx in test_dep_idxs_tmp:\n    #     feat = text_features[idx]\n    #     for perm in itertools.permutations(feat, feat.shape[0]):\n    #         text_features = np.vstack((text_features, np.expand_dims(list(perm), 0)))\n    #         text_targets = np.hstack((text_targets, text_targets[idx]))\n    #         test_dep_idxs.append(len(text_features)-1)\n    test_dep_idxs = test_dep_idxs_tmp\n\n    model = TextBiLSTM(config)\n\n    if config['cuda']:\n        model = model.cuda()\n\n    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])\n    criterion = nn.SmoothL1Loss()\n    min_mae = 100\n    min_rmse = 100\n    train_mae = 100\n\n    for ep in range(config['epochs']):\n        train_mae = train(ep)\n        evaluate(fold, model, train_mae)\n\n# ============== prep ==============\n# X_test = np.squeeze(np.load(os.path.join(prefix, 'Features/Audio/val_samples_reg_avid256.npz'))['arr_0'], axis=2)\n# Y_test = np.load(os.path.join(prefix, 'Features/Audio/val_labels_reg_avid256.npz'))['arr_0']\n# ============== prep ==============\n\n\n# ============== SVM ==============\n\n# from sklearn.svm import SVR\n# from sklearn.model_selection import KFold\n\n# X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = SVR(kernel='linear', gamma='auto')\n# maes, rmses = [], []\n
# for train_index, test_index in kf.split(X):\n#     X_train, X_test = X[train_index], X[test_index]\n#     Y_train, Y_test = Y[train_index], Y[test_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# ============== SVM ==============\n\n# ============== DT ==============\n# from sklearn.tree import DecisionTreeRegressor\n# from sklearn.model_selection import KFold\n\n# X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = DecisionTreeRegressor(max_depth=100, random_state=0, criterion=\"mse\")\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#     X_train, X_test = X[train_index], X[test_index]\n#     Y_train, Y_test = Y[train_index], Y[test_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# ============== DT ==============\n\n# ============== RF ==============\n# from sklearn.ensemble import RandomForestRegressor\n# from sklearn.model_selection import KFold\n\n# X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = RandomForestRegressor(max_depth=100, random_state=0, criterion=\"mse\")\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#     X_train, X_test = X[train_index], X[test_index]\n#     Y_train, Y_test = Y[train_index], Y[test_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# ============== RF ==============\n\n# ============== ada ==============\n# from sklearn.ensemble import AdaBoostRegressor\n# from sklearn.model_selection import KFold\n\n# X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]\n# kf = KFold(n_splits=3)\n# regr = AdaBoostRegressor(n_estimators=50)\n# maes, rmses = [], []\n# for train_index, test_index in kf.split(X):\n#     X_train, X_test = X[train_index], X[test_index]\n#     Y_train, Y_test = Y[train_index], Y[test_index]\n#     regr.fit([f.flatten() for f in X_train], Y_train)\n#     pred = regr.predict([f.flatten() for f in X_test])\n\n#     mae = mean_absolute_error(Y_test, pred)\n#     rmse = np.sqrt(mean_squared_error(Y_test, pred))\n#     maes.append(mae)\n#     rmses.append(rmse)\n\n#     print('MAE: {:.4f}\\t RMSE: {:.4f}\\n'.format(mae, rmse))\n#     print('='*89)\n\n# print(np.mean(maes), np.mean(rmses))\n# ============== ada ==============\n
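\n# ============== shape check (illustrative) ==============\n# A minimal sketch, not part of the original pipeline: feed one random batch\n# through TextBiLSTM to confirm that the attention head collapses the sequence\n# dimension and the regression head emits a single score per sample. The batch\n# size and sequence length below are arbitrary assumptions.\n#\n# sanity_model = TextBiLSTM(config)\n# dummy = torch.randn(2, 3, config['embedding_size'])  # [batch_size, len_seq, embedding_size]\n# with torch.no_grad():\n#     scores = sanity_model(dummy)\n# print(scores.shape)  # expected: torch.Size([2, 1])\n# ============== shape check (illustrative) ==============\n"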
  },
  {
    "path": "README.md",
    "content": "# ICASSP2022-Depression\nAutomatic Depression Detection: a GRU/ BiLSTM-based Model and An Emotional Audio-Textual Corpus\n\nhttps://arxiv.org/pdf/2202.08210.pdf\n\nhttps://ieeexplore.ieee.org/abstract/document/9746569/\n\n## Code\n\n- Regression\n  - audio_bilstm_perm.py: train audio network \n  - text_bilstm_perm.py: train text network \n  - fuse_net.py: train multi-modal network\n- Classification\n  - audio_features_whole.py: extract audio features\n  - text_features_whole.py: extract text features\n  - audio_gru_whole.py: train audio network \n  - text_bilstm_whole.py: train text network\n  - fuse_net_whole.py: train fuse network\n\n\n## Dataset: EATD-Corpus\n\nThe EATD-Corpus is a dataset consist of audio and text files of 162 volunteers who received counseling.\n\n### How to download\nThe EATD-Corpus can be downloaded at https://1drv.ms/u/s!AsGVGqImbOwYhHUHcodFC3xmKZKK?e=mCT5oN. Password: Ymj26Uv5\n\n### How to use\n\nTraining set contains data from 83 volunteers (19 depressed and 64 non-depressed).\n\nValidation set contains data from 79 volunteers (11 depressed and 68 non-depressed).\n\nEach folder contains depression data for one volunteer.\n\n- {positive/negative/neutral}.wav: Raw audio in wav\n- {positive/negative/neutral}_out.wav: Preprocessed audio. Preprocessing operations include denoising and de-muting\n- {positive/negative/neutral}.txt: Audio translation\n- label.txt: Raw SDS score\n- new_label.txt: Standard SDS score (Raw SDS score multiplied by 1.25)\n"
  }
]