[
  {
    "path": ".gitattributes",
    "content": "# Auto detect text files and perform LF normalization\n* text=auto\n*.zip filter=lfs diff=lfs merge=lfs -text\n"
  },
  {
    "path": "Preprocess/Drop_Day_and_sub_item.py",
    "content": "import pandas as pd\nimport numpy as np\n\n\n\nif __name__ == '__main__':\n\tuser_table = pd.read_csv('../DataSet/tianchi_fresh_comp_train_user.csv')\n\titem_table = pd.read_csv('../DataSet/tianchi_fresh_comp_train_item.csv')\n\tuser_table = user_table[user_table.item_id.isin(list(item_table.item_id))]\n\tuser_table['days'] = user_table['time'].map(lambda x:x.split(' ')[0])\n\tuser_table['hours'] = user_table['time'].map(lambda x:x.split(' ')[1])\n\tuser_table = user_table[user_table['days'] != '2014-12-12']\n\tuser_table = user_table[user_table['days'] != '2014-12-11']\n\tuser_table.to_csv('../DataSet/drop1112_sub_item.csv',index=None)\n\n"
  },
  {
    "path": "README.md",
    "content": "# TianChi_YiDongTuiJian_forecast\n\n移动推荐新手实战赛学习代码（简单版本）\n\n首先解压数据\n\n\n运行说明：\n\n1.run ./Preprocess/Drop_Day_and_sub_item.py  \n2.run ./feature/extract_feture.py\n\n\n"
  },
  {
    "path": "feature/add_feture.py",
    "content": "import pandas as pd\nimport numpy as np\nimport datetime\nimport sys\nimport time\n\ndef user_click(beforesomeday):#用户在前几天各种操作在各个小时的计数\n\tuser_act_count = pd.crosstab([beforesomeday.user_id,beforesomeday.behavior_type],beforesomeday.hours,dropna=False)\n\tuser_act_count = user_act_count.unstack(fill_value = 0)\n\treturn user_act_count\n\ndef user_liveday(train_user_window1):#用户各个行为活跃的天数\n\tuser_live = train_user_window1.groupby(by = ['user_id','behavior_type']).agg({\"daystime\":lambda x:x.nunique()})\n\tuser_live = user_live.unstack(fill_value = 0)\n\treturn user_live\n\n\ndef user_item_click(beforesomeday):\n\tuser_item_act_count = pd.crosstab([beforesomeday.user_id,beforesomeday.item_id,beforesomeday.behavior_type],beforesomeday.hours)\n\tuser_item_act_count = user_item_act_count.unstack(fill_value = 0)\n\treturn user_item_act_count\n\ndef user_cate_click(beforesomeday):\n\tuser_cate_act_count = pd.crosstab([beforesomeday.user_id,beforesomeday.item_category,beforesomeday.behavior_type],beforesomeday.hours)\n\tuser_cate_act_count = user_cate_act_count.unstack(fill_value = 0)\n\treturn user_cate_act_count\n\ndef user_item_long_touch(train_user_window1):\n\t_live = train_user_window1.groupby(by = ['user_id','item_id']).agg({\"daystime\":lambda x:(x.max()-x.min()).days})\n\treturn _live\n\ndef user_cate_long_touch(train_user_window1):\n\t_live = train_user_window1.groupby(by = ['user_id','item_category']).agg({\"daystime\":lambda x:(x.max()-x.min()).days})\n\treturn _live"
  },
  {
    "path": "feature/extract_feture.py",
    "content": "import pandas as pd\nimport numpy as np\nimport datetime\nimport sys\nimport time\nimport xgboost as xgb\nfrom add_feture import *\nFEATURE_EXTRACTION_SLOT = 10\nLabelDay = datetime.datetime(2014,12,18,0,0,0)\nData = pd.read_csv(\"../DataSet/drop1112_sub_item.csv\")\nData['daystime'] = Data['days'].map(lambda x: time.strptime(x, \"%Y-%m-%d\")).map(lambda x: datetime.datetime(*x[:6]))\n\n\ndef get_train(train_user,end_time):\n    # 取出label day 前一天的记录作为打标记录\n    data_train = train_user[(train_user['daystime'] == (end_time-datetime.timedelta(days=1)))]#&((train_user.behavior_type==3)|(train_user.behavior_type==2))\n    # 训练样本中，删除重复的样本\n    data_train = data_train.drop_duplicates(['user_id', 'item_id'])\n    data_train_ui = data_train['user_id'] / data_train['item_id']\n#    print(len(data_train))\n\n    # 使用label day 的实际购买情况进行打标\n    data_label = train_user[train_user['daystime'] == end_time]\n    data_label_buy = data_label[data_label['behavior_type'] == 4]\n    data_label_buy_ui = data_label_buy['user_id'] / data_label_buy['item_id']\n\n    # 对前一天的交互记录进行打标\n    data_train_labeled = data_train_ui.isin(data_label_buy_ui)\n    dict = {True: 1, False: 0}\n    data_train_labeled = data_train_labeled.map(dict)\n\n    data_train['label'] = data_train_labeled\n    return data_train[['user_id', 'item_id','item_category', 'label']]\n\ndef get_label_testset(train_user,LabelDay):\n    # 测试集选为上一天所有的交互数据\n    data_test = train_user[(train_user['daystime'] == LabelDay)]#&((train_user.behavior_type==3)|(train_user.behavior_type==2))\n    data_test = data_test.drop_duplicates(['user_id', 'item_id'])\n    return data_test[['user_id', 'item_id','item_category']]\n\n\n\ndef item_category_feture(data,end_time,beforeoneday):\n    # data = Data[(Data['daystime']<LabelDay) & (Data['daystime']>LabelDay-datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))]\n    item_count = pd.crosstab(data.item_category,data.behavior_type)\n    item_count_before5=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5+2)]\n        item_count_before5 = pd.crosstab(beforefiveday.item_category,beforefiveday.behavior_type)\n    else:\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5)]\n        item_count_before5 = pd.crosstab(beforefiveday.item_category,beforefiveday.behavior_type)\n    item_count_before_3=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3+2)]\n        item_count_before_3 = pd.crosstab(beforethreeday.item_category,beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3)]\n        item_count_before_3 = pd.crosstab(beforethreeday.item_category,beforethreeday.behavior_type)\n\n    item_count_before_2=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7+2)]\n        item_count_before_2 = pd.crosstab(beforethreeday.item_category,beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7)]\n        item_count_before_2 = pd.crosstab(beforethreeday.item_category,beforethreeday.behavior_type)\n        \n    # beforeoneday = Data[Data['daystime'] == LabelDay-datetime.timedelta(days=1)]\n    beforeonedayitem_count = pd.crosstab(beforeoneday.item_category,beforeoneday.behavior_type)\n    countAverage = item_count/FEATURE_EXTRACTION_SLOT\n    buyRate = pd.DataFrame()\n    buyRate['click'] = item_count[1]/item_count[4]\n    buyRate['skim'] = item_count[2]/item_count[4]\n    buyRate['collect'] = item_count[3]/item_count[4]\n    buyRate.index = item_count.index\n\n    buyRate_2 = pd.DataFrame()\n    buyRate_2['click'] = item_count_before5[1]/item_count_before5[4]\n    buyRate_2['skim'] = item_count_before5[2]/item_count_before5[4]\n    buyRate_2['collect'] = item_count_before5[3]/item_count_before5[4]\n    buyRate_2.index = item_count_before5.index\n\n    buyRate_3 = pd.DataFrame()\n    buyRate_3['click'] = item_count_before_3[1]/item_count_before_3[4]\n    buyRate_3['skim'] = item_count_before_3[2]/item_count_before_3[4]\n    buyRate_3['collect'] = item_count_before_3[3]/item_count_before_3[4]\n    buyRate_3.index = item_count_before_3.index\n\n\n    buyRate = buyRate.replace([np.inf, -np.inf], 0)\n    buyRate_2 = buyRate_2.replace([np.inf, -np.inf], 0)\n    buyRate_3 = buyRate_3.replace([np.inf, -np.inf], 0)\n    item_category_feture = pd.merge(item_count,beforeonedayitem_count,how='left',right_index=True,left_index=True)\n    item_category_feture = pd.merge(item_category_feture,countAverage,how='left',right_index=True,left_index=True)\n    item_category_feture = pd.merge(item_category_feture,buyRate,how='left',right_index=True,left_index=True)\n    item_category_feture = pd.merge(item_category_feture,item_count_before5,how='left',right_index=True,left_index=True)\n    item_category_feture = pd.merge(item_category_feture,item_count_before_3,how='left',right_index=True,left_index=True)\n    item_category_feture = pd.merge(item_category_feture,item_count_before_2,how='left',right_index=True,left_index=True)\n#    item_category_feture = pd.merge(item_category_feture,buyRate_2,how='left',right_index=True,left_index=True)\n#    item_category_feture = pd.merge(item_category_feture,buyRate_3,how='left',right_index=True,left_index=True)\n    item_category_feture.fillna(0,inplace=True)\n    return item_category_feture\n\ndef item_id_feture(data,end_time,beforeoneday):   \n    # data = Data[(Data['daystime']<LabelDay) & (Data['daystime']>LabelDay-datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))]\n    item_count = pd.crosstab(data.item_id,data.behavior_type)\n    item_count_before5=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5+2)]\n        item_count_before5 = pd.crosstab(beforefiveday.item_id,beforefiveday.behavior_type)\n    else:\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5)]\n        item_count_before5 = pd.crosstab(beforefiveday.item_id,beforefiveday.behavior_type)\n\n    item_count_before_3=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3+2)]\n        item_count_before_3 = pd.crosstab(beforethreeday.item_id,beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3)]\n        item_count_before_3 = pd.crosstab(beforethreeday.item_id,beforethreeday.behavior_type)\n\n    item_count_before_2=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7+2)]\n        item_count_before_2 = pd.crosstab(beforethreeday.item_id,beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7)]\n        item_count_before_2 = pd.crosstab(beforethreeday.item_id,beforethreeday.behavior_type)\n        \n    item_count_unq = data.groupby(by = ['item_id','behavior_type']).agg({\"user_id\":lambda x:x.nunique()});item_count_unq = item_count_unq.unstack()\n    # beforeoneday = Data[Data['daystime'] == LabelDay-datetime.timedelta(days=1)]\n    beforeonedayitem_count = pd.crosstab(beforeoneday.item_id,beforeoneday.behavior_type)\n    countAverage = item_count/FEATURE_EXTRACTION_SLOT\n    buyRate = pd.DataFrame()\n    buyRate['click'] = item_count[1]/item_count[4]\n    buyRate['skim'] = item_count[2]/item_count[4]\n    buyRate['collect'] = item_count[3]/item_count[4]\n    buyRate.index = item_count.index\n\n    buyRate_2 = pd.DataFrame()\n    buyRate_2['click'] = item_count_before5[1]/item_count_before5[4]\n    buyRate_2['skim'] = item_count_before5[2]/item_count_before5[4]\n    buyRate_2['collect'] = item_count_before5[3]/item_count_before5[4]\n    buyRate_2.index = item_count_before5.index\n\n    buyRate_3 = pd.DataFrame()\n    buyRate_3['click'] = item_count_before_3[1]/item_count_before_3[4]\n    buyRate_3['skim'] = item_count_before_3[2]/item_count_before_3[4]\n    buyRate_3['collect'] = item_count_before_3[3]/item_count_before_3[4]\n    buyRate_3.index = item_count_before_3.index\n\n    buyRate = buyRate.replace([np.inf, -np.inf], 0)\n    buyRate_2 = buyRate_2.replace([np.inf, -np.inf], 0)\n    buyRate_3 = buyRate_3.replace([np.inf, -np.inf], 0)\n    item_id_feture = pd.merge(item_count,beforeonedayitem_count,how='left',right_index=True,left_index=True)\n    item_id_feture = pd.merge(item_id_feture,countAverage,how='left',right_index=True,left_index=True)\n    item_id_feture = pd.merge(item_id_feture,buyRate,how='left',right_index=True,left_index=True)\n    item_id_feture = pd.merge(item_id_feture,item_count_unq,how='left',right_index=True,left_index=True)\n    item_id_feture = pd.merge(item_id_feture,item_count_before5,how='left',right_index=True,left_index=True)\n    item_id_feture = pd.merge(item_id_feture,item_count_before_3,how='left',right_index=True,left_index=True)\n    item_id_feture = pd.merge(item_id_feture,item_count_before_2,how='left',right_index=True,left_index=True)\n#    item_id_feture = pd.merge(item_id_feture,buyRate_2,how='left',right_index=True,left_index=True)\n#    item_id_feture = pd.merge(item_id_feture,buyRate_3,how='left',right_index=True,left_index=True)\n    item_id_feture.fillna(0,inplace=True)\n    return item_id_feture\n\n\ndef user_id_feture(data,end_time,beforeoneday):   \n    # data = Data[(Data['daystime']<LabelDay) & (Data['daystime']>LabelDay-datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))]\n    user_count = pd.crosstab(data.user_id,data.behavior_type)\n    user_count_before5=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5+2)]\n        user_count_before5 = pd.crosstab(beforefiveday.user_id,beforefiveday.behavior_type)\n    else:\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5)]\n        user_count_before5 = pd.crosstab(beforefiveday.user_id,beforefiveday.behavior_type)\n\n    user_count_before_3=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3+2)]\n        user_count_before_3 = pd.crosstab(beforethreeday.user_id,beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3)]\n        user_count_before_3 = pd.crosstab(beforethreeday.user_id,beforethreeday.behavior_type)\n\n    user_count_before_2=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7+2)]\n        user_count_before_2 = pd.crosstab(beforethreeday.user_id,beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7)]\n        user_count_before_2 = pd.crosstab(beforethreeday.user_id,beforethreeday.behavior_type)\n        \n    # beforeoneday = Data[Data['daystime'] == LabelDay-datetime.timedelta(days=1)]\n    beforeonedayuser_count = pd.crosstab(beforeoneday.user_id,beforeoneday.behavior_type)\n    countAverage = user_count/FEATURE_EXTRACTION_SLOT\n    buyRate = pd.DataFrame()\n    buyRate['click'] = user_count[1]/user_count[4]\n    buyRate['skim'] = user_count[2]/user_count[4]\n    buyRate['collect'] = user_count[3]/user_count[4]\n    buyRate.index = user_count.index\n\n    buyRate_2 = pd.DataFrame()\n    buyRate_2['click'] = user_count_before5[1]/user_count_before5[4]\n    buyRate_2['skim'] = user_count_before5[2]/user_count_before5[4]\n    buyRate_2['collect'] = user_count_before5[3]/user_count_before5[4]\n    buyRate_2.index = user_count_before5.index\n\n    buyRate_3 = pd.DataFrame()\n    buyRate_3['click'] = user_count_before_3[1]/user_count_before_3[4]\n    buyRate_3['skim'] = user_count_before_3[2]/user_count_before_3[4]\n    buyRate_3['collect'] = user_count_before_3[3]/user_count_before_3[4]\n    buyRate_3.index = user_count_before_3.index\n\n\n    buyRate = buyRate.replace([np.inf, -np.inf], 0)\n    buyRate_2 = buyRate_2.replace([np.inf, -np.inf], 0)\n    buyRate_3 = buyRate_3.replace([np.inf, -np.inf], 0)\n\n    long_online = pd.pivot_table(beforeoneday,index=['user_id'],values=['hours'],aggfunc=[np.min,np.max,np.ptp])\n\n\n    user_id_feture = pd.merge(user_count,beforeonedayuser_count,how='left',right_index=True,left_index=True)\n    user_id_feture = pd.merge(user_id_feture,countAverage,how='left',right_index=True,left_index=True)\n    user_id_feture = pd.merge(user_id_feture,buyRate,how='left',right_index=True,left_index=True)\n    user_id_feture = pd.merge(user_id_feture,user_count_before5,how='left',right_index=True,left_index=True)\n    user_id_feture = pd.merge(user_id_feture,user_count_before_3,how='left',right_index=True,left_index=True)\n    user_id_feture = pd.merge(user_id_feture,user_count_before_2,how='left',right_index=True,left_index=True)\n    user_id_feture = pd.merge(user_id_feture,long_online,how='left',right_index=True,left_index=True)\n#    user_id_feture = pd.merge(user_id_feture,buyRate_2,how='left',right_index=True,left_index=True)\n#    user_id_feture = pd.merge(user_id_feture,buyRate_3,how='left',right_index=True,left_index=True)\n    user_id_feture.fillna(0,inplace=True)\n    return user_id_feture\n\n\n\ndef user_item_feture(data,end_time,beforeoneday):   \n    # data = Data[(Data['daystime']<LabelDay) & (Data['daystime']>LabelDay-datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))]\n    user_item_count = pd.crosstab([data.user_id,data.item_id],data.behavior_type)\n    # beforeoneday = Data[Data['daystime'] == LabelDay-datetime.timedelta(days=1)]\n    user_item_count_5=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5+2)]\n        user_item_count_5 = pd.crosstab([beforefiveday.user_id,beforefiveday.item_id],beforefiveday.behavior_type)\n    else:\n        beforefiveday = data[data['daystime']>=end_time-datetime.timedelta(days=5)]\n        user_item_count_5 = pd.crosstab([beforefiveday.user_id,beforefiveday.item_id],beforefiveday.behavior_type)\n    user_item_count_3=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3+2)]\n        user_item_count_3 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_id],beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=3)]\n        user_item_count_3 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_id],beforethreeday.behavior_type)\n\n    user_item_count_2=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7+2)]\n        user_item_count_2 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_id],beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=end_time-datetime.timedelta(days=7)]\n        user_item_count_2 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_id],beforethreeday.behavior_type)\n        \n    beforeonedayuser_item_count = pd.crosstab([beforeoneday.user_id,beforeoneday.item_id],beforeoneday.behavior_type)\n    \n#    _live = user_item_long_touch(data)\n    \n    \n    max_touchtime = pd.pivot_table(beforeoneday,index=['user_id','item_id'],values=['hours'],aggfunc=[np.min,np.max])\n    max_touchtype = pd.pivot_table(beforeoneday,index=['user_id','item_id'],values=['behavior_type'],aggfunc=np.max)\n    user_item_feture = pd.merge(user_item_count,beforeonedayuser_item_count,how='left',right_index=True,left_index=True)\n    user_item_feture = pd.merge(user_item_feture,max_touchtime,how='left',right_index=True,left_index=True)\n    user_item_feture = pd.merge(user_item_feture,max_touchtype,how='left',right_index=True,left_index=True)\n#    user_item_feture = pd.merge(user_item_feture,_live,how='left',right_index=True,left_index=True)\n\n    user_item_feture = pd.merge(user_item_feture,user_item_count_5,how='left',right_index=True,left_index=True)\n    user_item_feture = pd.merge(user_item_feture,user_item_count_3,how='left',right_index=True,left_index=True)\n    user_item_feture = pd.merge(user_item_feture,user_item_count_2,how='left',right_index=True,left_index=True)\n    user_item_feture.fillna(0,inplace=True)\n    return user_item_feture\n\ndef user_cate_feture(data,end_time,beforeoneday):   \n    # data = Data[(Data['daystime']<LabelDay) & (Data['daystime']>LabelDay-datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))]\n    user_item_count = pd.crosstab([data.user_id,data.item_category],data.behavior_type)\n    # beforeoneday = Data[Data['daystime'] == LabelDay-datetime.timedelta(days=1)]\n    \n    user_cate_count_5=None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforefiveday = data[data['daystime']>=(end_time-datetime.timedelta(days=5+2))]\n        user_cate_count_5 = pd.crosstab([beforefiveday.user_id,beforefiveday.item_category],beforefiveday.behavior_type)\n    else:\n        beforefiveday = data[data['daystime']>=(end_time-datetime.timedelta(days=5))]\n        user_cate_count_5 = pd.crosstab([beforefiveday.user_id,beforefiveday.item_category],beforefiveday.behavior_type)\n    user_cate_count_3 = None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=(end_time-datetime.timedelta(days=3+2))]\n        user_cate_count_3 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_category],beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=(end_time-datetime.timedelta(days=3))]\n        user_cate_count_3 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_category],beforethreeday.behavior_type)\n\n\n    user_cate_count_2 = None\n    if (((end_time-datetime.timedelta(days=5))<datetime.datetime(2014,12,13,0,0,0))&((end_time-datetime.timedelta(days=5))>datetime.datetime(2014,12,10,0,0,0))):\n        beforethreeday = data[data['daystime']>=(end_time-datetime.timedelta(days=7+2))]\n        user_cate_count_2 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_category],beforethreeday.behavior_type)\n    else:\n        beforethreeday = data[data['daystime']>=(end_time-datetime.timedelta(days=7))]\n        user_cate_count_2 = pd.crosstab([beforethreeday.user_id,beforethreeday.item_category],beforethreeday.behavior_type)\n        \n#    _live = user_cate_long_touch(data)\n    beforeonedayuser_item_count = pd.crosstab([beforeoneday.user_id,beforeoneday.item_category],beforeoneday.behavior_type)\n    max_touchtime = pd.pivot_table(beforeoneday,index=['user_id','item_category'],values=['hours'],aggfunc=[np.min,np.max])\n    max_touchtype = pd.pivot_table(beforeoneday,index=['user_id','item_category'],values=['behavior_type'],aggfunc=np.max)\n    user_cate_feture = pd.merge(user_item_count,beforeonedayuser_item_count,how='left',right_index=True,left_index=True)\n    user_cate_feture = pd.merge(user_cate_feture,max_touchtime,how='left',right_index=True,left_index=True)\n    user_cate_feture = pd.merge(user_cate_feture,max_touchtype,how='left',right_index=True,left_index=True)\n#    user_cate_feture = pd.merge(user_cate_feture,_live,how='left',right_index=True,left_index=True)\n    user_cate_feture = pd.merge(user_cate_feture,user_cate_count_5,how='left',right_index=True,left_index=True)\n    user_cate_feture = pd.merge(user_cate_feture,user_cate_count_3,how='left',right_index=True,left_index=True)\n    user_cate_feture = pd.merge(user_cate_feture,user_cate_count_2,how='left',right_index=True,left_index=True)\n    user_cate_feture.fillna(0,inplace=True)\n    return user_cate_feture\n\n\nif __name__ == '__main__':\n#    pass\n    result=[]\n    for i in range(15):\n        train_user_window1 = None\n        if (LabelDay >= datetime.datetime(2014,12,12,0,0,0)):\n            train_user_window1 = Data[(Data['daystime'] > (LabelDay - datetime.timedelta(days=FEATURE_EXTRACTION_SLOT+2))) & (Data['daystime'] < LabelDay)]\n        else:\n            train_user_window1 = Data[(Data['daystime'] > (LabelDay - datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))) & (Data['daystime'] < LabelDay)]\n#        train_user_window1 = Data[(Data['daystime'] > (LabelDay - datetime.timedelta(days=FEATURE_EXTRACTION_SLOT))) & (Data['daystime'] < LabelDay)]\n        beforeoneday = Data[Data['daystime'] == (LabelDay-datetime.timedelta(days=1))]\n        # beforetwoday = Data[(Data['daystime'] >= (LabelDay-datetime.timedelta(days=2))) & (Data['daystime'] < LabelDay)]\n        # beforefiveday = Data[(Data['daystime'] >= (LabelDay-datetime.timedelta(days=5))) & (Data['daystime'] < LabelDay)]\n        x = get_train(Data, LabelDay)\n        add_user_click_1 = user_click(beforeoneday)\n        add_user_item_click_1 = user_item_click(beforeoneday)\n        add_user_cate_click_1 = user_cate_click(beforeoneday)\n        # add_user_click_2 = user_click(beforetwoday)\n        # add_user_click_5 = user_click(beforefiveday)\n        liveday = user_liveday(train_user_window1)\n        # sys.exit()\n        a = user_id_feture(train_user_window1, LabelDay,beforeoneday)\n        a = a.reset_index()\n        b = item_id_feture(train_user_window1, LabelDay,beforeoneday)\n        b = b.reset_index()\n        c = item_category_feture(train_user_window1, LabelDay,beforeoneday)\n        c = c.reset_index()\n        d = user_cate_feture(train_user_window1, LabelDay,beforeoneday)\n        d = d.reset_index()\n        e = user_item_feture(train_user_window1, LabelDay,beforeoneday)\n        e = e.reset_index()\n        x = pd.merge(x,a,on=['user_id'],how='left')\n        x = pd.merge(x,b,on=['item_id'],how='left')\n        x = pd.merge(x,c,on=['item_category'],how='left')\n        x = pd.merge(x,d,on=['user_id','item_category'],how='left')\n        x = pd.merge(x,e,on=['user_id','item_id'],how='left')\n        x = pd.merge(x,add_user_click_1,left_on = ['user_id'],right_index=True,how = 'left' )\n        # x = pd.merge(x,add_user_click_2,left_on = ['user_id'],right_index=True,how = 'left' )\n        # x = pd.merge(x,add_user_click_5,left_on = ['user_id'],right_index=True,how = 'left' )\n        x = pd.merge(x,add_user_item_click_1,left_on = ['user_id','item_id'],right_index=True,how = 'left' )\n        x = pd.merge(x,add_user_cate_click_1,left_on = ['user_id','item_category'],right_index=True,how = 'left' )\n        x = pd.merge(x,liveday,left_on = ['user_id'],right_index=True,how = 'left' )\n        x = x.fillna(0)\n        print(i,LabelDay,len(x))\n        LabelDay = LabelDay-datetime.timedelta(days=1)\n        if (LabelDay == datetime.datetime(2014,12,13,0,0,0)):\n            LabelDay = datetime.datetime(2014,12,10,0,0,0)\n        result.append(x)\n    train_set = pd.concat(result,axis=0,ignore_index=True)\n#    train_set.to_csv('train_train_no_jiagou.csv',index=None)\n    ###############################################\n    \n    LabelDay=datetime.datetime(2014,12,18,0,0,0)\n    test = get_label_testset(Data,LabelDay)\n\n    train_user_window1 =  Data[(Data['daystime'] > (LabelDay - datetime.timedelta(days=FEATURE_EXTRACTION_SLOT-1))) & (Data['daystime'] <= LabelDay)]\n    beforeoneday = Data[Data['daystime'] == LabelDay]\n    # beforetwoday = Data[(Data['daystime'] >= (LabelDay-datetime.timedelta(days=2))) & (Data['daystime'] < LabelDay)]\n    # beforefiveday = Data[(Data['daystime'] >= (LabelDay-datetime.timedelta(days=5))) & (Data['daystime'] < LabelDay)]\n    add_user_click = user_click(beforeoneday)\n    add_user_item_click = user_item_click(beforeoneday)\n    add_user_cate_click = user_cate_click(beforeoneday)\n    # add_user_click_2 = user_click(beforetwoday)\n    # add_user_click_5 = user_click(beforefiveday)\n    liveday = user_liveday(train_user_window1)\n    a = user_id_feture(train_user_window1, LabelDay,beforeoneday)\n    a = a.reset_index()\n    b = item_id_feture(train_user_window1, LabelDay,beforeoneday)\n    b = b.reset_index()\n    c = item_category_feture(train_user_window1, LabelDay,beforeoneday)\n    c = c.reset_index()\n    d = user_cate_feture(train_user_window1, LabelDay,beforeoneday)\n    d = d.reset_index()\n    e = user_item_feture(train_user_window1, LabelDay,beforeoneday)\n    e = e.reset_index()\n    test = pd.merge(test,a,on=['user_id'],how='left')\n    test = pd.merge(test,b,on=['item_id'],how='left')\n    test = pd.merge(test,c,on=['item_category'],how='left')\n    test = pd.merge(test,d,on=['user_id','item_category'],how='left')\n    test = pd.merge(test,e,on=['user_id','item_id'],how='left')\n    test = pd.merge(test,add_user_click,left_on = ['user_id'],right_index=True,how = 'left' )\n    # test = pd.merge(test,add_user_click_2,left_on = ['user_id'],right_index=True,how = 'left' )\n    # test = pd.merge(test,add_user_click_5,left_on = ['user_id'],right_index=True,how = 'left' )\n    test = pd.merge(test,add_user_item_click,left_on = ['user_id','item_id'],right_index=True,how = 'left' )\n    test = pd.merge(test,add_user_cate_click,left_on = ['user_id','item_category'],right_index=True,how = 'left' )\n    test = pd.merge(test,liveday,left_on = ['user_id'],right_index=True,how = 'left' )\n    test = test.fillna(0)\n#    test.to_csv('test_test_no_jiagou.csv',index=None)\n#\n#    sys.exit()\n\n    ###############采样\n    train_set_1 = train_set[train_set['label']==1]\n    train_set_0 = train_set[train_set['label']==0]\n    new_train_set_0 = train_set_0.sample(len(train_set_1)*90)\n    train_set = pd.concat([train_set_1,new_train_set_0],axis=0)\n    ###############\n    train_y = train_set['label'].values\n    train_x = train_set.drop(['user_id', 'item_id','item_category', 'label'], axis=1).values\n    test_x = test.drop(['user_id', 'item_id','item_category'], axis=1).values   \n    num_round = 900\n    params = {'max_depth': 4, 'colsample_bytree': 0.8, 'subsample': 0.8, 'eta': 0.02, 'silent': 1,\n              'objective': 'binary:logistic','eval_metric ':'error', 'min_child_weight': 2.5,#'max_delta_step':10,'gamma':0.1,'scale_pos_weight':230/1,\n               'seed': 10}  #\n    plst = list(params.items())\n    dtrain = xgb.DMatrix(train_x, label=train_y)\n    dtest = xgb.DMatrix(test_x)\n    bst = xgb.train(plst, dtrain, num_round)\n    predicted_proba = bst.predict(dtest)\n    #print(predicted_proba)\n\n    predicted_proba = pd.DataFrame(predicted_proba)\n    predicted = pd.concat([test[['user_id', 'item_id']], predicted_proba], axis=1)\n    predicted.columns = ['user_id','item_id','prob']\n    #print(predicted)\n    predicted = predicted.sort_values('prob',  axis=0,ascending=False)\n    #print(predicted)\n#    predict1 = predicted.iloc[:650, [0, 1]]\n#    # 保存到文件\n#    predict1.to_csv(\"../result/10_30_2/650_1B80minchildweight1.8.csv\", index=False)\n    \n    predict2 = predicted.iloc[:700, [0, 1]]\n    # 保存到文件\n    predict2.to_csv(\"../result/result.csv\", index=False)\n    \n#    predict3 = predicted.iloc[:750, [0, 1]]\n#    # 保存到文件\n#    predict3.to_csv(\"../result/10_30_2/750_1B80minchildweight1.8.csv\", index=False)\n    sys.exit()\n#    evaluate(predicted)\n\n\n\n\n    #####################################################################线下验证部分\n    reference = Data[Data['daystime'] == (LabelDay+datetime.timedelta(days=1))]\n    reference = reference[reference['behavior_type'] == 4]  # 购买的记录\n    reference = reference[['user_id', 'item_id']]  # 获取ui对\n    reference = reference.drop_duplicates(['user_id', 'item_id'])  # 去重\n    ui = predicted['user_id'] / predicted['item_id']\n\n    predicted=predicted[ui.duplicated() == False]\n\n    predicted_ui = predicted['user_id'] / predicted['item_id']\n    reference_ui = reference['user_id'] / reference['item_id']\n\n    is_in = predicted_ui.isin(reference_ui)\n    true_positive = predicted[is_in]\n\n    tp = len(true_positive)\n    predictedSetCount = len(predicted)\n    referenceSetCount = len(reference)\n\n    precision = tp / predictedSetCount\n    recall = tp / referenceSetCount\n\n    f_score = 2 * precision * recall / (precision + recall)\n\n    tp = recall * referenceSetCount\n    predictedSetCount = tp / precision\n\n    print('%.8f%% %.8f %.8f %.0f %.0f' %\n          (f_score * 100, precision, recall, tp, predictedSetCount))"
  },
  {
    "path": "result/your result",
    "content": ""
  }
]