[
  {
    "path": "README.md",
    "content": "# tianchi_bigdata\n任务：\n\n[详见天池大数据任务介绍](http://tianchi.aliyun.com/competition/information.htm?spm=0.0.0.0.y1LXeD&raceId=1)\n\n特征（39维）：\n\n\tuser特征、item特征、user-item特征、全局比例特征\n\t\n数据采样\n\n\t采用移动窗口target（17、15、13、11、9）+移动窗口样本采样（1、3、7、全部）\n\n训练数据\n\n\t正样本：15000，负样本：130000\n\t\n测试数据\n\n\t同样采用移动窗口变换采样，取了3天、5天、9天的做实验，最优提交为9天的，测试样本大小：155万\n\t\n结果划分\n\n\t结果最终取置信度0.78,取470条结果(子集结果)，最终f1值：11.46%\n\t\n\t排名:25/7200，队伍名：叮当\n\t\n学习模型\n\n\tRF\n\n程序架构\n\t\n\tcombine_feature_txt:混合正负样本特征\n\t\n\tcut_data_set.py:按照移动窗口方式，分割数据集\n\t\n\tfetch_feature.py：提取特征\n\t\n\tfetch_negative_sample:负样本抽样\n\t\n\tfetch_sample:提取正、负样本\n\t\n\tget_feature_vector_txt_4.py:提取特征向量，去掉用户-商品标示\n\t\n\tget_recommend_result_6.py:对最后分类结果取置信度，并得到相应的推荐结果\n\t\n\tglobal_feature.py:提取全局比例特征\n\t\n\tproduct_test_data.py:产生测试数据\n\t\n\tclassify_user_item.py:训练学习特征，并预测\n\t\n[大赛排名] (http://tianchi.aliyun.com/competition/rankingList.htm?spm=0.0.0.0.OyeBsu&season=0&raceId=1&pageIndex=2)\n"
  },
  {
    "path": "classify_user_item.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nfrom sklearn.linear_model import LogisticRegression\r\nfrom sklearn.ensemble import RandomForestClassifier\r\nfrom sklearn.ensemble import GradientBoostingClassifier\r\nimport numpy as np\r\nf = open(\"train_sample.txt\")\r\nf.readline()\r\ndata = np.loadtxt(f)\r\nX = data[:, :-1]  # select columns 1 through end\r\ny = data[:, -1]   # select column 0, the stock price\r\nprint X\r\nprint y\r\nprint 'start train'\r\n\r\nclf2 = RandomForestClassifier(n_estimators=100)\r\n#clf2=GradientBoostingClassifier()\r\nclf2.fit(X,y)\r\n#clf2 = LogisticRegression().fit(X, y)\r\nprint clf2.classes_\r\nf1=open(\"test_data_9feature.txt\")\r\ndata1=np.loadtxt(f1)\r\nX_new=data1[:,:]\r\nprint 'testing data is ok'\r\nresult=clf2.predict_proba(X_new)\r\nprint 'output result'\r\nprint result\r\nf_result=open('result9.txt','w')\r\nfor i in range(0,len(result)):\r\n    f_result.write(str(result[i])+'\\n')\r\n\r\n"
  },
  {
    "path": "combine_feature_txt.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\ni=9\r\nf1=open('combine_txt_positive_feature.txt','wb')\r\nwhile (i<18):\r\n    filename='2014-12-'+str(i)\r\n    filename=filename+'_positive_user_item_feature.txt'\r\n    f=open(filename,'r')\r\n    lines=f.readlines()\r\n    for line in lines:\r\n        f1.write(line)\r\n    i=i+1"
  },
  {
    "path": "cut_data_set.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nimport csv\r\nreader=csv.reader(file('filter_user.csv', 'rb'))\r\ncsvfile = file('9_1_data.csv', 'wb')\r\nwriter1=csv.writer(csvfile)\r\ncsvfile = file('9_3_data.csv', 'wb')\r\nwriter2=csv.writer(csvfile)\r\ncsvfile = file('9_7_data.csv', 'wb')\r\nwriter3=csv.writer(csvfile)\r\ncsvfile = file('9_all_data.csv', 'wb')\r\nwriter4=csv.writer(csvfile)\r\n##############################################取样本子集,后三天的作为验证日期，正样本10000个，负样本取100000个，最后对样本子集空间进行预测######################################\r\n\r\n#################################取的是样本全集#######################################################################\r\npositive_user_item=set()\r\nnum=0\r\nfor line in reader:\r\n    if num==0:\r\n        num=num+1\r\n        continue\r\n    time_s=line[5].split(' ')\r\n    time_slot=time_s[0].split('-')\r\n    month=int(time_slot[1])\r\n    day=int(time_slot[2])\r\n    dis_day=(12-month)*30+(19-day)\r\n    if dis_day>=10 and dis_day<=11 :\r\n        writer1.writerow(line)\r\n    if dis_day>=10 and dis_day<=13:\r\n        writer2.writerow(line)\r\n    if dis_day>=10 and dis_day<=17:\r\n        writer3.writerow(line)\r\n    if dis_day>=10 :\r\n        writer4.writerow(line)\r\n    num=num+1\r\n"
  },
  {
    "path": "fetch_feature.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\n##################抽取如下特征,浏览数、收藏数、购物车、购买数、平均活跃天数、最后活跃天数距离最终时间的天数,先不考虑平均活跃天数####\r\n\r\nimport csv\r\ndef fetch_feature(sample_filename,feature_filename,item_brand):\r\n    reader=csv.reader(file(sample_filename, 'rb'))\r\n    csvfile = file(feature_filename, 'wb')\r\n    writer=csv.writer(csvfile)\r\n###################################定义统计变量###########################################\r\n    user_item_click=dict()#(u,i)点击次数\r\n    usr_item_hide=dict()#(u,i)收藏次数\r\n    usr_item_shop_basket=dict()#(u,i)购物车次数\r\n    num=0\r\n    user_item_pair=set()#(u,i)对\r\n    user_basket=dict()#（u）购物车件数\r\n    usr_item_shop=dict()#(用户-item购买次数)\r\n    item_num=dict()\r\n    item_user=dict()\r\n    item_click=dict()\r\n    item_basket=dict()\r\n    item_hide=dict()\r\n    user_buy_brand=dict()\r\n    user_buy_item_brand=dict()\r\n    user_item_num=dict()\r\n    user_brand=dict()\r\n    user_buy=dict()\r\n    user_click=dict()\r\n    user_hide=dict()\r\n    user_item_time=dict()\r\n    user_click_item_brand=dict()\r\n    user_basket_item_brand=dict()\r\n    catogery_buy=dict()\r\n    catogery_click=dict()\r\n    catogery_basket=dict()\r\n    catogery_hide=dict()\r\n    ###################################初始化#############################\r\n    for line in reader:\r\n        if line[5].find('2014-12-17')<0:\r\n            item_brand[line[1]]=line[4]\r\n            user_hide[line[0]]=0\r\n            user_click[line[0]]=0\r\n            user_buy[line[0]]=0\r\n            user_brand[line[0]]=set()\r\n            user_item_num[line[0]]=set()\r\n            user_item_click[(line[0],line[1])]=0\r\n            usr_item_hide[(line[0],line[1])]=0\r\n            usr_item_shop_basket[(line[0],line[1])]=0\r\n            user_item_pair.add((line[0],line[1]))\r\n            usr_item_shop[(line[0],line[1])]=0\r\n            item_num[line[1]]=0\r\n            item_user[line[1]]=set()\r\n            item_click[line[1]]=0\r\n            item_basket[line[1]]=0\r\n            item_hide[line[1]]=0\r\n            user_buy_brand[line[0]]=set()\r\n            user_buy_item_brand[(line[0],item_brand[line[1]])]=0\r\n            user_click_item_brand[(line[0],item_brand[line[1]])]=0\r\n            user_basket_item_brand[(line[0],item_brand[line[1]])]=0\r\n            user_basket[line[0]]=0\r\n            catogery_buy[item_brand[line[1]]]=0\r\n            catogery_click[item_brand[line[1]]]=0\r\n            catogery_basket[item_brand[line[1]]]=0\r\n            catogery_hide[item_brand[line[1]]]=0\r\n            num=num+1\r\n#####################################统计特征############################################\r\n    for line in csv.reader(file(sample_filename, 'rb')):\r\n        if line[5].find('2014-12-17')<0:\r\n            time_s=line[5].split(' ')\r\n            time_slot=time_s[0].split('-')\r\n            month=int(time_slot[1])\r\n            day=int(time_slot[2])\r\n            dis_day=(12-month)*30+(17-day)####间隔时间\r\n            if (line[0],line[1]) not in user_item_time:\r\n                user_item_time[line[0],line[1]]=set()\r\n                user_item_time[line[0],line[1]].add(dis_day)\r\n            else:\r\n                user_item_time[line[0],line[1]].add(dis_day)\r\n            if line[2]=='1':\r\n                #################用户对该商品的点击总数############################\r\n                if (line[0],line[1]) not in user_item_click:\r\n                    user_item_click[(line[0],line[1])]=1\r\n                else:\r\n                    user_item_click[(line[0],line[1])]=1+user_item_click[(line[0],line[1])]\r\n                ####用户点击总数#######\r\n                user_click[line[0]]=user_click[line[0]]+1\r\n                ###############统计点击次数###############################\r\n                #########################统计用户对该商品所对应类型的次数#######################\r\n                user_click_item_brand[(line[0],item_brand[line[1]])]=1+user_click_item_brand[(line[0],item_brand[line[1]])]\r\n                #########################商品对应的种类被点击的次数#############\r\n                catogery_click[item_brand[line[1]]]=catogery_click[item_brand[line[1]]]+1\r\n                ###################商品被点击的总数####################\r\n                item_click[line[1]]=item_click[line[1]]+1\r\n            if line[2]=='2':\r\n                if (line[0],line[1]) not in usr_item_hide:\r\n                    usr_item_hide[(line[0],line[1])]=1\r\n                else:\r\n                    usr_item_hide[(line[0],line[1])]=1+usr_item_hide[(line[0],line[1])]\r\n                #############用户收藏总数################\r\n                user_hide[line[0]]=user_hide[line[0]]+1\r\n                ################商品类型被收藏的次数############\r\n                catogery_hide[item_brand[line[1]]]=catogery_hide[item_brand[line[1]]]+1\r\n                ################商品被加入收藏的次数############\r\n                item_hide[line[1]]=item_hide[line[1]]+1\r\n            if line[2]=='3':\r\n                ################（u,i）加入购物车的次数##############\r\n                if (line[0],line[1]) not in usr_item_shop_basket:\r\n                    usr_item_shop_basket[(line[0],line[1])]=1\r\n                else:\r\n                    usr_item_shop_basket[(line[0],line[1])]=1+usr_item_shop_basket[(line[0],line[1])]\r\n                ############用户加入购物车的总数#######################\r\n                user_basket[line[0]]=user_basket[line[0]]+1\r\n                #########################统计用户对该商品所对应类型的购物车次数###################\r\n                user_basket_item_brand[(line[0],item_brand[line[1]])]=1+user_basket_item_brand[(line[0],item_brand[line[1]])]\r\n                ########################商品种类被加入购物车的次数#####################\r\n                catogery_basket[item_brand[line[1]]]=catogery_basket[item_brand[line[1]]]+1\r\n                ################商品被加入购物车的次数############\r\n                item_basket[line[1]]=item_basket[line[1]]+1\r\n            if line[2]=='4':\r\n                ##############################该用户购买该商品的次数############################\r\n                if (line[0],line[1]) not in usr_item_shop:\r\n                    usr_item_shop[(line[0],line[1])]=1\r\n                else:\r\n                    usr_item_shop[(line[0],line[1])]=usr_item_shop[(line[0],line[1])]+1\r\n                #############用户购买商品的总次数#########################\r\n                user_buy[line[0]]=user_buy[line[0]]+1\r\n                ###########################统计该商品被购买的次数##############################\r\n                item_num[line[1]]=item_num[line[1]]+1\r\n                ###############商品被多少人购买####################\r\n                item_user[line[1]].add((line[0]))\r\n                ##########################种类被购买的次数######################\r\n                catogery_buy[item_brand[line[1]]]=catogery_buy[item_brand[line[1]]]+1\r\n                ################用户购买商品类型的总数############\r\n                user_buy_brand[line[0]].add(item_brand[line[1]])\r\n                ####################用户购买该类型商品种类的数目###########\r\n                user_buy_item_brand[(line[0],item_brand[line[1]])]=1+user_buy_item_brand[(line[0],item_brand[line[1]])]\r\n            #############################用户交互的商品数################\r\n            user_item_num[line[0]].add((line[1]))\r\n            ############################用户交互的商品品牌数####################\r\n            user_brand[line[0]].add(item_brand[line[1]])\r\n#####################################写结果##################################################################\r\n    for k in user_item_pair:\r\n        ####################用户交互的商品数与购买的商品数之比######################\r\n        if user_buy[k[0]]!=0:\r\n            comm_item_ratio=float(\"%.2f\"%(len(user_item_num[k[0]])/user_buy[k[0]]))\r\n        else:\r\n            comm_item_ratio=0\r\n        #################用户交互的商品品牌数与购买的商品品牌数之比##################\r\n        if len(user_buy_brand[k[0]])!=0:\r\n            comm_brand_buy_ratio=float(\"%.2f\"%(len(user_brand[k[0]])/len(user_buy_brand[k[0]])))\r\n        else:\r\n            comm_brand_buy_ratio=0\r\n        ###################该类型商品点击与购买的比例###############\r\n        if catogery_buy[item_brand[k[1]]]!=0:\r\n            catogry_click_buy=float(\"%.2f\"%(catogery_click[item_brand[k[1]]]/catogery_buy[item_brand[k[1]]]))\r\n        else:\r\n            catogry_click_buy=0\r\n        ###################该类型商品加入购物车与购买的比例###############\r\n        if catogery_buy[item_brand[k[1]]]!=0:\r\n            catogry_basket_buy=float(\"%.2f\"%(catogery_basket[item_brand[k[1]]]/catogery_buy[item_brand[k[1]]]))\r\n        else:\r\n            catogry_basket_buy=0\r\n        ####购买该商品所对应的类型占总的购买量的比例########################\r\n        if user_buy[k[0]]!=0:\r\n            buy_catogry_ratio=float(\"%.2f\"%(user_buy_item_brand[(k[0],item_brand[k[1]])]/user_buy[k[0]]))\r\n        else:\r\n            buy_catogry_ratio=0\r\n        ##########################################点击该商品所对应的类型占总的点击量的比例####################\r\n        if user_click[k[0]]!=0:\r\n            click_catogry_ratio=float(\"%.2f\"%(user_click_item_brand[(k[0],item_brand[k[1]])]/user_click[k[0]]))\r\n        else:\r\n            click_catogry_ratio=0\r\n        #########################################购物车该商品所对应的类型占总的购物车的比例######################\r\n        if user_basket[k[0]]!=0:\r\n            basket_catogry_ratio=float(\"%.2f\"%(user_basket_item_brand[(k[0],item_brand[k[1]])]/user_basket[k[0]]))\r\n        else:\r\n            basket_catogry_ratio=0\r\n        ####用户点击购买比例###############\r\n        if user_buy[k[0]]!=0:\r\n            click_buy_user_ratio=float(\"%.2f\"%(user_click[k[0]]/user_buy[k[0]]))\r\n        else:\r\n            click_buy_user_ratio=0\r\n        ######用户-商品对购物车与购买的比例####################\r\n        if usr_item_shop[k]!=0:\r\n            basket_buy_ratio=float(\"%.2f\"%(usr_item_shop_basket[k]/usr_item_shop[k]))\r\n        else:\r\n            basket_buy_ratio=0\r\n        ##########用户-商品点击与购物车的比例######\r\n        if usr_item_shop_basket[k]!=0:\r\n            click_basket=float(\"%.2f\"%(user_item_click[k]/usr_item_shop_basket[k]))\r\n        else:\r\n            click_basket=0\r\n        ##################用户购物车与购买的比例#####################\r\n        if user_buy[k[0]]!=0:\r\n            basket_buy_user_ratio=float(\"%.2f\"%(user_basket[k[0]]/user_buy[k[0]]))\r\n        else:\r\n            basket_buy_user_ratio=0\r\n        #################用户点击与购物车的比例###################\r\n        if user_basket[k[0]]!=0:\r\n            ratio_click_basket=float(\"%.2f\"%(user_click[k[0]]/user_basket[k[0]]))\r\n        else:\r\n            ratio_click_basket=0\r\n        ######################用户收藏与购物的比例#################\r\n        if user_buy[k[0]]!=0:\r\n            ratio_hide_buy=float(\"%.2f\"%(user_hide[k[0]]/user_buy[k[0]]))\r\n        else:\r\n            ratio_hide_buy=0\r\n        ######################该类型商品收藏与购买的比例#################\r\n        if catogery_buy[item_brand[k[1]]]!=0:\r\n            catogry_hide_buy=float(\"%.2f\"%(catogery_hide[item_brand[k[1]]]/catogery_buy[item_brand[k[1]]]))\r\n        else:\r\n            catogry_hide_buy=0\r\n        ###################用户最早接触该物品的时间以及最晚接触该物品的时间#####################\r\n        sort_user_item_time=list(user_item_time[k])\r\n        eraliest_time=sort_user_item_time[-1]\r\n        latest_time=sort_user_item_time[0]\r\n\r\n        writer.writerow((k[0],k[1],user_item_click[k],user_click[k[0]],usr_item_hide[k],user_hide[k[0]],\\\r\n        usr_item_shop_basket[k],user_basket[k[0]],usr_item_shop[k],user_buy[k[0]],item_num[k[1]],len(item_user[k[1]]),\\\r\n        len(user_buy_brand[k[0]]),user_buy_item_brand[(k[0],item_brand[k[1]])],len(user_item_num[k[0]]),len(user_brand[k[0]]),\r\n        user_click_item_brand[(k[0],item_brand[k[1]])],user_basket_item_brand[(k[0],item_brand[k[1]])],catogery_click[item_brand[k[1]]],\r\n        catogery_hide[item_brand[k[1]]],catogery_basket[item_brand[k[1]]],catogery_buy[item_brand[k[1]]],item_click[k[1]],\r\n        item_hide[k[1]],item_basket[k[1]],\r\n        buy_catogry_ratio,click_buy_user_ratio,basket_buy_ratio,click_basket,basket_buy_user_ratio,ratio_hide_buy,\r\n        ratio_click_basket,click_catogry_ratio,basket_catogry_ratio,catogry_click_buy,catogry_basket_buy,\r\n        catogry_hide_buy,comm_item_ratio,comm_brand_buy_ratio,eraliest_time,latest_time))\r\n####################39维特征##################################\r\n\r\nif __name__=='__main__':\r\n    item_brand=dict()\r\n    fetch_feature('./17/17_1_data.csv',\\\r\n                  './17/17_1_data_feature.csv',item_brand)\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n"
  },
  {
    "path": "fetch_negative_sample.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nimport csv\r\nimport random\r\n\r\nnum=1\r\ncsvfile = file('sample_17_negative_user.csv', 'wb')\r\nwriter=csv.writer(csvfile)\r\nfor line in csv.reader(file('17_negative.csv','r')):\r\n    if num%200==0:\r\n        writer.writerow(line)\r\n    num=num+1\r\nprint num"
  },
  {
    "path": "fetch_sample.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nimport csv\r\nimport os\r\nbuy=set()\r\nfor line in csv.reader(file('./17/17_1_data.csv','rb')):\r\n    if line[5].find('2014-12-17')>=0:\r\n        if line[2]=='4':\r\n            buy.add((line[0],line[1]))\r\ncsvfile = file('17_negative.csv', 'wb')\r\nwriter=csv.writer(csvfile)\r\n\r\nfiles=os.listdir('./17/')\r\nfor filename in files:\r\n    if filename.find('feature')>=0:\r\n        for line in csv.reader(file('./17/'+filename,'rb')):\r\n            if (line[0],line[1]) not in buy:\r\n                writer.writerow(line)\r\n\r\n"
  },
  {
    "path": "get_feature_vector_txt_4.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:Utf-8\r\nimport csv\r\n##########################加类标,去用户-商品名，取纯特征文档#############################\r\n\r\ndef put_on_label(feature_csv,feature_txt_label):\r\n    f=open(feature_txt_label,'w')\r\n    for line in csv.reader(file(feature_csv, 'rb')):\r\n        f.write(line[2]+' '+line[3]+' '+line[4]+' '+line[5]+' '+line[6]\r\n                +' '+line[7]+line[8]\r\n                +' '+line[9]\r\n                +' '+line[10]\r\n                +' '+line[11]\r\n                +' '+line[12]\r\n                +' '+line[13]\r\n                +' '+line[14]\r\n                +' '+line[15]\r\n                +' '+line[16]\r\n                +' '+line[17]\r\n                +' '+line[18]\r\n                +' '+line[19]\r\n                +' '+line[20]\r\n                +' '+line[21]\r\n                +' '+line[22]\r\n                +' '+line[23]\r\n                +' '+line[24]\r\n                +' '+line[25]\r\n                +' '+line[26]\r\n                +' '+line[27]\r\n                +' '+line[28]\r\n                +' '+line[29]\r\n                +' '+line[30]\r\n                +' '+line[31]\r\n                +' '+line[32]\r\n                +' '+line[33]\r\n                +' '+line[34]\r\n                +' '+line[35]\r\n                +' '+line[36]\r\n                +' '+line[37]\r\n                +' '+line[38]\r\n                +' '+line[39]\r\n                +' '+line[40]\r\n                +'\\n')\r\n\r\nif __name__==\"__main__\":\r\n    feature_csv='global_test_data_feature.csv'\r\n    feature_txt_lable='test_data_9feature.txt'\r\n    put_on_label(feature_csv,feature_txt_lable)\r\n"
  },
  {
    "path": "get_recommend_result_6.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nimport csv\r\n\r\n\r\ndef get_result(input_source_result_txt,output_final_result_csv,test_sourc_file_csv):\r\n    result_line_num=set()\r\n    f=open(input_source_result_txt,'r')\r\n    lines=f.readlines()\r\n    num=1\r\n    for line in lines:\r\n        linedata=line[1:-2].strip()\r\n        data=linedata.split('  ')\r\n        #print data[1]\r\n        if float(data[1].strip())>0.78:\r\n            result_line_num.add(num)\r\n        num=num+1\r\n    print len(result_line_num)\r\n\r\n\r\n    csvfile = file(output_final_result_csv, 'wb')\r\n    writer=csv.writer(csvfile)\r\n    num=1\r\n    suspect_user_item=set()\r\n    for line in csv.reader(file(test_sourc_file_csv, 'rb')):\r\n        if num in result_line_num:\r\n            suspect_user_item.add((line[0],line[1]))\r\n            #writer.writerow((line[0],line[1]))\r\n        num=num+1\r\n    sub_item=set()\r\n    for line in csv.reader(file('tianchi_mobile_recommend_train_item.csv','rb')):\r\n        sub_item.add(line[0])\r\n    count=0\r\n    for k in suspect_user_item:\r\n        if k[1] in sub_item:\r\n            writer.writerow((k[0],k[1]))\r\n            count=count+1\r\n    print count\r\n\r\n\r\nif __name__=='__main__':\r\n    input_source_result_txt='result9.txt'\r\n    output_final_result_csv='tianchi_mobile_recommendation_predict_9_434.csv'\r\n    test_sourc_file_csv='global_test_data_feature.csv'\r\n    get_result(input_source_result_txt,output_final_result_csv,test_sourc_file_csv)\r\n\r\n"
  },
  {
    "path": "global_feature.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nimport csv\r\nglobal_user_feature=dict()\r\n\r\nfor line in csv.reader(file('9_all_data.csv','rb')):\r\n    global_user_feature[(line[0],line[1])]=line[25:]\r\n\r\ncsvfile = file('gloabal_9_1_data.csv', 'wb')\r\nwriter=csv.writer(csvfile)\r\nfor line in csv.reader(file('9_1_data.csv','rb')):\r\n    k=global_user_feature[(line[0],line[1])]\r\n    writer.writerow((line[0],line[1],line[2],line[3],\r\n                     line[4],line[5],line[6],line[7],\r\n                     line[8],line[9],line[10],line[11],\r\n                     line[12],line[13],line[14],line[15],\r\n                     line[16],line[17],line[18],line[19],\r\n                     line[20],line[21],line[22],line[23],\r\n                     line[24],k[0],k[1],k[2],k[3],k[4],k[5],\r\n                     k[6],k[7],k[8],k[9],k[10],k[11],k[12],\r\n                     k[12],k[13],k[14],k[15]\r\n                     ))"
  },
  {
    "path": "produt_test_data.py",
    "content": "__author__ = 'LiGe'\r\n#encoding:utf-8\r\nimport csv\r\nnum=0\r\n\r\ncsvfile = file('test_data_9.csv', 'wb')\r\nwriter=csv.writer(csvfile)\r\nfor line in csv.reader(file('tianchi_mobile_recommend_train_user.csv','rb')):\r\n    if num==0:\r\n        num=num+1\r\n        continue\r\n    time_s=line[5].split(' ')\r\n    time_slot=time_s[0].split('-')\r\n    month=int(time_slot[1])\r\n    day=int(time_slot[2])\r\n    dis_day=(12-month)*30+(19-day)\r\n    if dis_day<=9:\r\n        writer.writerow(line)"
  }
]