|
@@ -0,0 +1,448 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import os\n",
|
|
|
+ "\n",
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "import numpy as np\n",
|
|
|
+ "from sklearn.ensemble import GradientBoostingClassifier\n",
|
|
|
+ "from sklearn.model_selection import train_test_split,StratifiedKFold\n",
|
|
|
+ "from sklearn.metrics import auc,roc_auc_score\n",
|
|
|
+ "import xgboost as xgb "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# 切换项目目录\n",
|
|
|
+ "os.getcwd()\n",
|
|
|
+ "os.chdir(\"/media/sf_share/linux/haoxin\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "ename": "IOError",
|
|
|
+ "evalue": "File output/B_train_dummy.csv does not exist",
|
|
|
+ "output_type": "error",
|
|
|
+ "traceback": [
|
|
|
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
+ "\u001b[0;31mIOError\u001b[0m Traceback (most recent call last)",
|
|
|
+ "\u001b[0;32m<ipython-input-4-dd8f3729a2bf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mb_train\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'output/B_train_dummy.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mb_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'output/B_test_dummy.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 653\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 403\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 405\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 406\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 762\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 763\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 764\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 765\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 766\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 983\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 984\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 985\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 986\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 987\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1603\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'allow_leading_cols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex_col\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1604\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1605\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1606\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1607\u001b[0m \u001b[0;31m# XXX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
+ "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__ (pandas/_libs/parsers.c:4209)\u001b[0;34m()\u001b[0m\n",
|
|
|
+ "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source (pandas/_libs/parsers.c:8873)\u001b[0;34m()\u001b[0m\n",
|
|
|
+ "\u001b[0;31mIOError\u001b[0m: File output/B_train_dummy.csv does not exist"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "b_train = pd.read_csv('output/B_train_dummy.csv')\n",
|
|
|
+ "b_test = pd.read_csv('output/B_test_dummy.csv') "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# flags = b_train['flag']\n",
|
|
|
+ "# b_train['flag']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "col = [x for x in b_train.columns if x in b_test.columns] \n",
|
|
|
+ "col = [x for x in col if x not in ['no','flag']] \n",
|
|
|
+ "# col_1 = []\n",
|
|
|
+ "# Optional -- makes little difference to the score: drop the columns for missing (default) values\n",
|
|
|
+ "# for i in col:\n",
|
|
|
+ "# if '999' not in i:\n",
|
|
|
+ "# col_1.append(i)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "train_X,test_X,train_Y,test_Y = train_test_split(b_train[col],b_train['flag'],test_size=0.2,random_state = 0) \n",
|
|
|
+ "watchlist=[(xgb.DMatrix(train_X,label=train_Y),'train'),(xgb.DMatrix(test_X,label=test_Y),'eval')]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "dtrain_B = xgb.DMatrix(b_train[col],b_train['flag'])\n",
|
|
|
+ "# Parameters that scored 0.600018 on the online leaderboard\n",
|
|
|
+ "Trate=0.25 \n",
|
|
|
+ "params = {'booster':'gbtree',\n",
|
|
|
+ " 'eta': 0.05, \n",
|
|
|
+ " 'max_depth': 4, \n",
|
|
|
+ " 'max_delta_step': 0,\n",
|
|
|
+ " 'subsample':1, \n",
|
|
|
+ " 'colsample_bytree': 0.9, \n",
|
|
|
+ " 'base_score': Trate, \n",
|
|
|
+ " 'objective': 'binary:logistic', \n",
|
|
|
+ " 'lambda':3,\n",
|
|
|
+ " 'alpha':5\n",
|
|
|
+ " }\n",
|
|
|
+ "params['eval_metric'] = 'auc' \n",
|
|
|
+ "model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=130,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "#0.599155\n",
|
|
|
+ "# Trate=0.15\n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 3, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# # 'objective': 'binary:logitraw', \n",
|
|
|
+ "# 'objective': 'binary:logistic',\n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc'\n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=130,maximize=True,verbose_eval=True) \n",
|
|
|
+ "# model_phase_1_cla = xgb.train(params,xgb.DMatrix(train_X,label=train_Y),num_boost_round=1000,evals=watchlist,early_stopping_rounds=50,maximize=True,verbose_eval=True)\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ "# 0.594276\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 5, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':2,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=150,maximize=True,verbose_eval=True) \n",
|
|
|
+ "# model_phase_1_cla = xgb.train(params,xgb.DMatrix(train_X,label=train_Y),num_boost_round=1000,evals=watchlist,early_stopping_rounds=50,maximize=True,verbose_eval=True)\n",
|
|
|
+ "\n",
|
|
|
+ "# 0.595855\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 5, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=150,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.594632\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 5, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=200,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "#0.596701\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 5, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=138,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.596326\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 5, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=120,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.598221\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=150,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.599235\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=138,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "# 0.598832\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=120,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.600018\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=130,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "#0.595537\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 3, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=150,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.593465\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 3, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=130,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "#0.594750\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 3, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=200,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# #0.599226\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=135,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "#0.598256\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=125,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "# 0.599600\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=132,maximize=True,verbose_eval=True) \n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "# 0.599844\n",
|
|
|
+ "# Trate=0.25 \n",
|
|
|
+ "# params = {'booster':'gbtree',\n",
|
|
|
+ "# 'eta': 0.05, \n",
|
|
|
+ "# 'max_depth': 4, \n",
|
|
|
+ "# 'max_delta_step': 0,\n",
|
|
|
+ "# 'subsample':1, \n",
|
|
|
+ "# 'colsample_bytree': 0.9, \n",
|
|
|
+ "# 'base_score': Trate, \n",
|
|
|
+ "# 'objective': 'binary:logistic', \n",
|
|
|
+ "# 'lambda':3,\n",
|
|
|
+ "# 'alpha':5\n",
|
|
|
+ "# }\n",
|
|
|
+ "# params['eval_metric'] = 'auc' \n",
|
|
|
+ "# model_phase_1_cla_2 = xgb.train(params,dtrain_B,num_boost_round=132,maximize=True,verbose_eval=True) "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "pred = model_phase_1_cla_2.predict(xgb.DMatrix(b_test[col_1]))\n",
|
|
|
+ "result1 = pd.DataFrame()\n",
|
|
|
+ "result1['no'] = b_test['no']\n",
|
|
|
+ "result1['pred'] = pred[:] "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "result1.to_csv('subimit_target.csv',index=False)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 2",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python2"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 2
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython2",
|
|
|
+ "version": "2.7.12"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 1
|
|
|
+}
|