已有账号,去登录
竞赛圈 > 一个特征的baseline,72左右的样子
# -*- coding:utf-8
"""
@Created on 2018/6/22 14:01
@Author: Pengjiaxin
"""
import pandas as pd
import math
import numpy as np
import os
from eval_offline import rmsle
# Work from the data directory: both input CSVs are read from here and the
# submission file is written here as well.
os.chdir('../data')
# Show up to 200 columns when printing DataFrames.
pd.set_option('display.max_columns', 200)

# Input file names, relative to the directory entered above.
train_file = 'tap_fun_train.csv'
test_file = 'tap_fun_test.csv'

# Load both splits and replace every missing value with 0.
train_data = pd.read_csv(train_file).fillna(0)
test_data = pd.read_csv(test_file).fillna(0)
# Formatted through a single string so the output is the same whether print
# is the Python 2 statement or the Python 3 function.
print('%s %s' % (train_data.shape, test_data.shape))

# Separate the regression target from the training features.
y = train_data.pop('prediction_pay_price')

# Columns set aside from the feature frames; 'user_id' is needed again when
# the submission file is written.
drop = ['user_id', 'register_time']
train_idx = train_data[drop]
# Explicit copy: a column is added to test_idx below, and assigning into a
# frame obtained by indexing another frame can raise SettingWithCopyWarning.
test_idx = test_data[drop].copy()
train_data = train_data.drop(drop, axis=1)
test_data = test_data.drop(drop, axis=1)

# The baseline uses exactly one feature.
cols = ['pay_price']
# scikit-learn is only needed for this modelling step.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(train_data[cols], y)
y_prob = lr.predict(test_data[cols])
# NOTE(review): predictions are written out as-is (no clipping or rounding);
# confirm the scorer accepts negative values if the fitted line produces any.

# Attach predictions to the identifier frame and write the submission.
test_idx['prediction_pay_price'] = y_prob
print(test_idx.prediction_pay_price.value_counts())
test_idx[['user_id', 'prediction_pay_price']].to_csv("sub.csv", index=False)