已有账号,去登录
竞赛圈 > 通通赛——段子队代码
#=======================入门级规则部分=============================
library(data.table)
rm(list=ls())
getDD <- function(time)
{
return (as.numeric(substring(time, 8, 9)))
}
getHH <- function(time)
{
return (as.numeric(substring(time, 11, 12)))
}
compD = function(lat1, lon1, lat2, lon2)
{
dlat <- lat1 - lat2
dlon <- lon1 - lon2
return (12756274 * asin((sin(pi * dlat / 360) ** 2 +
cos(pi * lat1 / 180) * cos(pi * lat2 / 180) *
sin(pi * dlon / 360) ** 2) ** 0.5))
}
getDDis <- function(lat, lon)
{
len <- length(lat)
value <- rep(0.0, len)
if (len >= 2)
{
value[2:len] <- compD(lat[1 : (len - 1)], lon[1 : (len - 1)], lat[2 : len], lon[2 : len])
}
return (value)
}
test_predPaths <- fread("C:/MyWork/competition/DC/JT/DATA/predPaths_test.txt",
header=F,
integer64="numeric",
stringsAsFactors=F,
sep=",",
col.names=c("pathid", "carid", "lat", "lon", "state", "time"))
test_predPaths$index <- 1:nrow(test_predPaths)
test_predPaths[, dd:=getDD(time)]
test_predPaths[, hh:=getHH(time)]
pred <- test_predPaths[order(index), .(dis=sum(getDDis(lat, lon))), pathid]
pred <- pred[order(pathid), .(pathid, time = 477 * log(dis) - 2713)]
#=======================入门级规则进阶部分=============================
#===============================参数设定与函数定义===================================
eta <- 0.00000000000000001
NN <- 3000
N <- 30
getDD <- function(time)
{
return (as.numeric(substring(time, 8, 9)))
}
getHH <- function(time)
{
return (as.numeric(substring(time, 11, 12)))
}
getMS <- function(time)
{
mm <- as.numeric(substring(time, 14, 15))
ss <- as.numeric(substring(time, 17, 18))
return (ss + mm * 60)
}
getPidx <- function(lat, lon, N1, N2)
{
i <- ceiling(N1 * (lat - 30.29) / (31.05 - 30.29))
j <- ceiling(N2 * (lon - 103.27) / (104.61 - 103.27))
return ((i - 1) * N2 + j)
}
compD = function(lat1, lon1, lat2, lon2)
{
dlat <- lat1 - lat2
dlon <- lon1 - lon2
return (12756274 * asin((sin(pi * dlat / 360) ** 2 +
cos(pi * lat1 / 180) * cos(pi * lat2 / 180) *
sin(pi * dlon / 360) ** 2) ** 0.5))
}
getDDis <- function(lat, lon)
{
len <- length(lat)
value <- rep(0.0, len)
if (len >= 2)
{
value[2:len] <- compD(lat[1 : (len - 1)], lon[1 : (len - 1)], lat[2 : len], lon[2 : len])
}
return (value)
}
getDMs <- function(ms)
{
len <- length(ms)
value <- rep(0.0, len)
if (len >= 2)
{
value[2:len] <- ms[2 : len] - ms[1 : (len - 1)]
}
return (value)
}
#===============================导入预测数据===================================
test_predPaths <- fread("C:/MyWork/competition/DC/JT/DATA/predPaths_test.txt",
header=F,
integer64="numeric",
stringsAsFactors=F,
sep=",",
col.names=c("pathid", "carid", "lat", "lon", "state", "time"))
test_predPaths$index <- 1:nrow(test_predPaths)
test_predPaths[, dd:=getDD(time)]
test_predPaths[, hh:=getHH(time)]
#===============================对每天的路径分别预测===================================
for (date in 24:30)
{
#===============================导入当天训练数据===================================
dtrain <- fread(paste("C:/MyWork/competition/DC/JT/DATA/201408", date, "_train.txt", sep = ""),
header = F,
integer64 = "numeric",
stringsAsFactors = F,
sep = ",",
col.names = c("carid", "lat", "lon", "state", "time"))
dtrain[, dd := getDD(time)]
dtrain[, hh := getHH(time)]
dtrain[, ms := getMS(time)]
dtrain[order(ms), v := getDDis(lat, lon) / (getDMs(ms) + eta), .(carid, hh)]
#===============================统计估计点速度===================================
tj <- dtrain[order(ms),
.(
lat = 0.5 * (lat[1:(length(lat) - 1)] + lat[2:length(lat)]),
lon = 0.5 * (lon[1:(length(lon) - 1)] + lon[2:length(lon)]),
v = v[2:length(v)]
),
.(carid, hh)][!is.na(lat) & v > 0 & v < 50]
tj[, pidx := getPidx(lat, lon, NN, NN)]
tj <- tj[length(v) >= 5, .(v = median(v)), .(pidx, hh)]
#===============================当天预测的路径预测===================================
preddata <- test_predPaths[dd == date]
preddata <- preddata[order(index),
.(
lat0 = lat[1:(length(lat) - 1)],
lon0 = lon[1:(length(lon) - 1)],
latn = lat[2:length(lat)],
lonn = lon[2:length(lon)],
index = index[1:(length(index) - 1)]
),
.(pathid, dd, hh)]
preddata <- preddata[,
.(
lat = seq(from=lat0, to=latn - (latn - lat0) / (N - 1), length.out=N - 1),
lon = seq(from=lon0, to=lonn - (lonn - lon0) / (N - 1), length.out=N - 1),
idx = seq(from=(index - 1) * N + 1, to=index * N - 1, length.out=N - 1)
),
.(pathid, dd, hh, index)]
preddata[, pidx:=getPidx(lat, lon, NN, NN)]
preddata <- merge(preddata,
tj[,.(pidx, hh = hh + 1, v)],
by=c("pidx", "hh"),
all.x=T)
preddata <- merge(preddata,
tj[,.(pidx, hh = hh - 1, v)],
by=c("pidx", "hh"),
all.x=T)
#===============================缺失值填充===================================
preddata[is.na(v.x) & !is.na(v.y), v.x := v.y]
preddata[is.na(v.y) & !is.na(v.x), v.y := v.x]
preddata[, mvx := mean(v.x, na.rm=T), pathid]
preddata[, mvy := mean(v.y, na.rm=T), pathid]
preddata[is.na(mvx) & !is.na(mvy), mvx := mvy]
preddata[is.na(mvy) & !is.na(mvx), mvy := mvx]
preddata[is.na(mvx), mvx := 4]
preddata[is.na(mvy), mvy := 4]
preddata[is.na(v.x), v.x := mvx]
preddata[is.na(v.y), v.y := mvy]
#===============================异常速度处理===================================
preddata[, vx1 := 1 / (v.x + eta)]
preddata[, vy1 := 1 / (v.y + eta)]
preddata[vx1 >= 1 / 2.0, vx1 := 1 / 2.0]
preddata[vy1 >= 1 / 2.0, vy1 := 1 / 2.0]
preddata[vx1 <= 1 / 60, vx1 := 1 / 60]
preddata[vy1 <= 1 / 60, vy1 := 1 / 60]
#===============================预测===================================
preddata[order(idx), ddis:=getDDis(lat, lon), pathid]
pred <- preddata[order(idx),
.(
t1 = sum(0.5 * ddis[2:length(ddis)] * (vx1[1:(length(vx1) - 1)] + vx1[2:length(vx1)])),
t2 = sum(0.5 * ddis[2:length(ddis)] * (vy1[1:(length(vy1) - 1)] + vy1[2:length(vy1)]))
),
.(pathid)]
pred[t1 <= 3599 & t2 <= 3599, time := 0.5 * (t1 + t2)]
pred[t1 > 3599 & t2 <= 3599, time := t2]
pred[t1 <= 3599 & t2 > 3599, time := t1]
pred[t1 > 3599 & t2 > 3599, time := 3599]
pred <- pred[,.(pathid, time)]
filename <- paste("C:/MyWork/competition/DC/JT/RESULT/sdgjyc", date, ".csv", sep="")
write.table(pred,
filename,
col.names = T,
quote = F,
row.names = F,
sep = ",")
}
#========================融合=========
lcyc <- fread("C:/MyWork/competition/DC/JT/RESULT/lcyc.csv",
header=T,
integer64="numeric",
stringsAsFactors=F,
sep=",")
sdgjyc <- NULL
for (date in 24 : 30)
{
sdgjyc <- rbind(sdgjyc, fread(paste("C:/MyWork/competition/DC/JT/RESULT/sdgjyc", date, ".csv", sep=""),
header=T,
integer64="numeric",
stringsAsFactors=F,
sep=","))
}
pred <- merge(sdgjyc,
lcyc,
by="pathid",
all.x=T)[order(pathid), .(pathid, time = 0.65 * time.x + 0.35 * time.y)]
write.table(pred,
"C:/MyWork/competition/DC/JT/RESULT/pred.csv",
col.names = T,
quote = F,
row.names = F,
sep = ",")