竞赛圈   > 通通赛——段子队代码

ldz

  • 关注者
  • 关注了

ldz

#=======================入门级规则部分=============================
library(data.table)
rm(list=ls())

getDD <- function(time)
{
	return (as.numeric(substring(time, 8, 9)))
}

getHH <- function(time)
{
    return (as.numeric(substring(time, 11, 12)))
}

compD = function(lat1, lon1, lat2, lon2)
{
	dlat <- lat1 - lat2
	dlon <- lon1 - lon2
	return (12756274 * asin((sin(pi * dlat / 360) ** 2 +
	cos(pi * lat1 / 180) * cos(pi * lat2 / 180) *
	sin(pi * dlon / 360) ** 2) ** 0.5))
}

getDDis <- function(lat, lon)
{
	len <- length(lat)
	value <- rep(0.0, len)
	if (len >= 2)
	{
		value[2:len] <- compD(lat[1 : (len - 1)], lon[1 : (len - 1)], lat[2 : len], lon[2 : len])
	}
	return (value)
}

test_predPaths <- fread("C:/MyWork/competition/DC/JT/DATA/predPaths_test.txt",
header=F,
integer64="numeric",
stringsAsFactors=F,
sep=",",
col.names=c("pathid", "carid", "lat", "lon", "state", "time"))
test_predPaths$index <- 1:nrow(test_predPaths)
test_predPaths[, dd:=getDD(time)]
test_predPaths[, hh:=getHH(time)]

pred <- test_predPaths[order(index), .(dis=sum(getDDis(lat, lon))), pathid]
pred <- pred[order(pathid), .(pathid, time = 477 * log(dis) - 2713)]


#=======================入门级规则进阶部分=============================

#===============================参数设定与函数定义===================================
eta <- 0.00000000000000001
NN <- 3000
N <- 30

getDD <- function(time)
{
	return (as.numeric(substring(time, 8, 9)))
}

getHH <- function(time)
{
    return (as.numeric(substring(time, 11, 12)))
}

getMS <- function(time)
{
	mm <- as.numeric(substring(time, 14, 15))
	ss <- as.numeric(substring(time, 17, 18))
    return (ss + mm * 60)
}

getPidx <- function(lat, lon, N1, N2)
{
	i <- ceiling(N1 * (lat - 30.29) / (31.05 - 30.29))
	j <- ceiling(N2 * (lon - 103.27) / (104.61 - 103.27))
	return ((i - 1) * N2 + j)
}

compD = function(lat1, lon1, lat2, lon2)
{
	dlat <- lat1 - lat2
	dlon <- lon1 - lon2
	return (12756274 * asin((sin(pi * dlat / 360) ** 2 +
	cos(pi * lat1 / 180) * cos(pi * lat2 / 180) *
	sin(pi * dlon / 360) ** 2) ** 0.5))
}

getDDis <- function(lat, lon)
{
	len <- length(lat)
	value <- rep(0.0, len)
	if (len >= 2)
	{
		value[2:len] <- compD(lat[1 : (len - 1)], lon[1 : (len - 1)], lat[2 : len], lon[2 : len])
	}
	return (value)
}

getDMs <- function(ms)
{
	len <- length(ms)
	value <- rep(0.0, len)
	if (len >= 2)
	{
		value[2:len] <- ms[2 : len] - ms[1 : (len - 1)]
	}
	return (value)
}

#===============================导入预测数据===================================
test_predPaths <- fread("C:/MyWork/competition/DC/JT/DATA/predPaths_test.txt",
header=F,
integer64="numeric",
stringsAsFactors=F,
sep=",",
col.names=c("pathid", "carid", "lat", "lon", "state", "time"))
test_predPaths$index <- 1:nrow(test_predPaths)
test_predPaths[, dd:=getDD(time)]
test_predPaths[, hh:=getHH(time)]

#===============================对每天的路径分别预测===================================

for (date in 24:30)
{
	#===============================导入当天训练数据===================================
	dtrain <- fread(paste("C:/MyWork/competition/DC/JT/DATA/201408", date, "_train.txt", sep = ""),
	header = F,
	integer64 = "numeric",
	stringsAsFactors = F,
	sep = ",",
	col.names = c("carid", "lat", "lon", "state", "time"))

	dtrain[, dd := getDD(time)]
	dtrain[, hh := getHH(time)]
	dtrain[, ms := getMS(time)]
	dtrain[order(ms), v := getDDis(lat, lon) / (getDMs(ms) + eta), .(carid, hh)]

	#===============================统计估计点速度===================================
	tj <- dtrain[order(ms),
	.(
		lat = 0.5 * (lat[1:(length(lat) - 1)] + lat[2:length(lat)]),
		lon = 0.5 * (lon[1:(length(lon) - 1)] + lon[2:length(lon)]),
		v = v[2:length(v)]
	),
	.(carid, hh)][!is.na(lat) & v > 0 & v < 50]

	tj[, pidx := getPidx(lat, lon, NN, NN)]

	tj <- tj[length(v) >= 5, .(v = median(v)), .(pidx, hh)]

	#===============================当天预测的路径预测===================================
	preddata <- test_predPaths[dd == date]

	preddata <- preddata[order(index),
	.(
		lat0 = lat[1:(length(lat) - 1)],
		lon0 = lon[1:(length(lon) - 1)],
		latn = lat[2:length(lat)],
		lonn = lon[2:length(lon)],
		index = index[1:(length(index) - 1)]
	),
	.(pathid, dd, hh)]

	preddata <- preddata[,
	.(
		lat = seq(from=lat0, to=latn - (latn - lat0) / (N - 1), length.out=N - 1),
		lon = seq(from=lon0, to=lonn - (lonn - lon0) / (N - 1), length.out=N - 1),
		idx = seq(from=(index - 1) * N + 1, to=index * N - 1, length.out=N - 1)
	),
	.(pathid, dd, hh, index)]

	preddata[, pidx:=getPidx(lat, lon, NN, NN)]

	preddata <- merge(preddata, 
	tj[,.(pidx, hh = hh + 1, v)],
	by=c("pidx", "hh"),
	all.x=T)

	preddata <- merge(preddata, 
	tj[,.(pidx, hh = hh - 1, v)],
	by=c("pidx", "hh"),
	all.x=T)
	
	#===============================缺失值填充===================================
	preddata[is.na(v.x) & !is.na(v.y), v.x := v.y]
	preddata[is.na(v.y) & !is.na(v.x), v.y := v.x]
	preddata[, mvx := mean(v.x, na.rm=T), pathid]
	preddata[, mvy := mean(v.y, na.rm=T), pathid]
	preddata[is.na(mvx) & !is.na(mvy), mvx := mvy]
	preddata[is.na(mvy) & !is.na(mvx), mvy := mvx]
	preddata[is.na(mvx), mvx := 4]
	preddata[is.na(mvy), mvy := 4]
	preddata[is.na(v.x), v.x := mvx]
	preddata[is.na(v.y), v.y := mvy]

	#===============================异常速度处理===================================
	preddata[, vx1 := 1 / (v.x + eta)]
	preddata[, vy1 := 1 / (v.y + eta)]
	preddata[vx1 >= 1 / 2.0, vx1 := 1 / 2.0]
	preddata[vy1 >= 1 / 2.0, vy1 := 1 / 2.0]
	preddata[vx1 <= 1 / 60, vx1 := 1 / 60]
	preddata[vy1 <= 1 / 60, vy1 := 1 / 60]

	#===============================预测===================================
	preddata[order(idx), ddis:=getDDis(lat, lon), pathid]

	pred <- preddata[order(idx),
	.(
		t1 = sum(0.5 * ddis[2:length(ddis)] * (vx1[1:(length(vx1) - 1)] + vx1[2:length(vx1)])),
		t2 = sum(0.5 * ddis[2:length(ddis)] * (vy1[1:(length(vy1) - 1)] + vy1[2:length(vy1)]))
	),
	.(pathid)]
	pred[t1 <= 3599 & t2 <= 3599, time := 0.5 * (t1 + t2)]
	pred[t1 > 3599 & t2 <= 3599, time := t2]
	pred[t1 <= 3599 & t2 > 3599, time := t1]
	pred[t1 > 3599 & t2 > 3599, time := 3599]
	pred <- pred[,.(pathid, time)]


	filename <- paste("C:/MyWork/competition/DC/JT/RESULT/sdgjyc", date, ".csv", sep="")
	write.table(pred, 
	filename, 
	col.names = T, 
	quote = F, 
	row.names = F, 
	sep = ",")
}

#========================融合=========
lcyc <- fread("C:/MyWork/competition/DC/JT/RESULT/lcyc.csv",
header=T,
integer64="numeric",
stringsAsFactors=F,
sep=",")

sdgjyc <- NULL

for (date in 24 : 30)
{
	sdgjyc <- rbind(sdgjyc, fread(paste("C:/MyWork/competition/DC/JT/RESULT/sdgjyc", date, ".csv", sep=""),
	header=T,
	integer64="numeric",
	stringsAsFactors=F,
	sep=","))
}

pred <- merge(sdgjyc,
lcyc,
by="pathid",
all.x=T)[order(pathid), .(pathid, time = 0.65 * time.x + 0.35 * time.y)]

write.table(pred, 
"C:/MyWork/competition/DC/JT/RESULT/pred.csv", 
col.names = T, 
quote = F, 
row.names = F, 
sep = ",")



4条评论

分享

4条评论
意见反馈
关注微信公众号 关注微信公众号

扫一扫分享给周围朋友