Paris, France

Luc Gential

Convertir les résultats de course de L-Chrono (PDF) pour Kikourou (CSV)

Le script est écrit en R et fait appel à des commandes du shell bash (pdftotext et grep).

# Deliberately no `rm(list = ls(all.names = TRUE)); gc()` here.
# Every object this script reads is assigned by the script itself before use,
# so a clean workspace is not required. Wiping the global environment (hidden
# objects included) would destroy the caller's data whenever the file is
# source()d into a live session. Run the script in a fresh R session
# (e.g. with `Rscript`) when isolation is wanted.
library(dplyr)
library(data.table)
library(stringr)

# Directory that receives the downloaded PDF and every derived file.
# All file names below are built with file.path(), so the script does not
# need to change the session's working directory.
path <- "/home/lg/trail/resultats"
if (!dir.exists(path)) {
  stop("Output directory does not exist: ", path, call. = FALSE)
}

# 0. download pdf
url <- "http://www.l-chrono.com/resultats2016/trail_du_galibier_46.pdf"
pdffile <- file.path(path, basename(path = url))
# mode = "wb": a PDF is binary, and a text-mode transfer can alter its bytes
# on platforms that distinguish text from binary files.
dl_status <- download.file(url = url, destfile = pdffile, mode = "wb")
if (dl_status != 0) {
  stop("Download failed (status ", dl_status, "): ", url, call. = FALSE)
}

# and set file names
# Strip the real extension instead of gsub(".pdf", ...): in a regex the
# unescaped, unanchored "." matches any character anywhere in the name, and a
# URL not ending in ".pdf" would make the derived names equal to `pdffile`.
stem <- tools::file_path_sans_ext(pdffile)
txtfile <- paste0(stem, "_1.txt")  # raw text extracted from the PDF
regfile <- paste0(stem, "_2.txt")  # only the lines matching the result regex
outfile <- paste0(stem, ".csv")    # final CSV

# 1. convert pdf to text
# Calls the external `pdftotext` program on `pdffile`, writing `txtfile`.
# File names are passed through shQuote() so that spaces or shell
# metacharacters in a path cannot break (or hijack) the command line.
# NOTE(review): -layout and -raw request different text-ordering modes;
# confirm which one the installed pdftotext honours when both are given.
cmd1 <- sprintf(
  "pdftotext -layout -raw %s %s",
  shQuote(pdffile), shQuote(txtfile)
)
status1 <- system(cmd1)
# A non-zero exit status means no usable text file was produced; stop here
# rather than letting the later steps run on a missing or stale file.
if (status1 != 0) {
  stop("pdftotext failed (exit status ", status1, "): ", cmd1, call. = FALSE)
}

# 2. select lines matching regex
# One result line, with six capture groups (in order): leading digits, free
# text, two characters, "M" or "F", an hh:mm:ss time, and trailing free text.
# Later in the script these become the columns class, nom, cat, sexe, temps
# and club.
pattern <- "(\\d+) (.*) n°\\d+ \\d\\d \\d+ (..)([MF]) \\d+ [MF] (\\d\\d:\\d\\d:\\d\\d) \\d+\\.\\d\\d (.*)$"
# shQuote() single-quotes the regex and the file names, so the shell passes
# them to grep verbatim (inside double quotes, "$", backticks and "\" would
# still be subject to shell interpretation).
cmd2 <- sprintf(
  "grep -P %s %s > %s",
  shQuote(pattern), shQuote(txtfile), shQuote(regfile)
)
status2 <- system(cmd2)
# grep exits with 0 when lines were selected, 1 when none were, and a larger
# value on a real error (unreadable file, invalid pattern, ...).
if (status2 > 1) {
  stop("grep failed (exit status ", status2, "): ", cmd2, call. = FALSE)
} else if (status2 == 1) {
  warning("No line of ", txtfile, " matched the result pattern", call. = FALSE)
}

# 3. capture regex groups into columns
# str_match() returns a character matrix: column 1 is the whole match and
# columns 2-7 are the six capture groups of `pattern`.
x <- readLines(con = regfile)
out <- str_match(string = x, pattern = pattern)
# The lines were pre-selected by grep (PCRE) but are re-matched here by
# stringr (ICU). Drop any line the second engine does not match, otherwise it
# would be written out as a row of NA values.
out <- out[!is.na(out[, 1]), , drop = FALSE]

# 4. save data
# drop = FALSE keeps a 6-column matrix even when exactly one line matched;
# without it the single row collapses to a plain vector, as.data.frame()
# yields a 6 x 1 table and the renaming below fails.
# stringsAsFactors = FALSE keeps the columns as character on R < 4.0 too.
DF <- as.data.frame(out[, 2:7, drop = FALSE], stringsAsFactors = FALSE)
setDT(x = DF)
# Rename by position rather than relying on auto-generated "V1".."V6" names.
setnames(
  x = DF,
  old = names(DF),
  new = c("class", "nom", "cat", "sexe", "temps", "club")
)
setcolorder(x = DF, neworder = c("class", "temps", "nom", "cat", "sexe", "club"))
# A club given as "/" is replaced by an empty string.
DF[club %in% "/", club := ""]
# Semicolon-separated, unquoted, with a header row, encoded as ISO-8859-1.
write.table(
  x = DF,
  file = outfile,
  sep = ";",
  row.names = FALSE,
  col.names = TRUE,
  quote = FALSE,
  fileEncoding = "ISO-8859-1"
)

Read more in Running

Mentions légales