SQL Tables

This script is part of the R extraction process and follows on from GetTablesFromXPaths.R and VariableCreation.R by providing the functions to push the resulting tfl object to a PostgreSQL database.

This is in contrast to the (significantly more efficient) Python extraction script FeatherUpload.R which follows on from XMLParsing.py and is used in PrepareData.sh.

Source code

# docker run --name postgres_london_tube -p 5432:5432 -d -e POSTGRES_PASSWORD=mysecretpassword postgres:alpine

Define row uniqueness

primarykeys <- list(
NptgLocalities = '"NptgLocalityRef"',
StopPoints = '"AtcoCode"',
RouteLinks = '"RouteLinkID"',
Routes = '"PrivateCode"',
JourneyPatternTimingLinks = '"JourneyPatternTimingLinkID"',
Services = '"ServiceCode"',
JourneyPatterns = '"JourneyPatternID"',
VehicleJourneys = '"VehicleJourneyCode"'
)
library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
host <- "localhost"
user <- "postgres"
password <- "mysecretpassword"
con <- dbConnect(drv, host = host, user = user, password = password,
dbname = "postgres")
dbSendQuery(con, "CREATE DATABASE londontube_r;")
dbDisconnect(con)

Create connection function for later use

get_con <- function() {
dbConnect(drv,
host = host, user = user, password = password,
dbname = "londontube_r")
}

Setup “CREATE TABLE … INSERT INTO” template"

create_tables <- function(tablename, tabledata, primary_key_cols, con){
# Insert data
dbWriteTable(con, tablename, tabledata, overwrite = TRUE, row.names = FALSE)
# Add Primary Keys
query <- sprintf(
"ALTER TABLE %1$s
ADD PRIMARY KEY (%2$s);",
tablename, # Current table name
paste(primary_key_cols, collapse = ", ") # PK columns
)
# Run queries
dbExecute(con, query)
}
# Build database with the same tablenames as input data
con <- get_con()
tablenames <- tolower(names(tfl))
table_creation <- purrr::pmap(list(tablenames, tfl, primarykeys),
create_tables, con)
dbDisconnect(con)

Copyright © Ruaridh Williamson 2017

Powered by TfL Open Data // Contains OS data © Crown copyright and database rights 2016