DEV Community

JoseXS
JoseXS

Posted on

Scraping maps.race

Hello!

Because I'm taking a web scraping course, I decided to write a small script that extracts the response from the odd XML-wrapped JSON format served by https://mapas.race.es, so that I can generate either a single JSON file or several CSV files, one for each of the following categories:

  • Incidents
  • Cameras
  • Radars
  • BlackPoints
  • OilStations
  • Parking
import requests
import json
import pandas as pd

# Output settings: the format selects the writer used at the end of the
# script, and the name is the base of every generated file name.
formatData = "json"  # json / csv
nameFile = "race"

# Category switches. Each value is sent verbatim in the matching query
# parameter, and a category is only processed later when its switch equals 1.
incidents = 1
cameras = 1
radars = 1
oilStations = 1
blackPoints = 1
parkings = 1

# Full request URL, with one query parameter per category switch.
url = (
    "https://mapas.race.es/WebServices/srvRace.asmx/ObtenerDatos"
    f"?pstrIncidencias={incidents}"
    f"&pstrCamaras={cameras}"
    f"&pstrRadares={radars}"
    f"&pstrGasolineras={oilStations}"
    f"&pstrPuntosNegros={blackPoints}"
    f"&pstrParking={parkings}"
)

# Request headers sent along with the query.
headers = {
    "authority": "infocar.dgt.es",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "host": "mapas.race.es",
    "referer": "https://mapas.race.es/",
}

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops on an HTTP error instead of trying to parse
# an error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
initialText = response.text

# The JSON payload is wrapped in an XML <string> element; keep only the text
# between the opening and closing tags.
splitText = initialText.split('<string xmlns="http://tempuri.org/">')
if len(splitText) < 2:
    raise ValueError(
        "Unexpected response: the <string> wrapper element was not found"
    )
jsonToLoad = splitText[1].split("</string>")[0]

# Decoded payload; the per-category lists are read from it further down.
jsonRequest = json.loads(jsonToLoad)

# Aggregated output, keyed by category name.
items = dict()

# One accumulator per category; every name is bound to its own list object.
incidentsItems, camerasItems, radarsItems = [], [], []
oilStationsItems, blackPointsItems, parkingsItems = [], [], []


def get_object(type, item, id=None, image=None):
    """Build the output record for one raw entry of the given category.

    Args:
        type: Category name: "incidents", "cameras", "radars",
            "oilStations", "blackPoints" or "parkings".
        item: Raw entry (mapping) taken from the decoded payload.
        id: Identifier used for incidents only; other categories take
            their identifier from ``item["Id"]``.
        image: Image URL stored on camera records only.

    Returns:
        A dict with stringified "id", "lat" and "lng", plus the
        category-specific fields; ``None`` for an unrecognised category.
    """
    plain_kinds = ("radars", "oilStations", "blackPoints", "parkings")
    if type not in ("incidents", "cameras") + plain_kinds:
        return None

    # Incidents use the caller-supplied identifier; everything else carries
    # its own "Id" field.
    identifier = id if type == "incidents" else item["Id"]
    record = {
        "id": str(identifier),
        "lat": str(item["Latitud"]),
        "lng": str(item["Longitud"]),
    }

    if type == "incidents":
        record["type"] = str(item["Tipo"])
        # Remaining incident fields are copied through unchanged.
        passthrough = (
            ("date", "Fecha"),
            ("reason", "Causa"),
            ("level", "Nivel"),
            ("province", "Provincia"),
            ("poblation", "Poblacion"),
            ("street", "Carretera"),
        )
        for target_key, source_key in passthrough:
            record[target_key] = item[source_key]
    elif type == "cameras":
        record["image"] = image

    return record


# Incidents are numbered with a 1-based counter that is passed as their id.
if incidents == 1:
    incidentsItems.extend(
        get_object('incidents', entry, str(position))
        for position, entry in enumerate(jsonRequest["Incidencias"], start=1)
    )
    items["incidents"] = incidentsItems

# Each camera record gets an image URL derived from the camera's Id.
if cameras == 1:
    for camera in jsonRequest["Camaras"]:
        snapshotUrl = (
            "http://infocar.dgt.es/etraffic/data/camaras/"
            + str(camera['Id'])
            + ".jpg"
        )
        camerasItems.append(get_object('cameras', camera, "", snapshotUrl))
    items["cameras"] = camerasItems

# The remaining categories share the same shape, so they are handled by one
# loop over (switch, payload key, category name, accumulator).
for enabled, payloadKey, category, bucket in (
    (radars, "Radares", 'radars', radarsItems),
    (oilStations, "Gasolineras", 'oilStations', oilStationsItems),
    (blackPoints, "PuntosNegros", 'blackPoints', blackPointsItems),
    (parkings, "Parking", 'parkings', parkingsItems),
):
    if enabled == 1:
        bucket.extend(get_object(category, entry) for entry in jsonRequest[payloadKey])
        items[category] = bucket

# Persist the collected items in the configured format.
if formatData == "json":
    itemsDumps = json.dumps(items, indent=2)
    # The context manager closes the file, so the output is flushed to disk
    # even if the write fails part-way.
    with open(nameFile + '.' + formatData, "w", encoding="utf-8") as f:
        f.write(itemsDumps)
elif formatData == "csv":
    # `items` only holds the categories that were actually processed, so
    # iterating it avoids a KeyError when a category switch is turned off.
    # The category key doubles as the file-name suffix, and every file is
    # written without the DataFrame index column.
    for category, rows in items.items():
        pd.DataFrame(rows).to_csv(
            nameFile + "_" + category + "." + formatData, index=False
        )
else:
    # Fail loudly instead of reporting success when nothing was written.
    raise ValueError(
        "Unsupported formatData " + repr(formatData) + "; use 'json' or 'csv'"
    )

print('✅ ' + formatData + ' file/s generated')

Enter fullscreen mode Exit fullscreen mode

For the vast majority of people who read this, the script will not be of much practical use, since the data covers only Spain. Still, as a curiosity, you can see how it is done and encourage me to improve it!

Don't be mean, I'm learning Python...

Top comments (0)