DI Martin Schmidt

Logo


I hold a Master's degree in Financial and Actuarial Mathematics from the Vienna University of Technology (graduation with highest distinction) and have been working as a Quantitative Credit Risk Manager within an international banking group in Austria for more than 4 years. In 2020, I joined the National Bank of Austria as an Examiner for IRB credit risk models of significant financial institutions. I have excellent programming and database (Python, R, SAS, SQL, VBA) as well as statistical/mathematical skills and gained hands-on machine learning experience during various Data Science Hackathons (amongst others: Winner of the ‘Coding Challenge on Risk Management‘ hosted by the European Central Bank).

View My GitHub Profile

Route Optimization - Brute Force (Python)

Purpose

The program gets as input a csv-file with locations (longitude and latitude) and calculates the best route (based on time/distance) starting from a specified location using Google’s Distance Matrix API

Here is an example of how the csv-file should be structured



Required packages

import googlemaps as gm
from itertools import permutations
import numpy as np
import pandas as pd


Read CSV-file

def read_locations_csv(path, sep):
    """
    This function reads the csv with specified locations
    Input:
        path - local path to csv
        sep - seperator used in csv
    """
    
    df = pd.read_csv(path, sep)

    # check if csv is structured correctly
    assert (len(set(df.columns.tolist()) & set(["start", "longitude", "latitude", "location"])) == 4), \
           "CSV-file with locations not structured correclty!"
        
    return df


All possible routes between two locations

def get_intermediate_routes(df, do_round):
    """
    All possible partial routes, taking into account the starting
    and end point (if do_round equals True)
    Input:
        df - output from read_locations_csv
        do_round - boolean, wether end equals starting point
    Returns a df with all intermediate routes to be sent to google maps
    distance matrix api
    """
    
    # all possible intermediate routes, cartesian product
    int_routes = (
        df.assign(key=0)
        .merge(df.assign(key=0), on="key")
        .drop("key", axis=1)
    )
    
    # mask based on do_round; if True, then keep routes from X to starting point
    if do_round == True:
        mask = (int_routes["location_x"] != int_routes["location_y"])
    else:
        mask = (int_routes["location_x"] != int_routes["location_y"]) & (int_routes["start_y"] == 0)
    
    # apply mask and reset index on df
    int_routes = int_routes[mask].reset_index(drop=True)
    
    return int_routes


Connection to Google Could Distance Matrix API

More information can be found here

def query_google_maps_distance_matrix(df, api_key, mode_transport, mode_metric):
    """
    Query google maps distance matrix api.
    Input:
        df - output of get_intermediate_routes
        api_key - string, personal google cloud api key
        mode_transport - string, either "walking", "driving" or "bicycling"
        mode_metric - string, either "duration" or "distance"
    Returns a list of durations/distances
    """
    
    # start goolge maps api connection 
    gmaps = gm.Client(key=api_key)
    
    # query google distance matrix api for all starting and end points
    result = []
    for i in range(df.shape[0]):
        start = (df["latitude_x"][i], df["longitude_x"][i])
        end   = (df["latitude_y"][i], df["longitude_y"][i])
        # for mode_metric == "duration", seconds are returned
        # for mode_metric == "distance", meters are returned
        result_tmp = gmaps.distance_matrix(start, end, mode=mode_transport)["rows"][0]["elements"][0][mode_metric]["value"]
        if mode_metric == "duration":
            result_tmp = round(result_tmp/60, 2)
        result.append(result_tmp)
        
    return result


Create a dataframe of all possible tours

def get_all_possible_tours(df_locations, do_round):
    """
    This functions returns a dataframe with all possible tours.
    One tour corresponds to a row in the dataframe.
    Input:
        df - output from read_locations_csv
        do_round - boolean, wether end equals starting point
    """

    # create a list of all possible tours
    visiting = df_locations[df_locations.start == 0]["location"].tolist()
    visiting_permutations = list(permutations(visiting, df_locations.shape[0]-1))
    starting = df_locations[df_locations.start == 1]["location"].tolist()[0]

    all_tours = []
    for visiting_tour in visiting_permutations:
        if do_round == True:
            tour = (starting,) + visiting_tour + (starting,)
        else:
            tour = (starting,) + visiting_tour
        all_tours.append(tour)

    # convert list of all tours to dataframe
    df_all_tours = pd.DataFrame(all_tours)
    
    return df_all_tours


Length (duration/distance) of each tour

def calc_tour_length(df_all_tours, df_routes):
    """
    Function calculates the total length of all possible tours
    Input:
        df_all_tours - df with all tours, output of get_all_possible_tours
        df_routes - df with all intermediate routes including length of route
    """
    
    # initialize empty numpy matrix
    dist_matrix = np.empty(df_all_tours.shape)
    
    # fill matrix with duration/distance
    for i in range(dist_matrix.shape[0]):
        for j in range(dist_matrix.shape[1]-1):
            start = df_all_tours[j][i]
            end   = df_all_tours[j+1][i]
            mask = (df_routes["location_x"] == start) & (df_routes["location_y"] == end)
            dist_matrix[i][j] = df_routes[mask]["length"]
            
    total_length = dist_matrix.sum(axis=1)
    
    return total_length


Main function

Calls all other auxiliary functions

def route_optimization(path_csv,
                       sep_csv,
                       do_round,
                       google_cloud_api_key,
                       mode_transport,
                       mode_metric,
                       print_stats):
    """
    Function calculates the best/worst route of a given set of locations.
    Input:
        path_csv - string, path to csv
        sep_csv - string, seperator used in csv
        do_round - boolean, if True -> starting point == end point
        google_cloud_api_key - string, personal google cloud api key
        mode_transport - string, either "walking", "driving" or "bicycling"
        mode_metric - string, either "duration" or "distance"
        print_stats - boolean, if True -> prints best/worst route
    """

    # read the csv file
    df_locations = read_locations_csv(path_csv, 
                                      sep_csv)

    # all intermediate routes
    df_routes = get_intermediate_routes(df_locations, 
                                        do_round)

    # list of google maps distance matrix results
    list_metric_result = query_google_maps_distance_matrix(df_routes, 
                                                           google_cloud_api_key, 
                                                           mode_transport, 
                                                           mode_metric)

    # join length to intermediate routes
    df_routes["length"] = list_metric_result

    # df with all possible tours
    df_all_tours = get_all_possible_tours(df_locations, 
                                          do_round)

    # calculate total length of all tours
    length_all_tours = calc_tour_length(df_all_tours, 
                                        df_routes)
    
    # final dataframe: all possible tours with corresponding length
    df_final = df_all_tours.copy()
    df_final["total_length"] = length_all_tours
    
    # print stats and optimal/worst route
    if print_stats == True:
        min_metric = df_final["total_length"].min()
        max_metric = df_final["total_length"].max()
        if mode_metric == "duration":
            print(f"Fastest time: {round(min_metric,2)} minutes.")
            print(f"Slowest time: {round(max_metric,2)} minutes.")
            print()
            print("Fastest route:")
            print(df_final[df_final["total_length"] == min_metric].iloc[0])
            print()
            print("Slowest route:")
            print(df_final[df_final["total_length"] == max_metric].iloc[0])
        else:
            print(f"Shortest distance: {round(min_metric,2)} meters.")
            print(f"Longest distance: {round(max_metric,2)} meters.")
            print()
            print("Shortest route:")
            print(df_final[df_final["total_length"] == min_metric].iloc[0])
            print()
            print("Longest route:")
            print(df_final[df_final["total_length"] == max_metric].iloc[0])

    return df_final