Setup

Loading packages, initializing data and variables.

# Importing packages
import numpy as np
import pandas as pd
import re
import MySQLdb

# Display settings
pd.options.display.max_rows = 6

# Server connection to MySQL:
conn = MySQLdb.connect(host= "localhost",
                  user="yourusername",
                  passwd="yourpassword",
                  db="bdodae_new")

x = conn.cursor()

# Importing SQL data
node = pd.read_sql("SELECT * FROM node;", con = conn)

# Create connection/adjacency matrix weighted with CP costs
cp_connection_matrix = pd.DataFrame(np.zeros((len(node["name"]), len(node["name"]))), index = node["name"], columns = node["name"])

for x, name in zip(node["connections"], node["name"]):
    connections = re.split(", ", x)
    for connected_nodes in connections:
        # Edges going into cities have weight 0 and thus disappear from our graph, but we don't want this to happen
        # So we will instead set their weights to a very small value
        if (node[node["name"] == connected_nodes]["cp"].values[0] == 0):
            cp_connection_matrix.at[name, connected_nodes] = 0.000000001
        else:
            cp_connection_matrix.at[name, connected_nodes] = node[node["name"] == connected_nodes]["cp"].values[0]

# Import networkx
import networkx as nx

# Create graph
G = nx.from_numpy_matrix(np.matrix(cp_connection_matrix), create_using = nx.DiGraph())
# Add node attributes
node_attributes = node[["name", "cp", "area", "type"]].to_dict('index')
nx.set_node_attributes(G, node_attributes)

### Helper functions
# Takes in list as input (use get_nx_node_id for a single node)
def get_node_id(name):
    return([node[node['name'] == x].index[0] for x in name])

def get_node_name(idx):
    return(node.iloc[idx]['name'])

def get_total_cp(idx):
    return(sum(node.iloc[idx]['cp'].values))

def get_nx_node_id(name):
    return([x for x,y in G.nodes(data=True) if y["name"] == name][0])
# Get the total value of each subnode
subnode_values = pd.read_sql("""
SELECT item_yield.node_name, item_price.subnode_id, item_yield.subnode_name, item_yield.cp,
    SUM(item_price.user_average * item_yield.amount) AS subnode_value_user,
    SUM(item_price.recent_value * item_yield.amount) AS subnode_value_recent,
    SUM(item_price.vendor_sell * item_yield.amount) AS subnode_value_vendor
FROM
    (SELECT subnode_item.subnode_id, item.name, prices.*
    FROM subnode_item JOIN item ON subnode_item.item_id = item.item_id
      JOIN prices ON item.item_id = prices.item_id
    ORDER BY subnode_item.subnode_id, item.item_id) AS item_price
    JOIN
    (SELECT subnode.subnode_id, item.item_id, item.name, yield.amount, subnode.name AS subnode_name, subnode.cp, node.name AS node_name
    FROM subnode JOIN yield ON subnode.subnode_id = yield.subnode_id
      JOIN item ON yield.item_id = item.item_id
      JOIN node ON subnode.node_id = node.node_id
    ORDER BY subnode.subnode_id, item.item_id) AS item_yield
    ON item_price.subnode_id = item_yield.subnode_id
        AND item_price.item_id = item_yield.item_id
        AND item_price.name = item_yield.name
GROUP BY item_price.subnode_id
ORDER BY item_price.subnode_id
""", con = conn)

# Add per CP values
subnode_values["subnode_value_user_per_cp"] = subnode_values["subnode_value_user"] / subnode_values["cp"]
subnode_values["subnode_value_recent_per_cp"] = subnode_values["subnode_value_recent"] / subnode_values["cp"]

# Add subnode values to node graph G
for i in range(subnode_values.shape[0]):
    nx.set_node_attributes(G, {i+1000: {"subnode_value_user": subnode_values.loc[i]["subnode_value_user"],
                                        "subnode_value_recent": subnode_values.loc[i]["subnode_value_recent"],
                                        "subnode_value_vendor": subnode_values.loc[i]["subnode_value_vendor"]}})
    
# Adding subnode ranks based on their value per CP
subnode_values["value_rank"] = subnode_values["subnode_value_user_per_cp"].rank(ascending = False)
subnode_values["recent_rank"] = subnode_values["subnode_value_recent_per_cp"].rank(ascending = False)

K shortest simple paths

K shortest simple paths is a very greedy algorithm that calculates value based on the assumption that players will take all nodes and subnodes in the returned path. This is not realistic because there is a CP softcap in game.

Pseudocode:
  1. Calculate some number k shortest simple paths using networkx’s function shortest_simple_paths.
  2. Find the best value per CP path by:
  3. Calculating combined value of subnodes for nodes in our path
  4. Finding the total CP cost from the nodes and subnodes
  5. Dividing. Ta-da…
  6. Extend this to work for inputs of more than one path by basically repeating steps (1) and (2) for each (start, end) pair.
import itertools
# Find the k shortest paths using CP costs as edge weights
def k_shortest_paths(G, source, target, k, weight = 'weight'):
    return list(itertools.islice(nx.shortest_simple_paths(G, source, target, weight = weight), k))

# k = 20 is arbitrarily chosen, we will tune this later
paths = k_shortest_paths(G, get_node_id(["Calpheon"])[0], get_node_id(["Altinova"])[0], 20)
for path in paths:
    print(path)
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 12, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 230, 109, 78, 318, 332, 176, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 78, 318, 332, 176, 38, 160, 23, 22]
[69, 101, 97, 260, 65, 244, 206, 155, 108, 171, 12, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 12, 328, 38, 255, 40, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 29, 74, 316, 328, 38, 160, 23, 22]
[69, 101, 122, 220, 260, 65, 244, 206, 155, 108, 171, 12, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 12, 328, 38, 255, 327, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 12, 208, 332, 176, 38, 160, 23, 22]
[69, 122, 219, 220, 260, 65, 244, 206, 155, 108, 171, 12, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 298, 28, 29, 74, 316, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 230, 109, 78, 171, 12, 328, 38, 160, 23, 22]
[69, 101, 97, 61, 244, 206, 155, 108, 171, 12, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 29, 74, 329, 189, 227, 255, 40, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 29, 74, 316, 227, 255, 40, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 29, 74, 329, 189, 227, 255, 327, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 108, 171, 29, 74, 316, 227, 255, 327, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 230, 241, 109, 78, 318, 332, 176, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 230, 109, 78, 318, 332, 208, 12, 328, 38, 160, 23, 22]
[69, 122, 220, 260, 65, 244, 206, 155, 230, 109, 78, 318, 332, 176, 364, 25, 1, 160, 23, 22]
# Works best if you have a lot of CP, because this assumes you will take all subnodes
# Very greedy algorithm, seeks to maximize value per CP along a single path, ignoring potentially better neighboring nodes
def get_best_path(paths, output = False):
    """
    Gets the best value per CP path from paths generated by k_shortest_paths
    
    Args:
        paths: Output generated from k_shortest_paths
            Ex. paths = k_shortest_paths(G, get_node_id(["Calpheon"])[0], get_node_id(["Altinova"])[0], 20)
        output: Option to print detailed output. Set output = True to see this.
        
    Returns:
        The best value per CP path out of the k paths generated by k_shortest_paths
    """
    path_df_list = []

    # For each path generated by k_shortest_paths, get a dataframe containing all subnodes connected to nodes in the path
    # and then sum all values to get path value and additional CP cost
    for path in paths:
        path_subnode_values_df = pd.DataFrame()   
        for node_id in path:
            path_subnode_values_df = path_subnode_values_df.append(subnode_values.loc[subnode_values["node_name"] == G.nodes[node_id]["name"]])
        path_df_list.append(path_subnode_values_df)
        
    # Very messy, just summing up values for each path (naive approach)
    path_node_cp = []
    path_subnode_cp = []
    path_subnode_value_user = []
    path_subnode_value_recent = []
    path_subnode_value_vendor = []
    for path, path_df in zip(paths, path_df_list):
        path_node_cp.append(get_total_cp(path))
        path_subnode_cp.append(path_df["cp"].sum())
        path_subnode_value_user.append(path_df["subnode_value_user"].sum())
        path_subnode_value_recent.append(path_df["subnode_value_recent"].sum())
        path_subnode_value_vendor.append(path_df["subnode_value_vendor"].sum())
        
    # Element-wise addition of 2 lists
    from operator import add

    path_values = pd.DataFrame({
        "path": paths,
        "total_cp": list(map(add, path_node_cp, path_subnode_cp)),
        "value_user": path_subnode_value_user,
        "value_recent": path_subnode_value_recent,
        "value_vendor": path_subnode_value_vendor
    })
    
    # Add value per CP for each path
    path_values["value_user_per_cp"] = path_values["value_user"] / path_values["total_cp"]
    path_values["value_recent_per_cp"] = path_values["value_recent"] / path_values["total_cp"]
    
    # Get the best path and its subnodes
    best_path = path_values.loc[path_values["value_user_per_cp"].idxmax()]["path"]
    best_subnodes_df = path_df_list[path_values["value_user_per_cp"].idxmax()]
    
    # Print output if turned on
    if output == True:
        print("Start: " + get_node_name(best_path[0]) + " | End: " + get_node_name(best_path[len(best_path)-1]))
        print("\tNode CP cost: " + str(get_total_cp(best_path)))
        print("\tSubnode CP cost: " + str(best_subnodes_df["cp"].sum()))
        print("\tTotal CP cost: " + str(get_total_cp(best_path) + best_subnodes_df["cp"].sum()))
        print("\tPath value: " + str(best_subnodes_df["subnode_value_user"].sum()))
        print("\tPath value per CP: " + str(best_subnodes_df["subnode_value_user"].sum() / (get_total_cp(best_path) + best_subnodes_df["cp"].sum())))
    
    # Returns best path and corresponding subnode dataframe
    return(best_path, best_subnodes_df)
# Demonstration of get_best_path
get_best_path(k_shortest_paths(G, get_node_id(["Calpheon"])[0], get_node_id(["Altinova"])[0], 20), output = True)
Start: Calpheon | End: Altinova
	Node CP cost: 21
	Subnode CP cost: 24
	Total CP cost: 45
	Path value: 165422.21986860037
	Path value per CP: 3676.0493304133415





([69,
  122,
  220,
  260,
  65,
  244,
  206,
  155,
  108,
  171,
  29,
  74,
  329,
  189,
  227,
  255,
  40,
  23,
  22],
                       node_name  subnode_id  \
 162                    Oze Pass         163   
 149  Northern Plain Of Serendia         150   
 150  Northern Plain Of Serendia         151   
 ..                          ...         ...   
 140                Mediah Shore         141   
 158              Omar Lava Cave         159   
 159              Omar Lava Cave         160   
 
                                  subnode_name  cp  subnode_value_user  \
 162                    Oze Pass - Lumbering.1   1        10570.759717   
 149  Northern Plain Of Serendia - Gathering.1   1         2826.930099   
 150  Northern Plain Of Serendia - Lumbering.2   1         6183.050148   
 ..                                        ...  ..                 ...   
 140                   Mediah Shore - Mining.1   3        10792.679879   
 158                 Omar Lava Cave - Mining.1   3        14428.530141   
 159                 Omar Lava Cave - Mining.2   3        15385.450207   
 
      subnode_value_recent  subnode_value_vendor  subnode_value_user_per_cp  \
 162          13332.749623           1626.549969               10570.759717   
 149           3481.250122            261.510009                2826.930099   
 150           6242.200157           1148.250031                6183.050148   
 ..                    ...                   ...                        ...   
 140          11103.299851           1932.789973                3597.559960   
 158          38878.100433           2127.240011                4809.510047   
 159          44988.250780           2707.420044                5128.483402   
 
      subnode_value_recent_per_cp  value_rank  recent_rank  
 162                 13332.749623        67.0         68.0  
 149                  3481.250122       178.0        175.0  
 150                  6242.200157        91.0        128.0  
 ..                           ...         ...          ...  
 140                  3701.099950       160.0        170.0  
 158                 12959.366811       126.0         73.0  
 159                 14996.083593       117.0         55.0  
 
 [12 rows x 11 columns])
def optimize_paths(locations, output = False):
    """
    Extension of the get_best_path function. Can take multiple start and end location pairings and will combine results into
    one final path removing duplicates.
    
    Args:
        locations: Pairings of start and end locations. They should both be main nodes.
            Ex. [("Calpheon", "Altinova"), ("Calpheon", "Heidel"), ("Heidel", "Velia")]
        output: Option to print detailed output. Set output = True to see this.
        
    Returns:
        One master path that connects all location pairings given as input.   
    """
    master_path_df = pd.DataFrame()
    master_path = []
    
    for path_location in locations:
        path, path_df = get_best_path(k_shortest_paths(G, get_node_id([path_location[0]])[0], get_node_id([path_location[1]])[0], 20), output = output)
        master_path.append(path)
        master_path_df = pd.concat([master_path_df, path_df])
        if output == True:
            # Print a line for cleaner output
            print("_______________________________________________________________________________________________________________")
    
    # Output
    if output == True:
        print("Combining paths...")
        master_path = list(dict.fromkeys(itertools.chain(*master_path)))
        print("Done")
        print("Cleaning subnode dataframes...")
        master_path_df = master_path_df.drop_duplicates()
        print("Done")
        print("_______________________________")
        print("Node CP: " + str(get_total_cp(master_path)))
        print("Subnode CP: " + str(master_path_df["cp"].sum()))
        print("Total CP: " + str(get_total_cp(master_path) + master_path_df["cp"].sum()))
        print("Total value: " + str(master_path_df["subnode_value_user"].sum()))
        print("Value per CP: " + str(master_path_df["subnode_value_user"].sum() / (get_total_cp(master_path) + master_path_df["cp"].sum())))
    
# Demonstration of optimize_paths
optimize_paths([("Calpheon", "Altinova"), ("Calpheon", "Heidel"), ("Heidel", "Velia")], output = True)
Start: Calpheon | End: Altinova
	Node CP cost: 21
	Subnode CP cost: 24
	Total CP cost: 45
	Path value: 165422.21986860037
	Path value per CP: 3676.0493304133415
_______________________________________________________________________________________________________________
Start: Calpheon | End: Heidel
	Node CP cost: 7
	Subnode CP cost: 5
	Total CP cost: 12
	Path value: 42284.31983745098
	Path value per CP: 3523.693319787582
_______________________________________________________________________________________________________________
Start: Heidel | End: Velia
	Node CP cost: 13
	Subnode CP cost: 9
	Total CP cost: 22
	Path value: 71020.9696764946
	Path value per CP: 3228.225894386118
_______________________________________________________________________________________________________________
Combining paths...
Done
Cleaning subnode dataframes...
Done
_______________________________
Node CP: 33
Subnode CP: 31
Total CP: 64
Total value: 213739.60967189074
Value per CP: 3339.6814011232927