Source code for analysis.MSTE.MSTE

"""
MSTE calculation
"""

import ctypes as ct
import numpy as np
import os
import warnings

_DIRNAME = os.path.dirname(__file__)
libmste = ct.CDLL(os.path.join(_DIRNAME, 'libmste.so'))
connections_c = libmste.connections
cluster_c = libmste.cluster


#TODO: Maybe avoid this int2str and str2int conversion?
def _idx2wall(wall):
  """
  Give a value in binary +/- 1/10/100 to each face
  """
  if wall == 1:
    return 'W'
  if wall == -1:
    return 'E'
  if wall == 2:
    return 'N'
  if wall == -2:
    return 'S'
  if wall == 4:
    return 'U'
  if wall == -4:
    return 'D'
  else:
    raise ValueError('Index {0} not understood'.format(wall))

def _wall2idx(wall):
  """
  Give a value in binary +/- 1/10/100 to each face
  """
  if wall == 'W':
    return 1
  if wall == 'E':
    return -1
  if wall == 'N':
    return 2
  if wall == 'S':
    return -2
  if wall == 'U':
    return 4
  if wall == 'D':
    return -4
  else:
    raise ValueError('Wall {0} not understood'.format(wall))

def _create_graph(conn):
  """
  From a connection list, create a graph dictionary and format
  connections as a dict

  Parameters
  ----------

  conn : 2D numpy array
      A connection array with (i, j, wall) indices in each entry

  Returns
  -------

  graph, cnct: dict, dict
      Graph and connections in a dictionary fashion
  """
  graph = {}
  cnct = {}
  for i in conn:
    iclus = i[0]
    jclus = i[1]
    wall = _idx2wall(i[2])
    try:
      if not jclus in graph[iclus]:
        graph[iclus].append(jclus)
    except KeyError:
      graph[iclus] = [jclus]
    try:
      if not wall in cnct[(iclus, jclus)]:
        cnct[(iclus, jclus)].append(wall)
    except KeyError:
      cnct[(iclus, jclus)] = [wall]
  return graph, cnct

def _plain_bfs(graph, source):
  """A fast BFS node generator"""
  seen = set()
  nextlevel = {source}
  while nextlevel:
    thislevel = nextlevel
    nextlevel = set()
    for vertex in thislevel:
      if vertex not in seen:
        yield vertex
        seen.add(vertex)
        nextlevel.update(graph[vertex])

def _partition(graph):
  """
  Partition a graph in disconnected subgraphs
  """
  seen = set()
  for vertex in graph:
    if vertex not in seen:
      conn = set(_plain_bfs(graph, vertex))
      yield conn
      seen.update(conn)

def _find_cycles(graph):
  """
  Get cycles in undirected graph. But we return also every edge as a
  "cycle", since it can generate an infinite cluster
  """
  gnodes = set(graph.keys())
  cycs = []
  root = None
  while gnodes:  # loop over connected components
    if root is None:
      root = gnodes.pop()
    stack = [root]
    pred = {root:root}
    used = {root:set()}
    while stack:  # walk the spanning tree finding cycles
      z = stack.pop()  # use last-in so cycles easier to find
      zused = used[z]
      for nbr in graph[z]:
        if nbr not in used:   # new node
          pred[nbr] = z
          stack.append(nbr)
          used[nbr] = set([z])
        elif nbr == z:    # self loops
          cycs.append([z])
        elif nbr not in zused:# found a cycle
          pn = used[nbr]
          cycle = [nbr, z]
          p = pred[z]
          while p not in pn:
            cycle.append(p)
            p = pred[p]
          cycle.append(p)
          cycs.append(cycle)
          used[nbr].add(z)
    gnodes -= set(pred)
    root = None
  for gnode in graph.keys():
    for i in graph[gnode]:
      if i > gnode:
        cycs.append([gnode, i])
  return cycs

def _find_paths(graph, cncts):
  """Find paths in an oriented [not directed!] graph and return the
  cycles and the paths through. Also inform whether they are infinite.
  """
  from copy import deepcopy
  def get_paths(cyc):
    """ Recursive function to get all the paths
    """
    paths = []
    def add_node(i=0, path=''):
      """
      Add new node
      """
      if i == len(cyc) - 1:
        paths.append(path)
        return
      for _ in cncts[(cyc[i], cyc[i+1])]:
        add_node(i + 1, deepcopy(path + _))
    add_node()
    return paths

  cycles = []
  inf_clusters = []
  for cyc in _find_cycles(graph):
    cyc.append(cyc[0])
    paths = get_paths(cyc)
    inf = False
    for path in paths:
      topo = 0
      for wall in ['N', 'S', 'E', 'W', 'U', 'D']:
        if wall in path:
          topo += _wall2idx(wall)
      inf = inf or topo != 0
      cycles.append((cyc, path))

    if inf:
      inf_clusters.append(cyc)
  return cycles, inf_clusters

[docs]def mste(x, v, t, box, energy, expansion=0.0): """Calculate MSTE. Parameters ---------- x : numpy float64 array Positions of the particles in the system v : numpy float64 array Velocities of the particles t : numpy int32 array Types of the particles box : numpy float64 array Box energy : boolean Whether to do energy considerations in the cluster computation. expansion : float, optional The expansion velocity of the walls of the box, in box units. Returns ------- value, (mst, inf) : numpy array, numpy array, list value is the [mass, occupancy, fraction] histogram mst is the array of indices to which each particle belongs. inf is the list of infinite clusters. Notes ----- 1. So far, when energy is "True", this only works for systems with `medium` potential. 2. The box needs to be cubic for this to work """ if x.dtype != np.float64: warnings.warn("Type of x should be 64bit float", UserWarning) x = x.astype(np.float64) if v.dtype != np.float64: warnings.warn("Type of v should be 64bit float", UserWarning) v = v.astype(np.float64) if t.dtype != np.int32: warnings.warn("Type of t should be 32bit float", UserWarning) t1 = t.astype(np.int32) for i, j in zip(t.flatten(), t1.flatten()): if i != j: raise ValueError("Casting resulted in different values") index = cluster(x, v, t, box, energy) conn = connections(x, v, t, index, box, energy, expansion) graph, cnct = _create_graph(conn) #TODO: This should be an independent function mst = index.copy() natoms = np.shape(x)[0] inf = [] value = np.zeros((natoms + 1, 3)) value[:, 0] = range(natoms + 1) for nodes in _partition(graph): this_graph = {} for n in nodes: this_graph[n] = graph[n] mst[index == n] = min(nodes) _, inf_clusters = _find_paths(this_graph, cnct) if len(inf_clusters) != 0: inf.append(min(nodes)) for clus in np.unique(mst): mass = np.count_nonzero(mst == clus) protons = np.count_nonzero((mst == clus) & (t.flatten() == 2)) frac = float(protons)/mass if clus in inf: mass = 0 value[mass, 1] += 1 value[mass, 2] *= (value[mass, 1] - 1.0)/value[mass, 1] value[mass, 2] += frac/value[mass, 1] return value, (mst, inf)
[docs]def cluster(x, v, t, box, energy): """ Get either MST or MSTE clusters from the configuration. Parameters ---------- x : numpy float64 array Positions of the particles in the system v : numpy float64 array Velocities of the particles t : numpy int32 array Types of the particles box : numpy float64 array Box energy : boolean Whether to do energy considerations in the cluster computation. Returns ------- index : numpy array An array with the cluster index of each particle. """ size_x = box[0][1] - box[0][0] size_y = box[1][1] - box[1][0] size_z = box[2][1] - box[2][0] if size_x != size_y or size_y != size_z: raise ValueError("The box should be cubic for this to work") else: size = size_x natoms = np.shape(x)[0] index_p = (ct.c_int * natoms)() x_p = x.ctypes.data_as(ct.c_void_p) v_p = v.ctypes.data_as(ct.c_void_p) t_p = t.ctypes.data_as(ct.c_void_p) cluster_c.argtypes = [ct.c_void_p, ct.c_void_p, ct.c_void_p, ct.c_int, ct.c_double, ct.c_bool, ct.c_void_p] cluster_c(x_p, v_p, t_p, natoms, size, energy, index_p) index = np.frombuffer(index_p, dtype=ct.c_int, count=natoms) return index
[docs]def connections(x, v, t, index, box, energy, expansion): """Find the connections with a given cluster distribution. Parameters ---------- x : numpy float64 array Positions of the particles in the system v : numpy float64 array Velocities of the particles t : numpy int32 array Types of the particles index : numpy array Cluster index of the particles box : numpy float64 array Box energy : boolean Whether to do energy considerations in the cluster computation. expansion : float, optional The expansion velocity of the walls of the box, in box units. Returns ------- conn : 2D numpy array The connections between the clusters, in the format: [[cluster1, cluster2, wall], [cluster1, cluster2, wall]...] Notes ----- 1. So far, when energy is "True", this only works for systems with `medium` potential. 2. The box needs to be cubic for this to work """ size_x = box[0][1] - box[0][0] size_y = box[1][1] - box[1][0] size_z = box[2][1] - box[2][0] if size_x != size_y or size_y != size_z: raise ValueError("The box should be cubic for this to work") else: size = size_x natoms = np.shape(x)[0] index_p = index.ctypes.data_as(ct.c_void_p) x_p = x.ctypes.data_as(ct.c_void_p) v_p = v.ctypes.data_as(ct.c_void_p) t_p = t.ctypes.data_as(ct.c_void_p) nclus = len(np.unique(index)) #TODO: check for size not too large if nclus > 1000: nclus = 1000 guess = nclus ** 2 * 8 tmp = (ct.c_int * (3 * guess))() connections_c.argtypes = [ct.c_void_p, ct.c_void_p, ct.c_void_p, ct.c_void_p, ct.c_int, ct.c_double, ct.c_double, ct.c_bool, ct.c_void_p] connections_c.restype = ct.c_int count = connections_c(index_p, x_p, v_p, t_p, natoms, size, expansion, energy, tmp) conn = np.frombuffer(tmp, dtype=ct.c_int, count=count * 3) conn = conn.reshape((count, 3)) return conn