# This file contains useful filter functions for conducting topological data analysis with the mapper algorithm # Last update: August 2018, Drew C. Pendergrass (drew@drewpendergrass.com) ############################################################################### #We assume that we are given a collection of N points as a point cloud data X together with a distance function d(x, y). #All the functions here take a distance matrix. It is on you to make sure the matrix is properly formatted, as safety checks are lazily included (e.g. triangle rule is not checked). #At no point is Euclidean distance assumed. check_distance_matrix=function(dist_mat){ d=as.matrix(dist_mat) if(!isSymmetric(d)){stop("Distance matrix is not symmetric.")} if(!all(d>=0)){stop("Distance matrix contains negative values.")} if(sum(diag(d))!=0){stop("Distance matrix has nonzero values on diagonal.")} return(d) } ############################################################################### # See https://en.wikipedia.org/wiki/Kernel_density_estimation. # "Smooths" data with a kernel ("gaussian","epanechnikov","rectangular"), giving an estimate for density. Returns the filter function reflecting this. # epsilon > 0 controls smoothing # For more information, see Topological Methods for the Analysis of High Dimensional Data Sets and 3D Object Recognition by Singh et. al. (2007) kernel_density = function(distance_matrix,epsilon=1, kernel=c("gaussian","epanechnikov","rectangular")) { distance_matrix=check_distance_matrix(distance_matrix) N=nrow(distance_matrix) #we are assuming it is square final_filter = array(0, N) if(kernel=="gaussian"){ for(x in 1:N){ for(y in 1:N){ if(x==y){next} final_filter[x]=final_filter[x]+exp(-(distance_matrix[x,y]^2)/epsilon) } } } else {stop("Kernel not supported at this time.")} normalize=sum(final_filter) final_filter=final_filter/normalize return(final_filter) } ############################################################################### # Returns the filter function giving the eccentricty of each point represented by the distance matrix. #Intuitively, this is distance from the "center" without defining a center. # p is analagous to L_p norm, and can be in range [1,infty). For the p=infinity case, set p=0. #For more information, see Topological Methods for the Analysis of High Dimensional Data Sets and 3D Object Recognition by Singh et. al. (2007) eccentricity = function(distance_matrix, p=2,na.rm=FALSE) { distance_matrix=check_distance_matrix(distance_matrix) N=nrow(distance_matrix) #we are assuming it is square final_filter = array(NA, N) if(p>=1){ for(x in 1:N){ scaled_dist=distance_matrix[x,]^p filter = (sum(scaled_dist,na.rm=na.rm)/N)^(1/p) final_filter[x]=filter } return(final_filter) } else if(p==0){ for(x in 1:N){ filter=max(distance_matrix[x,]) final_filter[x]=filter } return(final_filter) } else { stop("p must be greater than or equal to 1. If you want the infinity case, set p to 0. Other values are not handled.") } } ###############################################################################