*----------------------------------------------------------------------- * glom - agglomerate a real sequence into segments of similar values * by Andy Allinger, 2021-2022, released to the public domain * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, without any conditions or restrictions. This software is * provided "as is" without express or implied warranty. * * Segments are merged until a stopping criterion is met: * * To stop when the adjacent segment's average value differs by * some threshold, set ADPARM to the threshold value, set MDPARM to * a very large number, and set K to 1. On output, K will return * the number of segments made. * * To stop when the range of values within a segment reaches some * threshold, set MDPARM to the threshold value, set ADPARM to a * very large number, and set K to 1. On output, K will return the * number of segments made. * * To form a specific number of segments, set ADPARM and MDPARM to * a very large number, and set K to the desired number of * segments. * *___Name_______Type______I/O_______Description__________________________ * X(N) Real In Sequence * N Integer In Length of the sequence * ADPARM Real In Excessive average difference * MDPARM Real In Excessive maximum difference * K Integer In/Out Final number of segments * BOUND(N) Integer Out Beginning each segment * LENGTH(N) Integer Out Length of each segment * AVE(N) Real Out Average value in segment * VAR(N) Real Out Variance in segment * LEAST(N) Real Out Minimum value in segment * GREAT(N) Real Out Maximum value in segment * SUMX(N) Real Out Sum each segment * SUMX2(N) Real Out Sum of squares each segment * DV(N) Real Neither Change in variance from merging * PRIOR(N) Integer Neither Prior segment beginning * NEXT(N) Integer Neither Next segment beginning * RIND(N) Integer Neither Index into heap *----------------------------------------------------------------------- SUBROUTINE GLOM (X, N, ADPARM, MDPARM, K, BOUND, LENGTH, AVE, VAR, $ LEAST, GREAT, SUMX, SUMX2, DV, PRIOR, NEXT, RIND) IMPLICIT NONE INTEGER N INTEGER K, BOUND(N), LENGTH(N), PRIOR(N), NEXT(N), RIND(N) REAL X(N), ADPARM, MDPARM, AVE(N), VAR(N), LEAST(N), GREAT(N), $ SUMX(N), SUMX2(N), DV(N) * Constants REAL ALMBIG, BIG PARAMETER (ALMBIG = 9.E35, $ BIG = 1.E36) * Local variables INTEGER I, II, J, JJ, ! array indices $ IH, ! heap index $ KIN ! input K REAL F, G, ! min, max $ SL, ! sum of lengths $ VNEW ! new variance *----------------------------------------------------------------------- * Begin *----------------------------------------------------------------------- DO J = 1, N BOUND(J) = J LENGTH(J) = 1 PRIOR(J) = J - 1 NEXT(J) = J + 1 RIND(J) = J AVE(J) = X(J) VAR(J) = 0. LEAST(J) = X(J) GREAT(J) = X(J) SUMX(J) = X(J) SUMX2(J) = X(J)**2 END DO *----------------------------------------------------------------------- * The heap-sifting routines seek a maximum value. Thus, to find a * minimal change in variance, negated values are entered into array DV. *----------------------------------------------------------------------- DV(1) = -BIG DO J = 2, N SL = 2. VNEW = (SUMX2(J-1) + SUMX2(J)) / SL - $ ((SUMX(J-1) + SUMX(J)) / SL)**2 DV(J) = -VNEW END DO DO J = N/2, 1, -1 ! form heap CALL SIFTDN (DV, BOUND, RIND, N, N, J) END DO KIN = K K = N *----------------------------------------------------------------------- * Main loop *----------------------------------------------------------------------- DO 10 WHILE (DV(1) > -ALMBIG .AND. K > KIN) J = BOUND(1) I = PRIOR(J) * Check limits F = MIN(LEAST(I), LEAST(J)) G = MAX(GREAT(I), GREAT(J)) IF ( ABS(AVE(I) - AVE(J)) .GE. ADPARM .OR. $ (G - F) .GE. MDPARM ) THEN DV(1) = -BIG CALL SIFTDN (DV, BOUND, RIND, N, K, 1) GO TO 10 END IF * Merge LENGTH(I) = LENGTH(I) + LENGTH(J) SUMX(I) = SUMX(I) + SUMX(J) SUMX2(I) = SUMX2(I) + SUMX2(J) AVE(I) = SUMX(I) / FLOAT(LENGTH(I)) VAR(I) = SUMX2(I) / FLOAT(LENGTH(I)) - AVE(I)**2 LEAST(I) = F GREAT(I) = G * Delete boundary from heap DV(1) = DV(K) BOUND(1) = BOUND(K) RIND(BOUND(1)) = 1 K = K - 1 CALL SIFTDN (DV, BOUND, RIND, N, K, 1) * Recalculate differences II = PRIOR(I) IF (II .GE. 1) THEN IH = RIND(I) SL = FLOAT(LENGTH(II) + LENGTH(I)) VNEW = (SUMX2(II) + SUMX2(I)) / SL - $ ((SUMX(II) + SUMX(I)) / SL)**2 DV(IH) = VAR(II) + VAR(I) - VNEW CALL SIFTUP (DV, BOUND, RIND, N, K, IH) CALL SIFTDN (DV, BOUND, RIND, N, K, IH) END IF JJ = NEXT(J) NEXT(I) = JJ IF (JJ .LE. N) THEN IH = RIND(JJ) SL = FLOAT(LENGTH(J) + LENGTH(JJ)) VNEW = (SUMX2(J) + SUMX2(JJ)) / SL - $ ((SUMX(J) + SUMX(JJ)) / SL)**2 DV(IH) = VAR(J) + VAR(JJ) - VNEW CALL SIFTUP (DV, BOUND, RIND, N, K, IH) CALL SIFTDN (DV, BOUND, RIND, N, K, IH) PRIOR(JJ) = I END IF 10 CONTINUE *----------------------------------------------------------------------- * Put output arrays in order *----------------------------------------------------------------------- CALL ISHELL (BOUND, K) DO J = 1, K LENGTH(J) = LENGTH(BOUND(J)) AVE(J) = AVE(BOUND(J)) VAR(J) = VAR(BOUND(J)) LEAST(J) = LEAST(BOUND(J)) GREAT(J) = GREAT(BOUND(J)) SUMX(J) = SUMX(BOUND(J)) SUMX2(J) = SUMX2(BOUND(J)) END DO RETURN END ! of glom