akester/storehouse

Fork Project

1f7cc1de0c08ed13615dff7b529aa324bf8aebaa

Authored By: Andrew Kester Date: July 7, 2017 1:18 pm

Kinda fix SVN parsing for large diffs.

SVN performs really badly when getting changes for a series of large files. We don't have a great way to check for and prevent this, since the issue is at the execution level and not at our parsing level. Instead, we cheat a little and check whether the transaction file on disk is large.

This fails with deleted files though, since the file isn't stored in the transaction file.

src/StorehouseBundle/Utils/VCS/Commands/SVN/GetDiffCommand.php
2 2
 namespace StorehouseBundle\Utils\VCS\Commands\SVN;
3 3
 
4 4
 use StorehouseBundle\Utils\VCS\Commands\Helpers\ExtractDiff;
5
+use StorehouseBundle\Entity\VCS\LargeDiff;
6 5
 
7 6
 /*
8 7
  * The Storehouse - Project Storage for Big Ideas
25 26
 {
26 27
 
27 28
     /**
28
+     * Maximum size of SVN transaction files we'll read (in MB)
29
+     *
30
+     * So this is a total hack. SVN performs _really_ badly when trying to diff files that are very large. This doesn't show
31
+     * up in the output (since large files are usually binary and not displayed), but shows in the execution time of the diff
32
+     * command.
33
+     *
34
+     * To fight this, we can't really check the number of files modified (since a single large file could trigger this) or the
35
+     * number of lines (since that check is performed after we read the diff). What we do is check the size of the transaction
36
+     * file on the disk. This is a quick operation and gives us a pretty accurate picture of whether SVN can actually
37
+     * handle the file.
38
+     *
39
+     * This does, however, fail in an edge case.  Files that are deleted are not stored in the transaction file, so the file
40
+     * appears small but SVN will still try and read the entire diff off disk, taking just as long.
41
+     */
42
+    const DIFF_SIZE_LIMIT = 5;
43
+
44
+    /**
45 29
      * The command template to execute.
46 30
      *
47 31
      * @var string
48 32
      */
33
-    protected $command = 'diff -r {{start}}:{{end}} --ignore-properties';
49
+    protected $command = 'diff -r {{start}}:{{end}} --ignore-properties --no-diff-deleted';
50 34
 
51 35
     /**
52 36
      * Keys that are required to run this command.
52 70
     );
53 71
 
54 72
     /**
55
+     * Check the size of the commit file on disk and determine if we should try to read it.
56
+     *
57
+     * {@inheritdoc}
58
+     * @see \StorehouseBundle\Utils\VCS\Commands\AbstractCommand::willProcess()
59
+     */
60
+    public function willProcess($extraOutput = null)
61
+    {
62
+        /* Read the repository's db/format file and pull the shard size out of its "layout sharded N" line. */
63
+        $format = file_get_contents(sprintf('%s/db/format', $this->getArgument('project')));
64
+        $matches = array();
65
+        preg_match('/layout sharded ([0-9]+)/', $format, $matches);
66
+        /* NOTE(review): $matches[1] is unset if the pattern did not match or the read failed - confirm this cannot happen here. */
67
+        $shardSize = $matches[1];
68
+        $shard = floor($this->getArgument('commit') / $shardSize);
69
+
70
+        /* Size in bytes of the revision file at db/revs/<shard>/<commit>; filesize() yields false if the file is missing. */
71
+        $size = filesize(
72
+            sprintf('%s/db/revs/%s/%s', $this->getArgument('project'), $shard, $this->getArgument('commit')));
73
+        /* DIFF_SIZE_LIMIT is expressed in MB, so it is converted to bytes for the comparison. */
74
+        if ($size > (self::DIFF_SIZE_LIMIT * 1024 * 1024)) {
75
+            return array(
76
+                new LargeDiff()
77
+            );
78
+        }
79
+        /* Presumably true tells the caller to go ahead and run the command - confirm against AbstractCommand::willProcess(). */
80
+        return true;
81
+    }
82
+
83
+    /**
84 73
      *
85 74
      * {@inheritdoc}
86 75
      *
87 76