Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F2891022
ArcanistDiffParser.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Advanced/Developer...
View Handle
View Hovercard
Size
43 KB
Referenced Files
None
Subscribers
None
ArcanistDiffParser.php
View Options
<?php
/**
* Parses diffs from a working copy.
*/
final
class
ArcanistDiffParser
extends
Phobject
{
protected
$repositoryAPI
;
protected
$text
;
protected
$line
;
protected
$lineSaved
;
protected
$isGit
;
protected
$isMercurial
;
protected
$isRCS
;
protected
$detectBinaryFiles
=
false
;
protected
$tryEncoding
;
protected
$rawDiff
;
protected
$writeDiffOnFailure
;
protected
$changes
=
array
(
)
;
private
$forcePath
;
public
function
setRepositoryAPI
(
ArcanistRepositoryAPI
$repository_api
)
{
$this
->
repositoryAPI
=
$repository_api
;
return
$this
;
}
public
function
setDetectBinaryFiles
(
$detect
)
{
$this
->
detectBinaryFiles
=
$detect
;
return
$this
;
}
public
function
setTryEncoding
(
$encoding
)
{
$this
->
tryEncoding
=
$encoding
;
return
$this
;
}
public
function
forcePath
(
$path
)
{
$this
->
forcePath
=
$path
;
return
$this
;
}
public
function
setChanges
(
array
$changes
)
{
assert_instances_of
(
$changes
,
'ArcanistDiffChange'
)
;
$this
->
changes
=
mpull
(
$changes
,
null
,
'getCurrentPath'
)
;
return
$this
;
}
public
function
parseSubversionDiff
(
ArcanistSubversionAPI
$api
,
$paths
)
{
$this
->
setRepositoryAPI
(
$api
)
;
$diffs
=
array
(
)
;
foreach
(
$paths
as
$path
=>
$status
)
{
if
(
$status
&
ArcanistRepositoryAPI
::
FLAG_UNTRACKED
||
$status
&
ArcanistRepositoryAPI
::
FLAG_CONFLICT
||
$status
&
ArcanistRepositoryAPI
::
FLAG_MISSING
)
{
unset
(
$paths
[
$path
]
)
;
}
}
$root
=
null
;
$from
=
array
(
)
;
foreach
(
$paths
as
$path
=>
$status
)
{
$change
=
$this
->
buildChange
(
$path
)
;
if
(
$status
&
ArcanistRepositoryAPI
::
FLAG_ADDED
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_ADD
)
;
}
else
if
(
$status
&
ArcanistRepositoryAPI
::
FLAG_DELETED
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_DELETE
)
;
}
else
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_CHANGE
)
;
}
$is_dir
=
is_dir
(
$api
->
getPath
(
$path
)
)
;
if
(
$is_dir
)
{
$change
->
setFileType
(
ArcanistDiffChangeType
::
FILE_DIRECTORY
)
;
// We have to go hit the diff even for directories because they may
// have property changes or moves, etc.
}
$is_link
=
is_link
(
$api
->
getPath
(
$path
)
)
;
if
(
$is_link
)
{
$change
->
setFileType
(
ArcanistDiffChangeType
::
FILE_SYMLINK
)
;
}
$diff
=
$api
->
getRawDiffText
(
$path
)
;
if
(
$diff
)
{
$this
->
parseDiff
(
$diff
)
;
}
$info
=
$api
->
getSVNInfo
(
$path
)
;
if
(
idx
(
$info
,
'Copied From URL'
)
)
{
if
(
!
$root
)
{
$rinfo
=
$api
->
getSVNInfo
(
'.'
)
;
$root
=
$rinfo
[
'URL'
]
.
'/'
;
}
$cpath
=
$info
[
'Copied From URL'
]
;
$root_len
=
strlen
(
$root
)
;
if
(
!
strncmp
(
$cpath
,
$root
,
$root_len
)
)
{
$cpath
=
substr
(
$cpath
,
$root_len
)
;
// The user can "svn cp /path/to/file@12345 x", which pulls a file out
// of version history at a specific revision. If we just use the path,
// we'll collide with possible changes to that path in the working
// copy below. In particular, "svn cp"-ing a path which no longer
// exists somewhere in the working copy and then adding that path
// gets us to the "origin change type" branches below with a
// TYPE_ADD state on the path. To avoid this, append the origin
// revision to the path so we'll necessarily generate a new change.
// TODO: In theory, you could have an '@' in your path and this could
// cause a collision, e.g. two files named 'f' and 'f@12345'. This is
// at least somewhat the user's fault, though.
if
(
$info
[
'Copied From Rev'
]
)
{
if
(
$info
[
'Copied From Rev'
]
!=
$info
[
'Revision'
]
)
{
$cpath
.=
'@'
.
$info
[
'Copied From Rev'
]
;
}
}
$change
->
setOldPath
(
$cpath
)
;
$from
[
$path
]
=
$cpath
;
}
}
$type
=
$change
->
getType
(
)
;
if
(
(
$type
===
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
||
$type
===
ArcanistDiffChangeType
::
TYPE_DELETE
)
&&
idx
(
$info
,
'Node Kind'
)
===
'directory'
)
{
$change
->
setFileType
(
ArcanistDiffChangeType
::
FILE_DIRECTORY
)
;
}
}
foreach
(
$paths
as
$path
=>
$status
)
{
$change
=
$this
->
buildChange
(
$path
)
;
if
(
empty
(
$from
[
$path
]
)
)
{
continue
;
}
if
(
empty
(
$this
->
changes
[
$from
[
$path
]
]
)
)
{
if
(
$change
->
getType
(
)
==
ArcanistDiffChangeType
::
TYPE_COPY_HERE
)
{
// If the origin path wasn't changed (or isn't included in this diff)
// and we only copied it, don't generate a changeset for it. This
// keeps us out of trouble when we go to 'arc commit' and need to
// figure out which files should be included in the commit list.
continue
;
}
}
$origin
=
$this
->
buildChange
(
$from
[
$path
]
)
;
$origin
->
addAwayPath
(
$change
->
getCurrentPath
(
)
)
;
$type
=
$origin
->
getType
(
)
;
switch
(
$type
)
{
case
ArcanistDiffChangeType
::
TYPE_MULTICOPY
:
case
ArcanistDiffChangeType
::
TYPE_COPY_AWAY
:
// "Add" is possible if you do some bizarre tricks with svn:ignore and
// "svn copy"'ing URLs straight from the repository; you can end up with
// a file that is a copy of itself. See T271.
case
ArcanistDiffChangeType
::
TYPE_ADD
:
break
;
case
ArcanistDiffChangeType
::
TYPE_DELETE
:
$origin
->
setType
(
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
)
;
break
;
case
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
:
$origin
->
setType
(
ArcanistDiffChangeType
::
TYPE_MULTICOPY
)
;
break
;
case
ArcanistDiffChangeType
::
TYPE_CHANGE
:
$origin
->
setType
(
ArcanistDiffChangeType
::
TYPE_COPY_AWAY
)
;
break
;
default
:
throw
new
Exception
(
pht
(
'Bad origin state %s.'
,
$type
)
)
;
}
$type
=
$origin
->
getType
(
)
;
switch
(
$type
)
{
case
ArcanistDiffChangeType
::
TYPE_MULTICOPY
:
case
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
:
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_MOVE_HERE
)
;
break
;
case
ArcanistDiffChangeType
::
TYPE_ADD
:
case
ArcanistDiffChangeType
::
TYPE_COPY_AWAY
:
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_COPY_HERE
)
;
break
;
default
:
throw
new
Exception
(
pht
(
'Bad origin state %s.'
,
$type
)
)
;
}
}
return
$this
->
changes
;
}
public
function
parseDiff
(
$diff
)
{
if
(
!
strlen
(
trim
(
$diff
)
)
)
{
throw
new
Exception
(
pht
(
"Can't parse an empty diff!"
)
)
;
}
// Detect `git-format-patch`, by looking for a "---" line somewhere in
// the file and then a footer with Git version number, which looks like
// this:
//
// --
// 1.8.4.2
//
// Note that `git-format-patch` adds a space after the "--", but we don't
// require it when detecting patches, as trailing whitespace can easily be
// lost in transit.
$detect_patch
=
'/^---$.*^-- ?[\s\d.]+\z/ms'
;
$message
=
null
;
if
(
preg_match
(
$detect_patch
,
$diff
)
)
{
list
(
$message
,
$diff
)
=
$this
->
stripGitFormatPatch
(
$diff
)
;
}
$this
->
didStartParse
(
$diff
)
;
// Strip off header comments. While `patch` allows comments anywhere in the
// file, `git apply` is more strict. We get these comments in `hg export`
// diffs, and Eclipse can also produce them.
$line
=
$this
->
getLineTrimmed
(
)
;
while
(
preg_match
(
'/^#/'
,
$line
)
)
{
$line
=
$this
->
nextLine
(
)
;
}
if
(
strlen
(
$message
)
)
{
// If we found a message during pre-parse steps, add it to the resulting
// changes here.
$change
=
$this
->
buildChange
(
null
)
->
setType
(
ArcanistDiffChangeType
::
TYPE_MESSAGE
)
->
setMetadata
(
'message'
,
$message
)
;
}
do
{
$patterns
=
array
(
// This is a normal SVN text change, probably from "svn diff".
'(?P<type>Index): (?P<cur>.+)'
,
// This is an SVN text change, probably from "svnlook diff".
'(?P<type>Modified|Added|Deleted|Copied): (?P<cur>.+)'
,
// This is an SVN property change, probably from "svn diff".
'(?P<type>Property changes on): (?P<cur>.+)'
,
// This is a git commit message, probably from "git show".
'(?P<type>commit) (?P<hash>[a-f0-9]+)(?: \(.*\))?'
,
// This is a git diff, probably from "git show" or "git diff".
// Note that the filenames may appear quoted.
'(?P<type>diff --git) (?P<oldnew>.*)'
,
// RCS Diff
'(?P<type>rcsdiff -u) (?P<oldnew>.*)'
,
// This is a unified diff, probably from "diff -u" or synthetic diffing.
'(?P<type>---) (?P<old>.+)\s+\d{4}-\d{2}-\d{2}.*'
,
'(?P<binary>Binary files|Files) '
.
'(?P<old>.+)\s+\d{4}-\d{2}-\d{2} and '
.
'(?P<new>.+)\s+\d{4}-\d{2}-\d{2} differ.*'
,
// This is a normal Mercurial text change, probably from "hg diff". It
// may have two "-r" blocks if it came from "hg diff -r x:y".
'(?P<type>diff -r) (?P<hgrev>[a-f0-9]+) (?:-r [a-f0-9]+ )?(?P<cur>.+)'
,
)
;
$line
=
$this
->
getLineTrimmed
(
)
;
$match
=
null
;
$ok
=
$this
->
tryMatchHeader
(
$patterns
,
$line
,
$match
)
;
$failed_parse
=
false
;
if
(
!
$ok
&&
$this
->
isFirstNonEmptyLine
(
)
)
{
// 'hg export' command creates so called "extended diff" that
// contains some meta information and comment at the beginning
// (isFirstNonEmptyLine() to check for beginning). Actual mercurial
// code detects where comment ends and unified diff starts by
// searching for "diff -r" or "diff --git" in the text.
$this
->
saveLine
(
)
;
$line
=
$this
->
nextLineThatLooksLikeDiffStart
(
)
;
if
(
!
$this
->
tryMatchHeader
(
$patterns
,
$line
,
$match
)
)
{
// Restore line before guessing to display correct error.
$this
->
restoreLine
(
)
;
$failed_parse
=
true
;
}
}
else
if
(
!
$ok
)
{
$failed_parse
=
true
;
}
if
(
$failed_parse
)
{
$this
->
didFailParse
(
pht
(
"Expected a hunk header, like '%s' (svn), '%s' (svn properties), "
.
"'%s' (git show), '%s' (git diff), '%s' (unified diff), or "
.
"'%s' (hg diff or patch)."
,
'Index: /path/to/file.ext'
,
'Property changes on: /path/to/file.ext'
,
'commit 59bcc3ad6775562f845953cf01624225'
,
'diff --git'
,
'--- filename'
,
'diff -r'
)
)
;
}
if
(
isset
(
$match
[
'type'
]
)
)
{
if
(
$match
[
'type'
]
==
'diff --git'
)
{
$filename
=
self
::
extractGitCommonFilename
(
$match
[
'oldnew'
]
)
;
if
(
$filename
!==
null
)
{
$match
[
'old'
]
=
$filename
;
$match
[
'cur'
]
=
$filename
;
}
}
}
$change
=
$this
->
buildChange
(
idx
(
$match
,
'cur'
)
)
;
if
(
isset
(
$match
[
'old'
]
)
)
{
$change
->
setOldPath
(
$match
[
'old'
]
)
;
}
if
(
isset
(
$match
[
'hash'
]
)
)
{
$change
->
setCommitHash
(
$match
[
'hash'
]
)
;
}
if
(
isset
(
$match
[
'binary'
]
)
)
{
$change
->
setFileType
(
ArcanistDiffChangeType
::
FILE_BINARY
)
;
$line
=
$this
->
nextNonemptyLine
(
)
;
continue
;
}
$line
=
$this
->
nextLine
(
)
;
switch
(
$match
[
'type'
]
)
{
case
'Index'
:
case
'Modified'
:
case
'Added'
:
case
'Deleted'
:
case
'Copied'
:
$this
->
parseIndexHunk
(
$change
)
;
break
;
case
'Property changes on'
:
$this
->
parsePropertyHunk
(
$change
)
;
break
;
case
'diff --git'
:
$this
->
setIsGit
(
true
)
;
$this
->
parseIndexHunk
(
$change
)
;
break
;
case
'commit'
:
$this
->
setIsGit
(
true
)
;
$this
->
parseCommitMessage
(
$change
)
;
break
;
case
'---'
:
$ok
=
preg_match
(
'@^(?:\+\+\+) (.*)\s+\d{4}-\d{2}-\d{2}.*$@'
,
$line
,
$match
)
;
if
(
!
$ok
)
{
$this
->
didFailParse
(
pht
(
"Expected '%s' in unified diff."
,
'+++ filename'
)
)
;
}
$change
->
setCurrentPath
(
$match
[
1
]
)
;
$line
=
$this
->
nextLine
(
)
;
$this
->
parseChangeset
(
$change
)
;
break
;
case
'diff -r'
:
$this
->
setIsMercurial
(
true
)
;
$this
->
parseIndexHunk
(
$change
)
;
break
;
case
'rcsdiff -u'
:
$this
->
isRCS
=
true
;
$this
->
parseIndexHunk
(
$change
)
;
break
;
default
:
$this
->
didFailParse
(
pht
(
'Unknown diff type.'
)
)
;
break
;
}
}
while
(
$this
->
getLine
(
)
!==
null
)
;
$this
->
didFinishParse
(
)
;
$this
->
loadSyntheticData
(
)
;
return
$this
->
changes
;
}
protected
function
tryMatchHeader
(
$patterns
,
$line
,
&
$match
)
{
foreach
(
$patterns
as
$pattern
)
{
if
(
preg_match
(
'@^'
.
$pattern
.
'$@'
,
$line
,
$match
)
)
{
return
true
;
}
}
return
false
;
}
protected
function
parseCommitMessage
(
ArcanistDiffChange
$change
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_MESSAGE
)
;
$message
=
array
(
)
;
$line
=
$this
->
getLine
(
)
;
if
(
preg_match
(
'/^Merge: /'
,
$line
)
)
{
$this
->
nextLine
(
)
;
}
$line
=
$this
->
getLine
(
)
;
if
(
!
preg_match
(
'/^Author: /'
,
$line
)
)
{
$this
->
didFailParse
(
pht
(
"Expected 'Author:'."
)
)
;
}
$line
=
$this
->
nextLine
(
)
;
if
(
!
preg_match
(
'/^Date: /'
,
$line
)
)
{
$this
->
didFailParse
(
pht
(
"Expected 'Date:'."
)
)
;
}
while
(
(
$line
=
$this
->
nextLineTrimmed
(
)
)
!==
null
)
{
if
(
strlen
(
$line
)
&&
$line
[
0
]
!=
' '
)
{
break
;
}
// Strip leading spaces from Git commit messages. Note that empty lines
// are represented as just "\n"; don't touch those.
$message
[
]
=
preg_replace
(
'/^ /'
,
''
,
$this
->
getLine
(
)
)
;
}
$message
=
rtrim
(
implode
(
''
,
$message
)
,
"\r\n"
)
;
$change
->
setMetadata
(
'message'
,
$message
)
;
}
/**
* Parse an SVN property change hunk. These hunks are ambiguous so just sort
* of try to get it mostly right. It's entirely possible to foil this parser
* (or any other parser) with a carefully constructed property change.
*/
protected
function
parsePropertyHunk
(
ArcanistDiffChange
$change
)
{
$line
=
$this
->
getLineTrimmed
(
)
;
if
(
!
preg_match
(
'/^_+$/'
,
$line
)
)
{
$this
->
didFailParse
(
pht
(
"Expected '%s'."
,
'______________________'
)
)
;
}
$line
=
$this
->
nextLine
(
)
;
while
(
$line
!==
null
)
{
$done
=
preg_match
(
'/^(Index|Property changes on):/'
,
$line
)
;
if
(
$done
)
{
break
;
}
// NOTE: Before 1.5, SVN uses "Name". At 1.5 and later, SVN uses
// "Modified", "Added" and "Deleted".
$matches
=
null
;
$ok
=
preg_match
(
'/^(Name|Modified|Added|Deleted): (.*)$/'
,
$line
,
$matches
)
;
if
(
!
$ok
)
{
$this
->
didFailParse
(
pht
(
"Expected 'Name', 'Added', 'Deleted', or 'Modified'."
)
)
;
}
$op
=
$matches
[
1
]
;
$prop
=
$matches
[
2
]
;
list
(
$old
,
$new
)
=
$this
->
parseSVNPropertyChange
(
$op
,
$prop
)
;
if
(
$old
!==
null
)
{
$change
->
setOldProperty
(
$prop
,
$old
)
;
}
if
(
$new
!==
null
)
{
$change
->
setNewProperty
(
$prop
,
$new
)
;
}
$line
=
$this
->
getLine
(
)
;
}
}
private
function
parseSVNPropertyChange
(
$op
,
$prop
)
{
$old
=
array
(
)
;
$new
=
array
(
)
;
$target
=
null
;
$line
=
$this
->
nextLine
(
)
;
$prop_index
=
2
;
while
(
$line
!==
null
)
{
$done
=
preg_match
(
'/^(Modified|Added|Deleted|Index|Property changes on):/'
,
$line
)
;
if
(
$done
)
{
break
;
}
$trimline
=
ltrim
(
$line
)
;
if
(
$trimline
&&
$trimline
[
0
]
==
'#'
)
{
// in svn1.7, a line like ## -0,0 +1 ## is put between the Added: line
// and the line with the property change. If we have such a line, we'll
// just ignore it (:
$line
=
$this
->
nextLine
(
)
;
$prop_index
=
1
;
$trimline
=
ltrim
(
$line
)
;
}
if
(
$trimline
&&
$trimline
[
0
]
==
'+'
)
{
if
(
$op
==
'Deleted'
)
{
$this
->
didFailParse
(
pht
(
'Unexpected "%s" section in property deletion.'
,
'+'
)
)
;
}
$target
=
'new'
;
$line
=
substr
(
$trimline
,
$prop_index
)
;
}
else
if
(
$trimline
&&
$trimline
[
0
]
==
'-'
)
{
if
(
$op
==
'Added'
)
{
$this
->
didFailParse
(
pht
(
'Unexpected "%s" section in property addition.'
,
'-'
)
)
;
}
$target
=
'old'
;
$line
=
substr
(
$trimline
,
$prop_index
)
;
}
else
if
(
!
strncmp
(
$trimline
,
'Merged'
,
6
)
)
{
if
(
$op
==
'Added'
)
{
$target
=
'new'
;
}
else
{
// These can appear on merges. No idea how to interpret this (unclear
// what the old / new values are) and it's of dubious usefulness so
// just throw it away until someone complains.
$target
=
null
;
}
$line
=
$trimline
;
}
if
(
$target
==
'new'
)
{
$new
[
]
=
$line
;
}
else
if
(
$target
==
'old'
)
{
$old
[
]
=
$line
;
}
$line
=
$this
->
nextLine
(
)
;
}
$old
=
rtrim
(
implode
(
''
,
$old
)
)
;
$new
=
rtrim
(
implode
(
''
,
$new
)
)
;
if
(
!
strlen
(
$old
)
)
{
$old
=
null
;
}
if
(
!
strlen
(
$new
)
)
{
$new
=
null
;
}
return
array
(
$old
,
$new
)
;
}
protected
function
setIsGit
(
$git
)
{
if
(
$this
->
isGit
!==
null
&&
$this
->
isGit
!=
$git
)
{
throw
new
Exception
(
pht
(
'Git status has changed!'
)
)
;
}
$this
->
isGit
=
$git
;
return
$this
;
}
protected
function
getIsGit
(
)
{
return
$this
->
isGit
;
}
public
function
setIsMercurial
(
$is_mercurial
)
{
$this
->
isMercurial
=
$is_mercurial
;
return
$this
;
}
public
function
getIsMercurial
(
)
{
return
$this
->
isMercurial
;
}
protected
function
parseIndexHunk
(
ArcanistDiffChange
$change
)
{
$is_git
=
$this
->
getIsGit
(
)
;
$is_mercurial
=
$this
->
getIsMercurial
(
)
;
$is_svn
=
(
!
$is_git
&&
!
$is_mercurial
)
;
$move_source
=
null
;
$line
=
$this
->
getLine
(
)
;
if
(
$is_git
)
{
do
{
$patterns
=
array
(
'(?P<new>new) file mode (?P<newmode>\d+)'
,
'(?P<deleted>deleted) file mode (?P<oldmode>\d+)'
,
// These occur when someone uses `chmod` on a file.
'old mode (?P<oldmode>\d+)'
,
'new mode (?P<newmode>\d+)'
,
// These occur when you `mv` a file and git figures it out.
'similarity index '
,
'rename from (?P<old>.*)'
,
'(?P<move>rename) to (?P<cur>.*)'
,
'copy from (?P<old>.*)'
,
'(?P<copy>copy) to (?P<cur>.*)'
,
)
;
$ok
=
false
;
$match
=
null
;
foreach
(
$patterns
as
$pattern
)
{
$ok
=
preg_match
(
'@^'
.
$pattern
.
'@'
,
$line
,
$match
)
;
if
(
$ok
)
{
break
;
}
}
if
(
!
$ok
)
{
if
(
$line
===
null
||
preg_match
(
'/^(diff --git|commit) /'
,
$line
)
)
{
// In this case, there are ONLY file mode changes, or this is a
// pure move. If it's a move, flag these changesets so we can build
// synthetic changes later, enabling us to show file contents in
// Differential -- git only gives us a block like this:
//
// diff --git a/README b/READYOU
// similarity index 100%
// rename from README
// rename to READYOU
//
// ...i.e., there is no associated diff.
// This allows us to distinguish between property changes only
// and actual moves. For property changes only, we can't currently
// build a synthetic diff correctly, so just skip it.
// TODO: Build synthetic diffs for property changes, too.
if
(
$change
->
getType
(
)
!=
ArcanistDiffChangeType
::
TYPE_CHANGE
)
{
$change
->
setNeedsSyntheticGitHunks
(
true
)
;
if
(
$move_source
)
{
$move_source
->
setNeedsSyntheticGitHunks
(
true
)
;
}
}
return
;
}
break
;
}
if
(
!
empty
(
$match
[
'oldmode'
]
)
)
{
$change
->
setOldProperty
(
'unix:filemode'
,
$match
[
'oldmode'
]
)
;
}
if
(
!
empty
(
$match
[
'newmode'
]
)
)
{
$change
->
setNewProperty
(
'unix:filemode'
,
$match
[
'newmode'
]
)
;
}
if
(
!
empty
(
$match
[
'deleted'
]
)
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_DELETE
)
;
}
if
(
!
empty
(
$match
[
'new'
]
)
)
{
// If you replace a symlink with a normal file, git renders the change
// as a "delete" of the symlink plus an "add" of the new file. We
// prefer to represent this as a change.
if
(
$change
->
getType
(
)
==
ArcanistDiffChangeType
::
TYPE_DELETE
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_CHANGE
)
;
}
else
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_ADD
)
;
}
}
if
(
!
empty
(
$match
[
'old'
]
)
)
{
$match
[
'old'
]
=
self
::
unescapeFilename
(
$match
[
'old'
]
)
;
$change
->
setOldPath
(
$match
[
'old'
]
)
;
}
if
(
!
empty
(
$match
[
'cur'
]
)
)
{
$match
[
'cur'
]
=
self
::
unescapeFilename
(
$match
[
'cur'
]
)
;
$change
->
setCurrentPath
(
$match
[
'cur'
]
)
;
}
if
(
!
empty
(
$match
[
'copy'
]
)
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_COPY_HERE
)
;
$old
=
$this
->
buildChange
(
$change
->
getOldPath
(
)
)
;
$type
=
$old
->
getType
(
)
;
if
(
$type
==
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
)
{
$old
->
setType
(
ArcanistDiffChangeType
::
TYPE_MULTICOPY
)
;
}
else
{
$old
->
setType
(
ArcanistDiffChangeType
::
TYPE_COPY_AWAY
)
;
}
$old
->
addAwayPath
(
$change
->
getCurrentPath
(
)
)
;
}
if
(
!
empty
(
$match
[
'move'
]
)
)
{
$change
->
setType
(
ArcanistDiffChangeType
::
TYPE_MOVE_HERE
)
;
$old
=
$this
->
buildChange
(
$change
->
getOldPath
(
)
)
;
$type
=
$old
->
getType
(
)
;
if
(
$type
==
ArcanistDiffChangeType
::
TYPE_MULTICOPY
)
{
// Great, no change.
}
else
if
(
$type
==
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
)
{
$old
->
setType
(
ArcanistDiffChangeType
::
TYPE_MULTICOPY
)
;
}
else
if
(
$type
==
ArcanistDiffChangeType
::
TYPE_COPY_AWAY
)
{
$old
->
setType
(
ArcanistDiffChangeType
::
TYPE_MULTICOPY
)
;
}
else
{
$old
->
setType
(
ArcanistDiffChangeType
::
TYPE_MOVE_AWAY
)
;
}
// We'll reference this above.
$move_source
=
$old
;
$old
->
addAwayPath
(
$change
->
getCurrentPath
(
)
)
;
}
$line
=
$this
->
nextNonemptyLine
(
)
;
}
while
(
true
)
;
}
$line
=
$this
->
getLine
(
)
;
if
(
$is_svn
)
{
$ok
=
preg_match
(
'/^=+\s*$/'
,
$line
)
;
if
(
!
$ok
)
{
$this
->
didFailParse
(
pht
(
"Expected '%s' divider line."
,
'======================='
)
)
;
}
else
{
// Adding an empty file in SVN can produce an empty line here.
$line
=
$this
->
nextNonemptyLine
(
)
;
}
}
else
if
(
$is_git
)
{
$ok
=
preg_match
(
'/^index .*$/'
,
$line
)
;
if
(
!
$ok
)
{
// TODO: "hg diff -g" diffs ("mercurial git-style diffs") do not include
// this line, so we can't parse them if we fail on it. Maybe introduce
// a flag saying "parse this diff using relaxed git-style diff rules"?
// $this->didFailParse("Expected 'index af23f...a98bc' header line.");
}
else
{
// NOTE: In the git case, where this patch is the last change in the
// file, we may have a final terminal newline. Skip over it so that
// we'll hit the '$line === null' block below. This is covered by the
// 'git-empty-file.gitdiff' test case.
$line
=
$this
->
nextNonemptyLine
(
)
;
}
}
// If there are files with only whitespace changes and -b or -w are
// supplied as command-line flags to `diff', svn and git both produce
// changes without any body.
if
(
$line
===
null
||
preg_match
(
'/^(Index:|Property changes on:|diff --git|commit) /'
,
$line
)
)
{
return
;
}
$is_binary_add
=
preg_match
(
'/^Cannot display: file marked as a binary type\.$/'
,
rtrim
(
$line
)
)
;
if
(
$is_binary_add
)
{
$this
->
nextLine
(
)
;
// Cannot display: file marked as a binary type.
$this
->
nextNonemptyLine
(
)
;
// svn:mime-type = application/octet-stream
$this
->
markBinary
(
$change
)
;
return
;
}
// We can get this in git, or in SVN when a file exists in the repository
// WITHOUT a binary mime-type and is changed and given a binary mime-type.
$is_binary_diff
=
preg_match
(
'/^(Binary files|Files) .* and .* differ$/'
,
rtrim
(
$line
)
)
;
if
(
$is_binary_diff
)
{
$this
->
nextNonemptyLine
(
)
;
// Binary files x and y differ
$this
->
markBinary
(
$change
)
;
return
;
}
// This occurs under "hg diff --git" when a binary file is removed. See
// test case "hg-binary-delete.hgdiff". (I believe it never occurs under
// git, which reports the "files X and /dev/null differ" string above. Git
// can not apply these patches.)
$is_hg_binary_delete
=
preg_match
(
'/^Binary file .* has changed$/'
,
rtrim
(
$line
)
)
;
if
(
$is_hg_binary_delete
)
{
$this
->
nextNonemptyLine
(
)
;
$this
->
markBinary
(
$change
)
;
return
;
}
// With "git diff --binary" (not a normal mode, but one users may explicitly
// invoke and then, e.g., copy-paste into the web console) or "hg diff
// --git" (normal under hg workflows), we may encounter a literal binary
// patch.
$is_git_binary_patch
=
preg_match
(
'/^GIT binary patch$/'
,
rtrim
(
$line
)
)
;
if
(
$is_git_binary_patch
)
{
$this
->
nextLine
(
)
;
$this
->
parseGitBinaryPatch
(
)
;
$line
=
$this
->
getLine
(
)
;
if
(
preg_match
(
'/^literal/'
,
$line
)
)
{
// We may have old/new binaries (change) or just a new binary (hg add).
// If there are two blocks, parse both.
$this
->
parseGitBinaryPatch
(
)
;
}
$this
->
markBinary
(
$change
)
;
return
;
}
if
(
$is_git
)
{
// "git diff -b" ignores whitespace, but has an empty hunk target
if
(
preg_match
(
'@^diff --git .*$@'
,
$line
)
)
{
$this
->
nextLine
(
)
;
return
null
;
}
}
if
(
$this
->
isRCS
)
{
// Skip the RCS headers.
$this
->
nextLine
(
)
;
$this
->
nextLine
(
)
;
$this
->
nextLine
(
)
;
}
$old_file
=
$this
->
parseHunkTarget
(
)
;
$new_file
=
$this
->
parseHunkTarget
(
)
;
if
(
$this
->
isRCS
)
{
$change
->
setCurrentPath
(
$new_file
)
;
}
$change
->
setOldPath
(
$old_file
)
;
$this
->
parseChangeset
(
$change
)
;
}
private
function
parseGitBinaryPatch
(
)
{
// TODO: We could decode the patches, but it's a giant mess so don't bother
// for now. We'll pick up the data from the working copy in the common
// case ("arc diff").
$line
=
$this
->
getLine
(
)
;
if
(
!
preg_match
(
'/^literal /'
,
$line
)
)
{
$this
->
didFailParse
(
pht
(
"Expected '%s' to start git binary patch."
,
'literal NNNN'
)
)
;
}
do
{
$line
=
$this
->
nextLineTrimmed
(
)
;
if
(
$line
===
''
||
$line
===
null
)
{
// Some versions of Mercurial apparently omit the terminal newline,
// although it's unclear if Git will ever do this. In either case,
// rely on the base85 check for sanity.
$this
->
nextNonemptyLine
(
)
;
return
;
}
else
if
(
!
preg_match
(
'/^[a-zA-Z]/'
,
$line
)
)
{
$this
->
didFailParse
(
pht
(
'Expected base85 line length character (a-zA-Z).'
)
)
;
}
}
while
(
true
)
;
}
protected
function
parseHunkTarget
(
)
{
$line
=
$this
->
getLine
(
)
;
$matches
=
null
;
$remainder
=
'(?:\s*\(.*\))?'
;
if
(
$this
->
getIsMercurial
(
)
)
{
// Something like "Fri Aug 26 01:20:50 2005 -0700", don't bother trying
// to parse it.
$remainder
=
'\t.*'
;
}
else
if
(
$this
->
isRCS
)
{
$remainder
=
'\s.*'
;
}
else
if
(
$this
->
getIsGit
(
)
)
{
// When filenames contain spaces, Git terminates this line with a tab.
// Normally, the tab is not present. If there's a tab, ignore it.
$remainder
=
'(?:\t.*)?'
;
}
$ok
=
preg_match
(
'@^[-+]{3} (?:[ab]/)?(?P<path>.*?)'
.
$remainder
.
'$@'
,
$line
,
$matches
)
;
if
(
!
$ok
)
{
$this
->
didFailParse
(
pht
(
"Expected hunk target '%s'."
,
'+++ path/to/file.ext (revision N)'
)
)
;
}
$this
->
nextLine
(
)
;
return
$matches
[
'path'
]
;
}
protected
function
markBinary
(
ArcanistDiffChange
$change
)
{
$change
->
setFileType
(
ArcanistDiffChangeType
::
FILE_BINARY
)
;
return
$this
;
}
protected
function
parseChangeset
(
ArcanistDiffChange
$change
)
{
// If a diff includes two sets of changes to the same file, let the
// second one win. In particular, this occurs when adding subdirectories
// in Subversion that contain files: the file text will be present in
// both the directory diff and the file diff. See T5555. Dropping the
// hunks lets whichever one shows up later win instead of showing changes
// twice.
$change
->
dropHunks
(
)
;
$all_changes
=
array
(
)
;
do
{
$hunk
=
new
ArcanistDiffHunk
(
)
;
$line
=
$this
->
getLineTrimmed
(
)
;
$real
=
array
(
)
;
// In the case where only one line is changed, the length is omitted.
// The final group is for git, which appends a guess at the function
// context to the diff.
$matches
=
null
;
$ok
=
preg_match
(
'/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?: .*?)?$/U'
,
$line
,
$matches
)
;
if
(
!
$ok
)
{
// It's possible we hit the style of an svn1.7 property change.
// This is a 4-line Index block, followed by an empty line, followed
// by a "Property changes on:" section similar to svn1.6.
if
(
$line
==
''
)
{
$line
=
$this
->
nextNonemptyLine
(
)
;
$ok
=
preg_match
(
'/^Property changes on:/'
,
$line
)
;
if
(
!
$ok
)
{
$this
->
didFailParse
(
pht
(
'Confused by empty line'
)
)
;
}
$line
=
$this
->
nextLine
(
)
;
return
$this
->
parsePropertyHunk
(
$change
)
;
}
$this
->
didFailParse
(
pht
(
"Expected hunk header '%s'."
,
'@@ -NN,NN +NN,NN @@'
)
)
;
}
$hunk
->
setOldOffset
(
$matches
[
1
]
)
;
$hunk
->
setNewOffset
(
$matches
[
3
]
)
;
// Cover for the cases where length wasn't present (implying one line).
$old_len
=
idx
(
$matches
,
2
)
;
if
(
!
strlen
(
$old_len
)
)
{
$old_len
=
1
;
}
$new_len
=
idx
(
$matches
,
4
)
;
if
(
!
strlen
(
$new_len
)
)
{
$new_len
=
1
;
}
$hunk
->
setOldLength
(
$old_len
)
;
$hunk
->
setNewLength
(
$new_len
)
;
$add
=
0
;
$del
=
0
;
$hit_next_hunk
=
false
;
while
(
(
(
$line
=
$this
->
nextLine
(
)
)
!==
null
)
)
{
if
(
strlen
(
rtrim
(
$line
,
"\r\n"
)
)
)
{
$char
=
$line
[
0
]
;
}
else
{
// Normally, we do not encouter empty lines in diffs, because
// unchanged lines have an initial space. However, in Git, with
// the option `diff.suppress-blank-empty` set, unchanged blank lines
// emit as completely empty. If we encounter a completely empty line,
// treat it as a ' ' (i.e., unchanged empty line) line.
$char
=
' '
;
}
switch
(
$char
)
{
case
'\\'
:
if
(
!
preg_match
(
'@\\ No newline at end of file@'
,
$line
)
)
{
$this
->
didFailParse
(
pht
(
"Expected '\ No newline at end of file'."
)
)
;
}
if
(
$new_len
)
{
$real
[
]
=
$line
;
$hunk
->
setIsMissingOldNewline
(
true
)
;
}
else
{
$real
[
]
=
$line
;
$hunk
->
setIsMissingNewNewline
(
true
)
;
}
if
(
!
$new_len
)
{
break
2
;
}
break
;
case
'+'
:
++
$add
;
--
$new_len
;
$real
[
]
=
$line
;
break
;
case
'-'
:
if
(
!
$old_len
)
{
// In this case, we've hit "---" from a new file. So don't
// advance the line cursor.
$hit_next_hunk
=
true
;
break
2
;
}
++
$del
;
--
$old_len
;
$real
[
]
=
$line
;
break
;
case
' '
:
if
(
!
$old_len
&&
!
$new_len
)
{
break
2
;
}
--
$old_len
;
--
$new_len
;
$real
[
]
=
$line
;
break
;
default
:
// We hit something, likely another hunk.
$hit_next_hunk
=
true
;
break
2
;
}
}
if
(
$old_len
||
$new_len
)
{
$this
->
didFailParse
(
pht
(
'Found the wrong number of hunk lines.'
)
)
;
}
$corpus
=
implode
(
''
,
$real
)
;
$is_binary
=
false
;
if
(
$this
->
detectBinaryFiles
)
{
$is_binary
=
!
phutil_is_utf8
(
$corpus
)
;
$try_encoding
=
$this
->
tryEncoding
;
if
(
$is_binary
&&
$try_encoding
)
{
$is_binary
=
ArcanistDiffUtils
::
isHeuristicBinaryFile
(
$corpus
)
;
if
(
!
$is_binary
)
{
$corpus
=
phutil_utf8_convert
(
$corpus
,
'UTF-8'
,
$try_encoding
)
;
if
(
!
phutil_is_utf8
(
$corpus
)
)
{
throw
new
Exception
(
pht
(
"Failed to convert a hunk from '%s' to UTF-8. "
.
"Check that the specified encoding is correct."
,
$try_encoding
)
)
;
}
}
}
}
if
(
$is_binary
)
{
// SVN happily treats binary files which aren't marked with the right
// mime type as text files. Detect that junk here and mark the file
// binary. We'll catch stuff with unicode too, but that's verboten
// anyway. If there are too many false positives with this we might
// need to make it threshold-triggered instead of triggering on any
// unprintable byte.
$change
->
setFileType
(
ArcanistDiffChangeType
::
FILE_BINARY
)
;
}
else
{
$hunk
->
setCorpus
(
$corpus
)
;
$hunk
->
setAddLines
(
$add
)
;
$hunk
->
setDelLines
(
$del
)
;
$change
->
addHunk
(
$hunk
)
;
}
if
(
!
$hit_next_hunk
)
{
$line
=
$this
->
nextNonemptyLine
(
)
;
}
}
while
(
preg_match
(
'/^@@ /'
,
$line
)
)
;
}
protected
function
buildChange
(
$path
=
null
)
{
$change
=
null
;
if
(
$path
!==
null
)
{
if
(
!
empty
(
$this
->
changes
[
$path
]
)
)
{
return
$this
->
changes
[
$path
]
;
}
}
if
(
$this
->
forcePath
)
{
return
$this
->
changes
[
$this
->
forcePath
]
;
}
$change
=
new
ArcanistDiffChange
(
)
;
if
(
$path
!==
null
)
{
$change
->
setCurrentPath
(
$path
)
;
$this
->
changes
[
$path
]
=
$change
;
}
else
{
$this
->
changes
[
]
=
$change
;
}
return
$change
;
}
protected
function
didStartParse
(
$text
)
{
$this
->
rawDiff
=
$text
;
// Eat leading whitespace. This may happen if the first change in the diff
// is an SVN property change.
$text
=
ltrim
(
$text
)
;
// Try to strip ANSI color codes from colorized diffs. ANSI color codes
// might be present in two cases:
//
// - You piped a colorized diff into 'arc --raw' or similar (normally
// we're able to disable colorization on diffs we control the generation
// of).
// - You're diffing a file which actually contains ANSI color codes.
//
// The former is vastly more likely, but we try to distinguish between the
// two cases by testing for a color code at the beginning of a line. If
// we find one, we know it's a colorized diff (since the beginning of the
// line should be "+", "-" or " " if the code is in the diff text).
//
// While it's possible a diff might be colorized and fail this test, it's
// unlikely, and it covers hg's color extension which seems to be the most
// stubborn about colorizing text despite stdout not being a TTY.
//
// We might incorrectly strip color codes from a colorized diff of a text
// file with color codes inside it, but this case is stupid and pathological
// and you've dug your own grave.
$ansi_color_pattern
=
'\x1B\[[\d;]*m'
;
if
(
preg_match
(
'/^'
.
$ansi_color_pattern
.
'/m'
,
$text
)
)
{
$text
=
preg_replace
(
'/'
.
$ansi_color_pattern
.
'/'
,
''
,
$text
)
;
}
$this
->
text
=
phutil_split_lines
(
$text
)
;
$this
->
line
=
0
;
}
protected
function
getLine
(
)
{
if
(
$this
->
text
===
null
)
{
throw
new
Exception
(
pht
(
'Not parsing!'
)
)
;
}
if
(
isset
(
$this
->
text
[
$this
->
line
]
)
)
{
return
$this
->
text
[
$this
->
line
]
;
}
return
null
;
}
protected
function
getLineTrimmed
(
)
{
$line
=
$this
->
getLine
(
)
;
if
(
$line
!==
null
)
{
$line
=
trim
(
$line
,
"\r\n"
)
;
}
return
$line
;
}
protected
function
nextLine
(
)
{
$this
->
line
++
;
return
$this
->
getLine
(
)
;
}
protected
function
nextLineTrimmed
(
)
{
$line
=
$this
->
nextLine
(
)
;
if
(
$line
!==
null
)
{
$line
=
trim
(
$line
,
"\r\n"
)
;
}
return
$line
;
}
protected
function
nextNonemptyLine
(
)
{
while
(
(
$line
=
$this
->
nextLine
(
)
)
!==
null
)
{
if
(
strlen
(
trim
(
$line
)
)
!==
0
)
{
break
;
}
}
return
$this
->
getLine
(
)
;
}
protected
function
nextLineThatLooksLikeDiffStart
(
)
{
while
(
(
$line
=
$this
->
nextLine
(
)
)
!==
null
)
{
if
(
preg_match
(
'/^\s*diff\s+-(?:r|-git)/'
,
$line
)
)
{
break
;
}
}
return
$this
->
getLine
(
)
;
}
protected
function
saveLine
(
)
{
$this
->
lineSaved
=
$this
->
line
;
}
protected
function
restoreLine
(
)
{
$this
->
line
=
$this
->
lineSaved
;
}
protected
function
isFirstNonEmptyLine
(
)
{
$len
=
count
(
$this
->
text
)
;
for
(
$ii
=
0
;
$ii
<
$len
;
$ii
++
)
{
$line
=
$this
->
text
[
$ii
]
;
if
(
!
strlen
(
trim
(
$line
)
)
)
{
// This line is empty, skip it.
continue
;
}
if
(
preg_match
(
'/^#/'
,
$line
)
)
{
// This line is a comment, skip it.
continue
;
}
return
(
$ii
==
$this
->
line
)
;
}
// Entire file is empty.
return
false
;
}
protected
function
didFinishParse
(
)
{
$this
->
text
=
null
;
}
public
function
setWriteDiffOnFailure
(
$write
)
{
$this
->
writeDiffOnFailure
=
$write
;
return
$this
;
}
protected
function
didFailParse
(
$message
)
{
$context
=
5
;
$min
=
max
(
0
,
$this
->
line
-
$context
)
;
$max
=
min
(
$this
->
line
+
$context
,
count
(
$this
->
text
)
-
1
)
;
$context
=
''
;
for
(
$ii
=
$min
;
$ii
<=
$max
;
$ii
++
)
{
$context
.=
sprintf
(
'%8.8s %6.6s %s'
,
(
$ii
==
$this
->
line
)
?
'>>> '
:
''
,
$ii
+
1
,
$this
->
text
[
$ii
]
)
;
}
$out
=
array
(
)
;
$out
[
]
=
pht
(
'Diff Parse Exception: %s'
,
$message
)
;
if
(
$this
->
writeDiffOnFailure
)
{
$temp
=
new
TempFile
(
)
;
$temp
->
setPreserveFile
(
true
)
;
Filesystem
::
writeFile
(
$temp
,
$this
->
rawDiff
)
;
$out
[
]
=
pht
(
'Raw input file was written to: %s'
,
$temp
)
;
}
$out
[
]
=
$context
;
$out
=
implode
(
"\n\n"
,
$out
)
;
throw
new
Exception
(
$out
)
;
}
/**
* Unescape escaped filenames, e.g. from "git diff".
*/
private
static
function
unescapeFilename
(
$name
)
{
if
(
preg_match
(
'/^".+"$/'
,
$name
)
)
{
return
stripcslashes
(
substr
(
$name
,
1
,
-
1
)
)
;
}
else
{
return
$name
;
}
}
private
function
loadSyntheticData
(
)
{
if
(
!
$this
->
changes
)
{
return
;
}
$repository_api
=
$this
->
repositoryAPI
;
if
(
!
$repository_api
)
{
return
;
}
$imagechanges
=
array
(
)
;
$changes
=
$this
->
changes
;
foreach
(
$changes
as
$change
)
{
$path
=
$change
->
getCurrentPath
(
)
;
// Certain types of changes (moves and copies) don't contain change data
// when expressed in raw "git diff" form. Augment any such diffs with
// textual data.
if
(
$change
->
getNeedsSyntheticGitHunks
(
)
&&
(
$repository_api
instanceof
ArcanistGitAPI
)
)
{
$diff
=
$repository_api
->
getRawDiffText
(
$path
,
$moves
=
false
)
;
// NOTE: We're reusing the parser and it doesn't reset change state
// between parses because there's an oddball SVN workflow in Phabricator
// which relies on being able to inject changes.
// TODO: Fix this.
$parser
=
clone
$this
;
$parser
->
setChanges
(
array
(
)
)
;
$raw_changes
=
$parser
->
parseDiff
(
$diff
)
;
foreach
(
$raw_changes
as
$raw_change
)
{
if
(
$raw_change
->
getCurrentPath
(
)
==
$path
)
{
$change
->
setFileType
(
$raw_change
->
getFileType
(
)
)
;
foreach
(
$raw_change
->
getHunks
(
)
as
$hunk
)
{
// Git thinks that this file has been added. But we know that it
// has been moved or copied without a change.
$hunk
->
setCorpus
(
preg_replace
(
'/^\+/m'
,
' '
,
$hunk
->
getCorpus
(
)
)
)
;
$change
->
addHunk
(
$hunk
)
;
}
break
;
}
}
$change
->
setNeedsSyntheticGitHunks
(
false
)
;
}
if
(
$change
->
getFileType
(
)
!=
ArcanistDiffChangeType
::
FILE_BINARY
&&
$change
->
getFileType
(
)
!=
ArcanistDiffChangeType
::
FILE_IMAGE
)
{
continue
;
}
$imagechanges
[
$path
]
=
$change
;
}
// Fetch the actual file contents in batches so repositories
// that have slow random file accesses (i.e. mercurial) can
// optimize the retrieval.
$paths
=
array_keys
(
$imagechanges
)
;
$filedata
=
$repository_api
->
getBulkOriginalFileData
(
$paths
)
;
foreach
(
$filedata
as
$path
=>
$data
)
{
$imagechanges
[
$path
]
->
setOriginalFileData
(
$data
)
;
}
$filedata
=
$repository_api
->
getBulkCurrentFileData
(
$paths
)
;
foreach
(
$filedata
as
$path
=>
$data
)
{
$imagechanges
[
$path
]
->
setCurrentFileData
(
$data
)
;
}
$this
->
changes
=
$changes
;
}
/**
* Extracts the common filename from two strings with differing path
* prefixes as found after `diff --git`. These strings may be
* quoted; if so, the filename is returned unescaped. The prefixes
* default to "a/" and "b/", but may be any string -- or may be
* entierly absent. This function may return "null" if the hunk
* represents a file move or copy, and with pathological renames may
* return an incorrect value. Such cases are expected to be
* recovered by later rename detection codepaths.
*
* @param string Text from a diff line after "diff --git ".
* @return string Filename being altered, or null for a rename.
*/
public
static
function
extractGitCommonFilename
(
$paths
)
{
$matches
=
null
;
$paths
=
rtrim
(
$paths
,
"\r\n"
)
;
// Try the exact same string twice in a row separated by a
// space, with an optional prefix. This can hit a false
// positive for moves from files like "old file old" to "file",
// but such a cases will be caught by the "rename from" /
// "rename to" lines.
$prefix
=
'(?:[^/]+/)?'
;
$pattern
=
"@^(?P<old>(?P<oldq>\"?){$prefix}(?P<common>.+)\\k<oldq>)"
.
" "
.
"(?P<new>(?P<newq>\"?){$prefix}\\k<common>\\k<newq>)$@"
;
if
(
!
preg_match
(
$pattern
,
$paths
,
$matches
)
)
{
// A rename or some form; return null for now, and let the
// "rename from" / "rename to" lines fix it up.
return
null
;
}
// Use the common subpart. There may be ambiguity here: "src/file
// dst/file" may _either_ be a prefix-less move, or a change with
// two custom prefixes. We assume it is the latter; if it is a
// rename, diff parsing will update based on the "rename from" /
// "rename to" lines.
// This re-assembles with the differing prefixes removed, but the
// quoting from the original. Necessary so we know if we should
// unescape characters from the common string.
$new
=
$matches
[
'newq'
]
.
$matches
[
'common'
]
.
$matches
[
'newq'
]
;
$new
=
self
::
unescapeFilename
(
$new
)
;
return
$new
;
}
/**
* Strip the header and footer off a `git-format-patch` diff.
*
* Returns a parseable normal diff and a textual commit message.
*/
private
function
stripGitFormatPatch
(
$diff
)
{
// We can parse this by splitting it into two pieces over and over again
// along different section dividers:
//
// 1. Mail headers.
// 2. ("\n\n")
// 3. Mail body.
// 4. ("---")
// 5. Diff stat section.
// 6. ("\n\n")
// 7. Actual diff body.
// 8. ("--")
// 9. Patch footer.
list
(
$head
,
$tail
)
=
preg_split
(
'/^---$/m'
,
$diff
,
2
)
;
list
(
$mail_headers
,
$mail_body
)
=
explode
(
"\n\n"
,
$head
,
2
)
;
list
(
$body
,
$foot
)
=
preg_split
(
'/^-- ?$/m'
,
$tail
,
2
)
;
list
(
$stat
,
$diff
)
=
explode
(
"\n\n"
,
$body
,
2
)
;
// Rebuild the commit message by putting the subject line back on top of it,
// if we can find one.
$matches
=
null
;
$pattern
=
'/^Subject: (?:\[PATCH\] )?(.*)$/mi'
;
if
(
preg_match
(
$pattern
,
$mail_headers
,
$matches
)
)
{
$mail_body
=
$matches
[
1
]
.
"\n\n"
.
$mail_body
;
$mail_body
=
rtrim
(
$mail_body
)
;
}
return
array
(
$mail_body
,
$diff
)
;
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sun, Jan 19, 14:30 (3 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1125515
Default Alt Text
ArcanistDiffParser.php (43 KB)
Attached To
Mode
rARC Arcanist
Attached
Detach File
Event Timeline
Log In to Comment