/
Text.php
139 lines (125 loc) · 2.75 KB
/
Text.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
<?php
namespace Inml;
use \Inml\Text\Paragraph;
use \Inml\Text\Line;
use \Inml\Text\Word;
/**
 * Container for the Paragraph objects parsed out of one raw string.
 *
 * An instance is countable (number of stored paragraphs) and iterable
 * (over the stored paragraphs). It also keeps the original, non-parsed
 * string and the defines reported by every parsed paragraph.
 *
 * @author Petr Trofimov <petrofimov@yandex.ru>
 */
class Text implements \Countable, \IteratorAggregate
{
    /**
     * Non-empty paragraphs, in the order they were parsed
     *
     * @var Paragraph[]
     */
    private $paragraphs = [];

    /**
     * The string exactly as it was passed to the constructor
     *
     * @var string
     */
    private $rawString;

    /**
     * Defines merged from all parsed paragraphs
     *
     * @var array
     */
    private $defines = [];

    /**
     * Constructor
     *
     * - remembers the raw string
     * - normalizes it and splits it on Paragraph::SEPARATOR
     * - builds a Paragraph from every part and keeps the non-empty ones
     *
     * @param string $string String to parse
     */
    public function __construct($string)
    {
        $this->rawString = $string;
        $parts = explode(Paragraph::SEPARATOR, $this->normalize($string));
        foreach ($parts as $part) {
            $paragraph = new Paragraph($part);
            // Defines are merged before the emptiness check, so they are
            // kept even when the paragraph itself is not stored.
            $this->defines = array_merge(
                $this->defines,
                $paragraph->getDefines()
            );
            if (!$paragraph->isEmpty()) {
                $this->paragraphs[] = $paragraph;
            }
        }
    }

    /**
     * Transforms text in order to normalize
     *
     * - trims text
     * - collapses every run of spaces/tabs into a single space
     * - converts "\r\n" and "\r" line breaks to "\n"
     * - replaces a single line break (with the optional space on either
     *   side of it) by Line::SEPARATOR
     * - replaces two or more consecutive line breaks by
     *   Paragraph::SEPARATOR; lines that contain only spaces/tabs count as
     *   blank, so they cannot hide a paragraph break
     *
     * @param string $text
     * @return string
     */
    public function normalize($text)
    {
        $text = preg_replace('/[ \t]+/', ' ', trim($text));
        // Work on a bare "\n" until the final pass so the outcome does not
        // depend on what the separator constants happen to contain.
        $text = str_replace(["\r\n", "\r"], "\n", $text);

        // One match = one run of line breaks, including the single spaces
        // left around/between them by the whitespace collapsing above.
        return preg_replace_callback(
            '/(?: ?\n)+ ?/',
            static function (array $match) {
                return substr_count($match[0], "\n") > 1
                    ? Paragraph::SEPARATOR
                    : Line::SEPARATOR;
            },
            $text
        );
    }

    /**
     * Implementation of \IteratorAggregate interface
     *
     * The attribute below silences the PHP 8.1+ notice about the missing
     * return type; older PHP versions read that line as a "#" comment.
     *
     * @return \ArrayObject Wrapper around the stored paragraphs
     */
    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        return new \ArrayObject($this->paragraphs);
    }

    /**
     * Implementation of \Countable interface
     *
     * The attribute below silences the PHP 8.1+ notice about the missing
     * return type; older PHP versions read that line as a "#" comment.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return $this->getCount();
    }

    /**
     * Returns count of paragraphs in text
     *
     * @return int
     */
    public function getCount()
    {
        return count($this->paragraphs);
    }

    /**
     * True if there are no paragraphs in text
     *
     * @return bool
     */
    public function isEmpty()
    {
        return !$this->getCount();
    }

    /**
     * Returns non-parsed string
     *
     * @return string
     */
    public function getRawString()
    {
        return $this->rawString;
    }

    /**
     * Returns array of defines
     *
     * @return array
     */
    public function getDefines()
    {
        return $this->defines;
    }
}