/
import_modify.php
717 lines (597 loc) · 38 KB
/
import_modify.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
<?php
// Project: Web Reference Database (refbase) <http://www.refbase.net>
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
// original author(s).
//
// This code is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
// License for more details.
//
// File: ./import_modify.php
// Repository: $HeadURL: http://svn.code.sf.net/p/refbase/code/trunk/import_modify.php $
// Author(s): Matthias Steffens <mailto:refbase@extracts.de>
//
// Created: 17-Feb-06, 20:57
// Modified: $Date: 2012-02-28 15:23:23 -0800 (Tue, 28 Feb 2012) $
// $Author: msteffens $
// $Revision: 1343 $
// This php script accepts input from 'import.php' and will process records exported from Endnote, Reference Manager (RIS), BibTeX, ISI Web of Science,
// Pubmed, CSA or Copac. In case of a single record, the script will call 'record.php' with all provided fields pre-filled. The user can then verify
// the data, add or modify any details as necessary and add the record to the database. Multiple records will be imported directly.
// TODO: I18n
// Incorporate some include files:
include 'initialize/db.inc.php'; // 'db.inc.php' is included to hide username and password
include 'includes/include.inc.php'; // include common functions
include 'includes/execute.inc.php'; // include functions that deal with execution of shell commands
include 'includes/import.inc.php'; // include common import functions
include 'initialize/ini.inc.php'; // include common variables
// --------------------------------------------------------------------
// START A SESSION:
// call the 'start_session()' function (from 'include.inc.php') which will also read out available session variables:
start_session(true);
// --------------------------------------------------------------------
// Initialize preferred display language:
// (note that 'locales.inc.php' has to be included *after* the call to the 'start_session()' function)
include 'includes/locales.inc.php'; // include the locales
// --------------------------------------------------------------------
// Clear any errors that might have been found previously:
$errors = array();

// Write the (POST or GET) form variables into an array:
// NOTE: '$formVars' is made sure to be an array *before* the loop. Without this, a request that carries
//       no parameters at all would leave '$formVars' undefined, and the script would later pass an
//       undefined variable to 'saveSessionVariable("formVars", $formVars)' when reporting errors.
if (!isset($formVars) OR !is_array($formVars))
	$formVars = array();

foreach ($_REQUEST as $varname => $value)
{
	// remove slashes from parameter values if 'magic_quotes_gpc = On':
	$formVars[$varname] = stripSlashesIfMagicQuotes($value); // function 'stripSlashesIfMagicQuotes()' is defined in 'include.inc.php'
}
// --------------------------------------------------------------------
// Determine which client issued the request:
// The 'client' parameter identifies queries that originated from the refbase command line clients
// ("cli-refbase-1.0.1", "cli-refbase_import-1.0") or from a bookmarklet (e.g., "jsb-refbase-1.0.0").
// If the parameter is missing, an empty string is used.
$client = isset($formVars['client']) ? $formVars['client'] : "";

// Data that were sent via a bookmarklet ("jsb...") don't come from the import form, so the
// form fields that the rest of this script relies on are set directly:
if (preg_match("/^jsb/i", $client))
{
	$bookmarkletFormValues = array(
		'formType'           => "import",
		'importRecordsRadio' => "all",
		'importRecords'      => "1",
		'showSource'         => "1"
	);

	foreach ($bookmarkletFormValues as $fieldName => $fieldValue)
		$formVars[$fieldName] = $fieldValue;
}
// Set the referrer (i.e. the page we redirect to on error) and make sure that the user is logged in.
//
// NOTE: For 'import_modify.php' we probably want to *always* set the referrer to 'import.php' since the preference of function 'start_session()'
// for a referrer that was saved in a session variable may lead back to the wrong page if the user used the back button of his browser.
// This happens e.g. if:
// 1. the user imports, say, ID 'arXiv:cond-mat/0703452' which gets loaded into the 'record.php' form
// 2. the user uses his browser's back button to switch back to the 'import.php' form
// 3. the user attempts to import 'arXiv:cond-mat/070345' (which is an incorrect arXiv ID)
// In that case, if the referrer gets loaded from the session variable, it will redirect back to 'record.php' (instead of 'import.php').
// This can be circumvented either by saving the '$_SERVER['HTTP_REFERER']' to the 'referer' session variable explicitly, or by simply
// hardcoding '$referer' to "import.php" (which is what we do here). The alternative approach is kept below for reference:
// $referer = $_SERVER['HTTP_REFERER'];
// saveSessionVariable("referer", $referer); // function 'saveSessionVariable()' is defined in 'include.inc.php'
// Set the default referrer if no referrer is available or if it just points to 'index.php' (or if the data were sent via a bookmarklet):
// if (empty($referer) OR ($referer == "index.php") OR preg_match("/^jsb/i", $client)) // variable '$referer' is globally defined in function 'start_session()' in 'include.inc.php'
$referer = "import.php"; // on error, we'll (by default) redirect to the import form
// First of all, check if the user is logged in (the 'loginEmail' session variable is only present for logged-in users):
if (!isset($_SESSION['loginEmail'])) // -> if the user isn't logged in
{
header("Location: user_login.php?referer=" . rawurlencode($referer)); // ask the user to login first, then he'll get directed back to the calling page (normally, 'import.php')
exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
}
// Next, verify that the (logged in) user may import records at all:
// Import is refused if the 'user_permissions' session variable exists but contains neither
// 'allow_import' nor 'allow_batch_import'. (If the session variable is absent, no check is made.)
$importIsForbidden = false;

if (isset($_SESSION['user_permissions']))
	$importIsForbidden = !preg_match("/allow_import|allow_batch_import/", $_SESSION['user_permissions']);

if ($importIsForbidden)
{
	// build an appropriate error message:
	$noPermissionMessage = $loc["NoPermission"] . $loc["NoPermission_ForImport"] . "!";
	$HeaderString = returnMsg($noPermissionMessage, "warning", "strong", "HeaderString"); // function 'returnMsg()' is defined in 'include.inc.php'

	// browser-based clients get sent back to the main page ('index.php'); command line clients ("cli...") are not redirected:
	$isCommandLineClient = preg_match("/^cli/i", $client);

	if (!$isCommandLineClient)
		header("Location: index.php");

	exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
}
// --------------------------------------------------------------------
// EXTRACT FORM VARIABLES:
// Each request parameter that this script works with is copied from '$formVars' into a variable of its own,
// falling back to a default value if the parameter wasn't submitted. This is done for readability; 'extract()'
// is deliberately not used since it may expose yet another security hole.

// The form used by the user (defaults to the main import form):
$formType = isset($formVars['formType']) ? $formVars['formType'] : "import";

// Main import form: the source text containing the bibliographic record(s)
// (note that data from any successfully uploaded file will override data pasted into the 'sourceText' text entry field):
$sourceText = isset($formVars['sourceText']) ? $formVars['sourceText'] : "";

// "Import IDs" form (which imports records from PubMed ID, arXiv ID, DOI or OpenURL): the entered IDs
$sourceIDs = isset($formVars['sourceIDs']) ? $formVars['sourceIDs'] : "";

// Bookmarklet: the URL containing the posted data
$sourceURL = isset($formVars['sourceURL']) ? $formVars['sourceURL'] : "";

// Whether we're supposed to display the original source data:
$showSource = isset($formVars['showSource']) ? $formVars['showSource'] : "";

// Which records shall be imported depends on whether the 'user_permissions' session variable contains 'allow_batch_import':
$hasBatchImportPermission = (isset($_SESSION['user_permissions']) && preg_match("/allow_batch_import/", $_SESSION['user_permissions']));

if (!$hasBatchImportPermission)
{
	// the user is only allowed to import one record at a time, so we'll always import the very first record:
	$importRecordsRadio = "only";
	$importRecords = "1";
}
else
{
	// Import all records ('all') or just particular ones ('only'):
	$importRecordsRadio = isset($formVars['importRecordsRadio']) ? $formVars['importRecordsRadio'] : "";

	// The record numbers of those records that shall be imported:
	// examples of recognized formats: '1-5' imports the first five records; '1 3 7' will import records 1, 3 and 7;
	// '1-3 5-7 9' will import records 1, 2, 3, 5, 6, 7 and 9
	// (note that the first three records could be labelled e.g. as 'Record 12 of 52', 'Record 30 of 112' and
	// 'Record 202 of 533' but they must be referred to as records '1-3' in the 'importRecords' form)
	$importRecords = isset($formVars['importRecords']) ? $formVars['importRecords'] : "";
}

// Whether we're supposed to skip records with unrecognized data format:
$skipBadRecords = isset($formVars['skipBadRecords']) ? $formVars['skipBadRecords'] : "";
// Check if a file was uploaded:
// (note that to have file uploads work, HTTP file uploads must be allowed within your 'php.ini' configuration file
// by setting the 'file_uploads' parameter to 'On'!)
// extract file information into a four (or five) element associative array containing the following information about the file:
// name - original name of file on client
// type - MIME type of file
// tmp_name - name of temporary file on server
// error - holds an error number >0 if something went wrong, otherwise 0 (I don't know when this element was added. It may not be present in your PHP version... ?:-/)
// size - size of file in bytes
// depending on what happened on upload, they will contain the following values (PHP 4.1 and above):
// no file upload upload exceeds 'upload_max_filesize' successful upload
// -------------- ------------------------------------ -----------------
// name "" [name] [name]
// type "" "" [type]
// tmp_name "" OR "none" "" [tmp_name]
// error 4 1 0
// size 0 0 [size]
$uploadFile = getUploadInfo("uploadFile"); // function 'getUploadInfo()' is defined in 'include.inc.php'
$tmpFilePath = ""; // will hold the path of the temporary file if (and only if) the upload passes validation

// Validate the 'uploadFile' field:
// (an uploaded file must not exceed the 'upload_max_filesize' specified within your 'php.ini' configuration file)
// TODO: Move code that validates file uploads into its own function (and merge with related code from 'modify.php')
$uploadWasAttempted = (!empty($uploadFile) && !empty($uploadFile["name"])); // a non-empty file name means that the user attempted to upload a file

// 'is_uploaded_file()' returns 'true' only if the file indicated by '$uploadFile["tmp_name"]' was uploaded via HTTP POST.
// This helps to ensure that a malicious user hasn't tried to trick the script into working on files upon which it
// should not be working - for instance, /etc/passwd.
if ($uploadWasAttempted && is_uploaded_file($uploadFile["tmp_name"]))
{
	if (empty($uploadFile["tmp_name"])) // no tmp file exists => we assume that the maximum upload file size was exceeded!
	{
		$maxFileSize = ini_get("upload_max_filesize");
		$fileError = "File size must not be greater than " . $maxFileSize . ":";
		$errors["uploadFile"] = $fileError; // inform the user that the maximum upload file size was exceeded
	}
	// a tmp file exists; the file name must not be 'passwd' (this should already be prevented by 'is_uploaded_file()' but anyhow):
	elseif (preg_match("/^passwd$/i", $uploadFile["name"]))
		$errors["uploadFile"] = "This file name is not allowed!";
	// the file name must not end with .exe, .com, .bat, .zip, .php, .phps, .php3 or .cgi
	// (adjust the regex pattern if you want more relaxed file name validation):
	elseif (preg_match("/\.(exe|com|bat|zip|php|phps|php3|cgi)$/i", $uploadFile["name"]))
		$errors["uploadFile"] = "You cannot upload this type of file!";
	else
		$tmpFilePath = $uploadFile["tmp_name"];
}
elseif ($uploadWasAttempted) // the upload didn't arrive as a valid HTTP POST upload; report why (based on PHP's upload error code)
{
	$uploadErrorCode = $uploadFile["error"];

	if ($uploadErrorCode == 0) // no error reported; possible file attack!
	{
		$errors["uploadFile"] = "There was a problem with your upload.";
	}
	elseif ($uploadErrorCode == 1) // uploaded file exceeds the 'upload_max_filesize' directive in 'php.ini'
	{
		$maxFileSize = ini_get("upload_max_filesize");
		$fileError = "File size must not be greater than " . $maxFileSize . ":";
		$errors["uploadFile"] = $fileError;
	}
	elseif ($uploadErrorCode == 2) // uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the html form (Note: refbase doesn't currently specify MAX_FILE_SIZE but anyhow...)
	{
		$errors["uploadFile"] = "The file you are trying to upload is too big.";
	}
	elseif ($uploadErrorCode == 3) // uploaded file was only partially uploaded
	{
		$errors["uploadFile"] = "The file you are trying to upload was only partially uploaded.";
	}
	elseif ($uploadErrorCode == 4) // no file was uploaded
	{
		$errors["uploadFile"] = "You must select a file for upload.";
	}
	elseif ($uploadErrorCode == 6)
	{
		$errors["uploadFile"] = "Missing a temporary folder.";
	}
	else // any other error code
	{
		$errors["uploadFile"] = "There was a problem with your upload.";
	}
}
// If a file was uploaded successfully (i.e. the validation above left us with a temporary file path),
// read its contents:
$uploadSucceeded = !(empty($uploadFile) || empty($tmpFilePath));

if ($uploadSucceeded)
{
	$fileData = readFromFile($tmpFilePath); // function 'readFromFile()' is defined in 'execute.inc.php'

	// Data from any successfully uploaded (non-empty) file will override data pasted into the 'sourceText' text entry field:
	if (!empty($fileData))
	{
		$sourceText = $fileData;
	}
}
// --------------------------------------------------------------------
// PRE-PROCESS DATA INPUT:

// In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
// NOTE: For a latin1-based database, data pasted into the 'sourceText' text entry field will always be returned in
//       ISO-8859-1 encoding (see the notes on function 'decodeHTML()' below). However, data that were received via a
//       file upload (or from a client such as Bookends) will have the encoding of the original file (which may be UTF-8).
if ($contentTypeCharset == "ISO-8859-1") // '$contentTypeCharset' is defined in 'ini.inc.php'
{
	$detectedSourceEncoding = detectCharacterEncoding($sourceText); // function 'detectCharacterEncoding()' is defined in 'include.inc.php'

	if ($detectedSourceEncoding == "UTF-8")
		$sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8"); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
}

// Decode any HTML entities remaining in the source text:
// NOTE: - Web browsers send back form data in the same encoding as the page containing the form. So if a user imports
//         UTF-8 data (via the 'sourceText' text entry form) into a latin1-based database, non-latin1 characters will be
//         encoded by the browser as HTML entities (e.g., the greek delta character would be represented as '&delta;'
//         in the source text). Therefore, we'll use function 'decodeHTML()' to convert any remaining HTML entities first
//         to UTF-8, then convert Unicode entities to refbase markup (if possible), and finally transform all Unicode
//         characters that can't be successfully converted to their ASCII equivalents.
//       - Alternatively, it might be easier to always use UTF-8 as page encoding for 'import.php' so that we'll always
//         receive UTF-8 encoded data, then use function 'detectCharacterEncoding()' to detect the actual character
//         encoding of the given source text, and convert to refbase markup/latin1 if needed.
//
// TODO: - this conversion causes invalid XML when importing MODS XML that contains encoded angle brackets
//         (e.g. '<title>Harbours &lt;dt.&gt;</title>'); to work around this issue, the 'decodeHTML()' call below needs to
//         be commented out; for a real fix, the decoding of characters needs to be adapted based on the '$sourceFormat'
//         (which, ATM, is only identified further down below)
$sourceText = decodeHTML($contentTypeCharset, $sourceText); // function 'decodeHTML()' is defined in 'include.inc.php'
// Process record number input:

/**
 * Expands the contents of the 'importRecords' form field into a list of record numbers.
 *
 * Recognized input: single numbers and hyphenated ranges, separated by any characters other than
 * digits or the hyphen; e.g. '1-3 5-7 9' yields 1, 2, 3, 5, 6, 7 and 9.
 *
 * Malformed elements are handled leniently:
 *   - '3-5-9'  only the first two numbers are used (-> 3, 4, 5)
 *   - '3-'     is treated as the single record number 3
 *   - '-3'     is treated as the single record number 3 (the stray hyphen is dropped)
 *   - '-3-5'   is treated as the range 3-5 (previously, the empty piece in front of the leading
 *              hyphen was used as range start, which produced 0, 1, 2, 3)
 *   - '-'      (an element without any digit) is ignored (previously, it was added verbatim to the
 *              list of record numbers and later reported as an unavailable record)
 *
 * NOTE(review): a range is expanded via 'range()' without any upper bound, so a very large range
 *               (e.g. '1-999999999') may still exhaust memory; the number of available records
 *               isn't known yet at this point - consider capping it once it is.
 *
 * @param string $importRecords raw contents of the 'importRecords' form field
 * @return array unique record numbers (ranges contribute integers, single numbers are kept as
 *               digit strings; array keys are not necessarily consecutive)
 */
function parseImportRecordNumbers($importRecords)
{
	$recordNumbers = array();

	if (empty($importRecords))
		return $recordNumbers;

	// split input string on all but digits or the hyphen ("-") character:
	// (the 'PREG_SPLIT_NO_EMPTY' flag causes only non-empty pieces to be returned)
	$elements = preg_split("/[^0-9-]+/", $importRecords, -1, PREG_SPLIT_NO_EMPTY);

	foreach ($elements as $element)
	{
		if (preg_match("/(\d+)-(\d+)/", $element, $rangeMatch)) // a range of record numbers (such as '1-5')
		{
			// append all record numbers from start number to end number:
			foreach (range((int) $rangeMatch[1], (int) $rangeMatch[2]) as $recordNumber)
				$recordNumbers[] = $recordNumber;
		}
		elseif (preg_match("/\d+/", $element, $singleMatch)) // a single record number (possibly with stray hyphens, such as '3-')
		{
			$recordNumbers[] = $singleMatch[0];
		}
		// else: the element contains no digits at all (e.g. '-') and is skipped
	}

	// remove any duplicate record number(s):
	return array_unique($recordNumbers);
}

// '$importRecordNumbersArray' holds all the record numbers that shall be imported.
// Validation (further down) will throw up an error if we're supposed to import only
// particular records but no record numbers were specified.
$importRecordNumbersArray = parseImportRecordNumbers(isset($importRecords) ? $importRecords : "");
// --------------------------------------------------------------------
// IDENTIFY SOURCE FORMAT:
// How the source format gets identified depends on the form that submitted the data
// (for any other form type, '$sourceFormat' isn't assigned here):
switch ($formType)
{
	// the main 'import' form provided by 'import.php': attempt to identify the format of the input text
	case "import":
		$sourceFormat = identifySourceFormat($sourceText); // function 'identifySourceFormat()' is defined in 'import.inc.php'
		break;

	// the "Import IDs" form (which imports records from PubMed ID, arXiv ID, DOI or OpenURL): identify the kind of ID
	case "importID":
		$sourceFormat = identifySourceID($sourceIDs); // function 'identifySourceID()' is defined in 'import.inc.php'
		break;
}
// --------------------------------------------------------------------
// FETCH DATA FROM URL:
// In case of import via ID, the source data are fetched from the remote service that matches the ID type that was
// identified above (in '$sourceFormat'). Each 'fetchData*()' call below replaces '$errors' and '$sourceText' with
// the values it returns.
// TODO: Modify the code so that '$sourceIDs' can contain a mixture of any supported IDs.
if (($formType == "importID") AND !empty($sourceIDs) AND !empty($sourceFormat))
{
// - PubMed IDs:
if (preg_match("/^Pubmed (Medline|XML)$/i", $sourceFormat) AND preg_match("/[0-9]/", $sourceIDs))
{
// Split on any whitespace between PubMed IDs:
$idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);
// Fetch source data from PubMed.gov for all given PubMed IDs:
list($errors, $sourceText) = fetchDataFromPubMed($idArray, $sourceFormat); // function 'fetchDataFromPubMed()' is defined in 'import.inc.php'
}
// - arXiv IDs (old style such as 'cond-mat/0703452' or new style such as '0704.0001', optionally with a version suffix such as 'v2'):
elseif (preg_match("/^arXiv XML$/i", $sourceFormat) AND preg_match("#(arXiv:|http://arxiv\.org/abs/)?([\w.-]+/\d{7}|\d{4}\.\d{4,})(v\d+)?#i", $sourceIDs))
{
// Remove any "arXiv:" or "http://arxiv.org/abs/" prefixes from the ID string:
// NOTE(review): unlike the test above, this pattern has no 'i' modifier, so a prefix such as "arxiv:" would be matched above but not removed here -- confirm whether that's intended
$sourceIDs = preg_replace("#(?<=^|\s)(arXiv:|http://arxiv\.org/abs/)#", "", $sourceIDs);
// Split on any whitespace between arXiv IDs:
$idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);
// Fetch source data from arXiv.org for all given arXiv IDs:
list($errors, $sourceText) = fetchDataFromArXiv($idArray, $sourceFormat); // function 'fetchDataFromArXiv()' is defined in 'import.inc.php'
// NOTE: In case of function 'fetchDataFromArXiv()', variable '$sourceText' contains the SimplePie object with the parsed Atom XML feed
// TODO: This is inconsistent with the behaviour of the other 'fetchData*()' functions and we should do something about it!
// NOTE: Since, for arXiv IDs, '$sourceText' contains the SimplePie object (and not just text), handling of any encoding issues is done
// within function 'arxivToRefbase()'
}
// - DOIs/OpenURLs:
// TODO: - to support OpenURL context objects from COinS or Atom XML, we need to decode ampersand characters ('&amp;' -> '&'), and allow for OpenURLs that don't start with '?' or '&'
// NOTE(review): the DOI patterns below only accept a registrant prefix of exactly four digits ('10.NNNN/') and only the
// 'http://dx.doi.org/' URL form; DOIs with longer prefixes or 'https://doi.org/' URLs won't match here. Whether such
// IDs can reach this branch at all depends on 'identifySourceID()' (not visible here) -- verify before relaxing the patterns.
elseif (preg_match("/^CrossRef XML$/i", $sourceFormat) AND (preg_match("#(?<=^|\s)(doi:|http://dx\.doi\.org/)?10\.\d{4}/\S+?(?=$|\s)#i", $sourceIDs) OR preg_match("#(?<=^|\s)(openurl:|http://.+?(?=\?))?.*?(?<=[?&])ctx_ver=Z39\.88-2004(?=&|$).*?(?=$|\s)#i", $sourceIDs)))
{
// Remove any prefixes (like "doi:", "openurl:", "http://dx.doi.org/" or "http://...?") from the ID string:
$sourceIDs = preg_replace("#(?<=^|\s)(doi:|http://dx\.doi\.org/)#", "", $sourceIDs);
$sourceIDs = preg_replace("#(?<=^|\s)(openurl:|http://.+?(?=\?))#", "", $sourceIDs);
// Split on any whitespace between DOIs/OpenURLs:
$idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);
// Try to retrieve information from PubMed.gov before querying CrossRef.org:
// (on return, '$idArray' is replaced by the third element returned by 'fetchDOIsFromPubMed()' -- presumably the IDs that still need to be looked up at CrossRef; verify against that function)
// TODO: Test with $sourceIDs containing a mixture of DOIs and OpenURLs, as well as with $sourceIDs containing DOIs for articles listed in PubMed AND NOT listed in PubMed!
if (preg_match("#10\.\d{4}/\S+?(?=$|\s)#i", $sourceIDs))
{
list($errors, $sourceText, $idArray) = fetchDOIsFromPubMed($idArray); // function 'fetchDOIsFromPubMed()' is defined in 'import.inc.php'
}
if (!empty($idArray))
{
// Fetch record metadata from CrossRef.org for all given DOIs/OpenURLs:
// NOTE(review): this assignment overwrites any '$errors' and '$sourceText' that were just returned by 'fetchDOIsFromPubMed()', so data fetched from PubMed appear to get lost if some IDs remain for CrossRef (see TODO above)
list($errors, $sourceText) = fetchDataFromCrossRef($idArray, $sourceFormat); // function 'fetchDataFromCrossRef()' is defined in 'import.inc.php'
// In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
if (($contentTypeCharset == "ISO-8859-1") AND (detectCharacterEncoding($sourceText) == "UTF-8"))
$sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8");
}
else
{
// no IDs are left for CrossRef, so the source text is handled as PubMed (Medline) data further below:
$sourceFormat = "Pubmed Medline";
}
}
}
// --------------------------------------------------------------------
// PARSE SOURCE TEXT:

// Start out with empty parse results. These defaults remain in effect if no source text/format is available
// -OR- if no import format file could be found for the given format. (In the latter case, these variables
// were previously left undefined, which made the 'count($importRecordNumbersNotRecognizedFormatArray)' call
// in the validation code below emit a warning -- or, as of PHP 8, throw a TypeError.)
$importDataArray = array();
$recordsCount = 0;
$importRecordNumbersRecognizedFormatArray = array();
$importRecordNumbersNotRecognizedFormatArray = array();

if (!empty($sourceText) AND !empty($sourceFormat))
{
	// fetch the path/name of the import format file that's associated with the import format given in '$sourceFormat':
	$importFormatFile = getFormatFile($sourceFormat, "import"); // function 'getFormatFile()' is defined in 'include.inc.php'

	if (!empty($importFormatFile))
	{
		// Get all cite keys specified by the current user and build an array of uniquified cite keys ('$citeKeysArray')
		// which is used to ensure uniqueness of generated cite keys among all imported records as well as the user's existing records:
		$userCiteKeysArray = getUserCiteKeys($loginUserID); // '$loginUserID' is provided as session variable on login; function 'getUserCiteKeys()' is defined in 'include.inc.php'

		// Get all user options for the current user (which is required by function 'generateCiteKey()'
		// that, in turn, is called below & from within the 'addRecords()' function):
		$userOptionsArray = getUserOptions($loginUserID); // function 'getUserOptions()' is defined in 'include.inc.php'

		// Include the found import format file *once*:
		include_once "import/" . $importFormatFile;

		// Parse records from the specified import format:
		// function 'importRecords()' is defined in the import format file given in '$importFormatFile' (which, in turn, must reside in the 'import' directory of the refbase root directory)
		// NOTE: for arXiv IDs, '$sourceText' holds the SimplePie object returned by function 'fetchDataFromArXiv()' (and not just text)
		// NOTE: the '$errors' array is replaced by the one that's returned from 'importRecords()'
		list($importDataArray, $recordsCount, $importRecordNumbersRecognizedFormatArray, $importRecordNumbersNotRecognizedFormatArray, $errors) = importRecords($sourceText, $importRecordsRadio, $importRecordNumbersArray);
	}
	else
		$errors["sourceText"] = "Sorry, but the $sourceFormat importer is currently not available!";
}
// --------------------------------------------------------------------
// VALIDATE DATA FIELDS:
// This section adds entries to the '$errors' array; the script checks further below whether any errors were found.
// For each parsed record, function 'validateRecords()' (in 'import.inc.php') will assign errors to '$errors["sourceText"]'.
// In case of the "Import IDs" form, we'll redirect these error messages to '$errors["sourceIDs"]':
if (($formType == "importID") AND isset($errors["sourceText"])) // some errors occurred
{
$errors["sourceIDs"] = $errors["sourceText"];
unset($errors["sourceText"]);
}
// The checks below form a single 'if ... elseif' chain, i.e. only the first failing check reports an error:
// Verify that some source text was given:
if (($formType == "import") AND empty($sourceText)) // no source data given
$errors["sourceText"] = "Source data missing!";
elseif (($formType == "importID") AND !isset($errors["sourceIDs"]) AND (empty($sourceIDs) OR empty($sourceFormat))) // no recognized IDs given (and no ID-related error has been reported yet)
$errors["sourceIDs"] = "You must specify at least one valid ID!";
// If some source data were given but the source text format wasn't among the recognized formats:
elseif (empty($sourceFormat))
$errors["sourceText"] = "Unrecognized data format!";
// Validate the 'importRecords' text entry field...
elseif ($importRecordsRadio == "only") // ...if we're supposed to import only particular records
{
// ...make sure that some records were specified and that they are actually available in the input data:
if (empty($importRecords) OR !preg_match("/[0-9]/", $importRecords)) // partial import requested but no record numbers given
{
$errors["importRecords"] = "Record number(s) missing!";
}
else // if some record numbers were given, check that these numbers are actually available in the input data:
{
// NOTE(review): if '$recordsCount' is 0, 'range(1, 0)' counts downwards and yields the numbers 1 and 0, so record number '1' is treated as available even though no records were found
$availableRecordNumbersArray = range(1, $recordsCount); // construct an array of available record numbers
// get all record numbers to import which are NOT available in the source data:
$importRecordNumbersNotAvailableArray = array_diff($importRecordNumbersArray, $availableRecordNumbersArray); // get all unique array elements from '$importRecordNumbersArray' that are not present in '$availableRecordNumbersArray'
// just FYI, the line below would get all record numbers to import which ARE actually available in the source data:
// $importRecordNumbersAvailableArray = array_diff($importRecordNumbersArray, $importRecordNumbersNotAvailableArray); // get all unique array elements from '$importRecordNumbersArray' that are not present in '$importRecordNumbersNotAvailableArray'
if (!empty($importRecordNumbersNotAvailableArray)) // the user did request to import some record(s) that don't exist in the pasted source data
{
if ($recordsCount == 1) // one record available
$errors["importRecords"] = "Only one record available! You can only use record number '1'.";
else // several records available
$errors["importRecords"] = "Only " . $recordsCount . " records available! You can only use record numbers '1-" . $recordsCount . "'.";
}
}
}
// Independent of the checks above, flag records of unrecognized data format:
// ('$errors["badRecords"]' is set to "all" or "some", or is left unset if all records had a recognized format)
if (!empty($sourceText))
{
// NOTE: validation of individual records is done within the import functions and the '$errors' array is modified within these functions if any records of unrecognized format are found
if (empty($importRecordNumbersRecognizedFormatArray)) // if none of the records to import had a recognized format
{
// we'll file an additional error element here, which will indicate whether the 'Skip records with unrecognized data format' checkbox shall be displayed or not
$errors["badRecords"] = "all";
if (!empty($sourceFormat) AND (count($importRecordNumbersNotRecognizedFormatArray) > 1)) // if the user attempted to import more than one record
$errors["skipBadRecords"] = "Sorry, but all of the specified records were of unrecognized data format!";
else // user tried to import one single record (will be also triggered if '$importRecords' is empty)
$errors["skipBadRecords"] = ""; // we insert an empty 'skipBadRecords' element so that 'import.php' does the right thing
}
elseif (!empty($importRecordNumbersNotRecognizedFormatArray)) // some records had a recognized format but some were NOT recognized
{
$errors["badRecords"] = "some"; // see note above
$errors["skipBadRecords"] = "Skip records with unrecognized data format";
}
}
else // no source text at all
{
$errors["badRecords"] = "all";
}
// --------------------------------------------------------------------
// Check if there were any validation errors:
// If so, they are reported in a way that suits the requesting client, and the script is terminated --
// unless the only reason for the errors are records of unrecognized format that the user asked to skip.
if (count($errors) > 0)
{
	// '$errors["badRecords"]' is only present if some or all records were of unrecognized data format. Other
	// errors (such as an invalid upload or unavailable record numbers) may occur without it, so we must not
	// read this element unconditionally (which would cause an "undefined index/array key" notice or warning):
	$badRecordsStatus = isset($errors["badRecords"]) ? $errors["badRecords"] : "";

	// we ignore errors regarding records with unrecognized format if:
	// - at least some of the specified records had a valid data format and
	// - the user did mark the 'Skip records with unrecognized data format' checkbox
	if (!(($badRecordsStatus == "some") AND ($skipBadRecords == "1")))
	{
		// ...otherwise we'll present the error message(s):
		if (preg_match("/^be/i", $client)) // if the query originated from a Bookends upload request ("be-bookends_import-1.0")
		{
			// Include errors in redirection request:
			$redirectURL = $referer . "?";

			foreach ($errors as $varname => $value)
				$redirectURL .= "&" . $varname . "=" . rawurlencode($value);

			header("Location: " . $redirectURL);
		}
		elseif (preg_match("/^cli/i", $client)) // if the query originated from a command line client such as the refbase CLI clients ("cli-refbase-1.1", "cli-refbase_import-1.0")
		{
			echo "There were validation errors regarding the data you submitted:\n\n";

			// prepare the hint regarding records of unrecognized format (which gets printed last):
			if (($badRecordsStatus == "all") && (!empty($errors["skipBadRecords"])))
				$skipBadInfo = $errors["skipBadRecords"] . "\n\n";
			elseif ($badRecordsStatus == "some")
				$skipBadInfo = "Use '--skipbad=1' to skip records with unrecognized data format.\n\n";
			else
				$skipBadInfo = "";

			// these two elements only steer the error display and aren't printed as errors themselves:
			unset($errors["badRecords"]);
			unset($errors["skipBadRecords"]);

			foreach ($errors as $varname => $value)
			{
				$value = preg_replace("/<br>/i", "\n ", $value); // convert HTML line breaks to plain text
				echo $varname . ": " . $value . "\n\n";
			}

			echo $skipBadInfo;
		}
		else // browser-based request
		{
			// Write back session variables:
			saveSessionVariable("errors", $errors); // function 'saveSessionVariable()' is defined in 'include.inc.php'
			saveSessionVariable("formVars", $formVars);

			// Redirect the browser back to the import form:
			header("Location: " . $referer);
		}

		exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
	}
}
// --------------------------------------------------------------------
// Reaching this point means that the submitted data passed validation.

// IMPORT RECORDS:
$importedRecordsArray = array();

// A single record that wasn't submitted by a command line or Bookends client ("cli-...", "be-...") isn't
// written to the database right away; instead, it is handed over to the record form in 'record.php':
$reviewSingleRecordInForm = ((count($importRecordNumbersRecognizedFormatArray) == 1) && !preg_match("/^(cli|be)/i", $client));

if (!$reviewSingleRecordInForm) // import record(s) directly:
{
	// Add all records to the database (i.e., for each record, add a row entry to MySQL table 'refs'):
	// ('$importedRecordsArray' will hold the serial numbers of all newly imported records)
	$importedRecordsArray = addRecords($importDataArray); // function 'addRecords()' is defined in 'include.inc.php'
}
else // this is the only record we'll need to import -AND- the import didn't originate from a command line/Bookends client:
{
	// If the record data contain a 'call_number' but no specific cite key, the call number gets also copied to the
	// user-specific 'cite_key' field (which will ensure that this original call number/cite key is retained as
	// cite key upon export); however, note that (depending on the user's settings) the cite key may get modified
	// or regenerated by function 'generateCiteKey()' below
	$hasCallNumber = !empty($importDataArray['records'][0]['call_number']);

	if ($hasCallNumber && empty($importDataArray['records'][0]['cite_key']))
	{
		$importDataArray['records'][0]['cite_key'] = $importDataArray['records'][0]['call_number'];
	}

	// Function 'generateCiteKey()' requires an array that uses the key names of the '$formVars' array, so the
	// keys of '$importDataArray['records'][0]' must be mapped to those names first
	// (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
	$parsedRecordFormVars = buildFormVarsArray($importDataArray['records'][0]); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

	// Generate or modify (e.g. uniquify) the cite key for this record:
	$importDataArray['records'][0]['cite_key'] = generateCiteKey($parsedRecordFormVars); // function 'generateCiteKey()' is defined in 'include.inc.php'

	// Save import data to a session variable:
	// NOTE: Passing the import data via the session (instead of via the URL) allows to retain large param/value
	//       strings that would exceed the maximum string limit for GET requests. This works around a limitation
	//       in Internet Explorer which has a maximum URL length of 2,083 characters & a maximum path length of
	//       2,048 characters. More info: <http://support.microsoft.com/kb/208427/EN-US/>
	saveSessionVariable("importData", $importDataArray['records'][0]);

	// RELOCATE TO IMPORT PAGE:
	// call 'record.php' and load the form fields with the data of the current record
	header("Location: record.php?recordAction=add&mode=import&importSource=generic");

	exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
}
// --------------------------------------------------------------------
// DISPLAY RESULTS
// On success, the serial numbers of the imported records are condensed into a 'records' query string,
// an optional email announcement is sent, and the client is redirected to 'show.php'.
// If nothing was imported, command line clients get a plain text message while all other clients are
// sent back to the calling page.
if (!empty($importedRecordsArray)) // if some records were successfully imported
{
	$importedRecordsCount = count($importedRecordsArray);

	// build string of record serial numbers (to be used with the 'records' query parameter):
	$recordSerialsQueryString = $importedRecordsArray[0]; // add first serial number

	for ($i=1; $i < $importedRecordsCount; $i++) // for the second to the last serial number...
	{
		// implode consecutive serial numbers into a range (e.g. transform "150,151,152" into "150-152"):
		if ($importedRecordsArray[$i] == ($importedRecordsArray[$i - 1] + 1)) // if this number is consecutive to the previous one
		{
			// a trailing hyphen marks a range that has been started but not yet closed
			if (!preg_match("/-$/", $recordSerialsQueryString))
				$recordSerialsQueryString .= "-"; // start range

			if ($i == ($importedRecordsCount - 1)) // if this is the last item in the array
				$recordSerialsQueryString .= $importedRecordsArray[$i]; // end range
		}
		else // this number is NOT consecutive to the previous one
		{
			if (preg_match("/-$/", $recordSerialsQueryString))
				$recordSerialsQueryString .= $importedRecordsArray[$i - 1]; // end any previous range

			$recordSerialsQueryString .= "," . $importedRecordsArray[$i]; // append this number using a comma as a delimiter
		}
	}

	// Send EMAIL announcement:
	if ($sendEmailAnnouncements == "yes")
	{
		// variables '$sendEmailAnnouncements', '$mailingListEmail', '$officialDatabaseName' and '$databaseBaseURL' are specified in 'ini.inc.php';
		// '$loginFirstName' and '$loginLastName' are provided as session variables by the 'start_session()' function in 'include.inc.php'

		// send a notification email to the mailing list email address given in '$mailingListEmail':
		$emailRecipient = "Literature Database Announcement List <" . $mailingListEmail . ">";

		if ($importedRecordsCount == 1)
		{
			$emailSubject = "New record added to the " . $officialDatabaseName;
			$emailBodyIntro = "One record has been added to the " . $officialDatabaseName . ":";
			$detailsURL = $databaseBaseURL . "show.php?record=" . $importedRecordsArray[0];
		}
		else // $importedRecordsCount > 1
		{
			$emailSubject = "New records added to the " . $officialDatabaseName;
			$emailBodyIntro = $importedRecordsCount . " records have been added to the " . $officialDatabaseName . ":";
			$detailsURL = $databaseBaseURL . "show.php?records=" . $recordSerialsQueryString;
		}

		$emailBody = $emailBodyIntro
		           . "\n\n added by: " . $loginFirstName . " " . $loginLastName
		           . "\n details: " . $detailsURL
		           . "\n";

		sendEmail($emailRecipient, $emailSubject, $emailBody); // function 'sendEmail()' is defined in 'include.inc.php'
	}

	if ($importedRecordsCount == 1)
		$headerMessage = $importedRecordsCount . " " . $loc["RecordSuccessfullyImported"] . ":";
	else // $importedRecordsCount > 1
		$headerMessage = $importedRecordsCount . " " . $loc["RecordsSuccessfullyImported"] . ":";

	// DISPLAY all newly added records:
	// NOTE: the 'client' value is URL-encoded (just like the header message) so that characters such as '&', '#'
	//       or whitespace cannot corrupt the redirect URL; the known client identifiers (e.g. "cli-refbase-1.1")
	//       consist of unreserved characters only and thus pass through unchanged
	header("Location: show.php?records=" . $recordSerialsQueryString . "&headerMsg=" . rawurlencode($headerMessage) . "&client=" . rawurlencode($client));
}
else // nothing imported
{
	if (preg_match("/^cli/i", $client)) // if the query originated from a command line client such as the refbase CLI clients ("cli-refbase-1.1", "cli-refbase_import-1.0")
	{
		echo "No records imported!\n\n";
	}
	else
	{
		// we'll file again this additional error element here so that the 'errors' session variable isn't empty causing 'import.php' to re-load the form data that were submitted by the user
		$errors["badRecords"] = "all";

		// return an appropriate error message:
		$HeaderString = returnMsg($loc["NoRecordsImported"] . "!", "warning", "strong", "HeaderString"); // function 'returnMsg()' is defined in 'include.inc.php'

		// Write back session variables:
		saveSessionVariable("errors", $errors);
		saveSessionVariable("formVars", $formVars);

		header("Location: " . $referer); // redirect to the calling page (normally, 'import.php')
	}
}
// --------------------------------------------------------------------
?>