/**
 * Entry point of the TSV export special page.
 *
 * When the request carries both a 'collection' and a 'languages' value, the
 * normal page output is disabled and a tab-separated file is streamed to the
 * client as a download: one row per defined meaning of the collection, with
 * a definition column and a translations column for every requested language.
 * Otherwise the form for choosing a collection and languages is rendered.
 *
 * @param $par not used by this method
 * @return false when the user lacks the 'exporttsv' right or a requested
 *  language code is unknown; no value otherwise
 */
function execute( $par ) {
	global $wgOut, $wgUser, $wgRequest;

	if ( !$wgUser->isAllowed( 'exporttsv' ) ) {
		$wgOut->addHTML( wfMsg( 'ow_exporttsv_not_allowed' ) );
		return false;
	}

	$dbr = wfGetDB( DB_SLAVE );
	$dc = wdGetDataSetcontext();

	if ( $wgRequest->getText( 'collection' ) && $wgRequest->getText( 'languages' ) ) {
		// render the tsv file
		require_once( 'WikiDataAPI.php' );
		require_once( 'Transaction.php' );

		// Get the collection to export. Cut off the 'cid' part that we added
		// to make the keys strings rather than numbers in the array sent to the form.
		// The remainder is forced to an integer because it comes straight from
		// the request and is concatenated into the SQL below.
		$collectionId = intval( substr( $wgRequest->getText( 'collection' ), 3 ) );

		// get the languages requested, turn into an array, trim for spaces.
		$isoCodes = explode( ',', $wgRequest->getText( 'languages' ) );
		for ( $i = 0; $i < count( $isoCodes ); $i++ ) {
			$isoCodes[$i] = trim( $isoCodes[$i] );
			if ( !getLanguageIdForIso639_3( $isoCodes[$i] ) ) {
				$wgOut->setPageTitle( wfMsg( 'ow_exporttsv_export_failed' ) );
				// The code is request input echoed into the page, so escape it.
				$wgOut->addHTML( wfMsg( 'ow_impexptsv_unknown_lang', htmlspecialchars( $isoCodes[$i] ) ) );
				return false;
			}
		}

		$wgOut->disable();

		$languages = $this->getLanguages( $isoCodes );
		$isoLookup = $this->createIsoLookup( $languages );
		$downloadFileName = $this->createFileName( $isoCodes );

		// The list of language ids is the same for every defined meaning,
		// so build the IN (...) list once instead of once per row.
		$languageIds = array();
		for ( $i = 0; $i < count( $languages ); $i++ ) {
			$languageIds[] = $languages[$i]['language_id'];
		}
		$languageIdList = implode( ',', $languageIds );

		// Force the browser into a download
		header( 'Content-Type: text/tab-separated-values;charset=utf-8' );
		header( 'Content-Disposition: attachment; filename="' . $downloadFileName . '"' ); // attachment

		// separator character used.
		$sc = "\t";

		// UTF-8 byte order mark
		echo( pack( 'CCC', 0xef, 0xbb, 0xbf ) );

		// start the first row: column names
		echo( 'defined meaning id' . $sc . 'defining expression' );
		foreach ( $isoCodes as $isoCode ) {
			echo( $sc . 'definition_' . $isoCode . $sc . 'translations_' . $isoCode );
		}
		echo( "\r\n" );

		// get all the defined meanings in the collection
		$query = "SELECT dm.defined_meaning_id, exp.spelling ";
		$query .= "FROM {$dc}_collection_contents col, {$dc}_defined_meaning dm, {$dc}_expression exp ";
		$query .= "WHERE col.collection_id=" . $collectionId . " ";
		$query .= "AND col.member_mid=dm.defined_meaning_id ";
		$query .= "AND dm.expression_id = exp.expression_id ";
		$query .= "AND " . getLatestTransactionRestriction( "col" );
		$query .= "AND " . getLatestTransactionRestriction( "dm" );
		$query .= "AND " . getLatestTransactionRestriction( "exp" );
		$query .= "ORDER BY exp.spelling";

		// wfDebug($query."\n");
		$queryResult = $dbr->query( $query );
		while ( $row = $dbr->fetchRow( $queryResult ) ) {
			$dm_id = $row['defined_meaning_id'];

			// echo the defined meaning id and the defining expression
			echo( $dm_id );
			echo( "\t" . $row['spelling'] );

			// First we'll fill an associative array with the definitions and
			// translations. Then we'll use the isoCodes array to put them in the
			// proper order.

			// the associative array holding the definitions and translations
			$data = array();

			// ****************************
			// query to get the definitions
			// ****************************
			$qry = 'SELECT txt.text_text, trans.language_id ';
			$qry .= "FROM {$dc}_text txt, {$dc}_translated_content trans, {$dc}_defined_meaning dm ";
			$qry .= 'WHERE txt.text_id = trans.text_id ';
			$qry .= 'AND trans.translated_content_id = dm.meaning_text_tcid ';
			$qry .= "AND dm.defined_meaning_id = $dm_id ";
			$qry .= 'AND trans.language_id IN (' . $languageIdList;
			$qry .= ') AND ' . getLatestTransactionRestriction( 'trans' );
			$qry .= 'AND ' . getLatestTransactionRestriction( 'dm' );

			// wfDebug($qry."\n"); // uncomment this if you accept having 1700+ queries in the log
			$definitions = $dbr->query( $qry );
			// A separate row variable keeps the outer $row intact.
			while ( $defRow = $dbr->fetchRow( $definitions ) ) {
				// $key becomes something like def_eng
				$key = 'def_' . $isoLookup['id' . $defRow['language_id']];
				$data[$key] = $defRow['text_text'];
			}
			$dbr->freeResult( $definitions );

			// *****************************
			// query to get the translations
			// *****************************
			$qry = "SELECT exp.spelling, exp.language_id ";
			$qry .= "FROM {$dc}_expression exp ";
			$qry .= "INNER JOIN {$dc}_syntrans trans ON exp.expression_id=trans.expression_id ";
			$qry .= "WHERE trans.defined_meaning_id=$dm_id ";
			$qry .= "AND " . getLatestTransactionRestriction( "exp" );
			$qry .= "AND " . getLatestTransactionRestriction( "trans" );

			// wfDebug($qry."\n"); // uncomment this if you accept having 1700+ queries in the log
			$translations = $dbr->query( $qry );
			while ( $transRow = $dbr->fetchRow( $translations ) ) {
				// qry gets all languages, we filter them here. Saves an order
				// of magnitude execution time.
				$lookupKey = 'id' . $transRow['language_id'];
				if ( isset( $isoLookup[$lookupKey] ) ) {
					// $key becomes something like trans_eng
					$key = 'trans_' . $isoLookup[$lookupKey];
					if ( !isset( $data[$key] ) ) {
						$data[$key] = $transRow['spelling'];
					} else {
						// several translations in one language are joined with '|'
						$data[$key] = $data[$key] . '|' . $transRow['spelling'];
					}
				}
			}
			$dbr->freeResult( $translations );

			// now that we have everything, output the row.
			foreach ( $isoCodes as $isoCode ) {
				// if statements save a bunch of notices in the log about
				// undefined indices.
				echo( "\t" );
				if ( isset( $data['def_' . $isoCode] ) ) {
					echo( $this->escapeDelimitedValue( $data['def_' . $isoCode] ) );
				}
				echo( "\t" );
				if ( isset( $data['trans_' . $isoCode] ) ) {
					echo( $data['trans_' . $isoCode] );
				}
			}
			echo( "\r\n" );
		}
		$dbr->freeResult( $queryResult );
	} else {
		// Get the collections
		$colQuery = "SELECT col.collection_id, exp.spelling " .
			"FROM {$dc}_collection col INNER JOIN {$dc}_defined_meaning dm ON col.collection_mid=dm.defined_meaning_id " .
			"INNER JOIN {$dc}_expression exp ON dm.expression_id=exp.expression_id " .
			"WHERE " . getLatestTransactionRestriction( 'col' );

		$collections = array();
		$colResults = $dbr->query( $colQuery );
		while ( $row = $dbr->fetchRow( $colResults ) ) {
			// the 'cid' prefix turns the numeric ids into string keys for the form
			$collections['cid' . $row['collection_id']] = $row['spelling'];
		}

		// render the page
		$wgOut->setPageTitle( wfMsg( 'ow_exporttsv_title' ) );
		$wgOut->addHTML( wfMsg( 'ow_exporttsv_header' ) );

		$wgOut->addHTML( getOptionPanel(
			array(
				wfMsg( 'ow_Collection_colon' ) => getSelect( 'collection', $collections, 'cid376322' ),
				wfMsg( 'ow_exporttsv_languages' ) => getTextBox( 'languages', 'ita, eng, deu, fra, cat' ),
			),
			'',
			array( 'create' => wfMsg( 'ow_create' ) )
		) );
	}
}
/**
 * Entry point of the TSV import special page.
 *
 * Without an uploaded 'tsvfile' the upload form is rendered. With one, the
 * file is validated (UTF-8, expected header row, known language codes) and
 * then processed row by row: missing definitions are added and translations
 * are bound to the defined meaning named in the first column. When the
 * 'testrun' checkbox is set, the method only reports what it would add.
 *
 * @param $par not used by this method
 * @return false when the user lacks the 'importtsv' right or the uploaded
 *  file fails validation; no value otherwise
 */
function execute( $par ) {
	global $wgOut, $wgUser, $wgRequest;

	$wgOut->setPageTitle( wfMsg( 'ow_importtsv_title1' ) );
	if ( !$wgUser->isAllowed( 'importtsv' ) ) {
		$wgOut->addHTML( wfMsg( 'ow_importtsv_not_allowed' ) );
		return false;
	}

	$dbr = wfGetDB( DB_MASTER );
	$dc = wdGetDataSetcontext();
	$wgOut->setPageTitle( wfMsg( 'ow_importtsv_importing' ) );
	setlocale( LC_ALL, 'en_US.UTF-8' );

	if ( $wgRequest->getFileName( 'tsvfile' ) ) {
		// *****************
		//    process tsv
		// *****************
		require_once( 'WikiDataAPI.php' );
		require_once( 'Transaction.php' );

		$testRun = $wgRequest->getCheck( 'testrun' );

		// lets do some tests first. Is this even a tsv file?
		// It is _very_ important that the file is utf-8 encoded.
		// also, this is a good time to determine the max line length for the
		// fgetcsv function.
		$file = fopen( $wgRequest->getFileTempname( 'tsvfile' ), 'r' );
		if ( !$file ) {
			$wgOut->setPageTitle( wfMsg( 'ow_importtsv_import_failed' ) );
			$wgOut->addHTML( "The uploaded file could not be opened.<br />" );
			return false;
		}

		$maxLineLength = 0;
		while ( ( $myLine = fgets( $file ) ) !== false ) {
			// With the /u modifier preg_match() returns false on malformed UTF-8.
			// The empty pattern matches every valid string, so a blank line
			// (just "\n") is no longer mistaken for a non-UTF-8 line.
			if ( preg_match( '//u', $myLine ) !== 1 ) {
				fclose( $file );
				$wgOut->setPageTitle( wfMsg( 'ow_importtsv_import_failed' ) );
				$wgOut->addHTML( wfMsg( 'ow_importtsv_not_utf8' ) );
				return false;
			}
			$maxLineLength = max( $maxLineLength, strlen( $myLine ) + 2 );
		}

		// start from the beginning again. Check if the column names are valid
		rewind( $file );
		$columns = fgetcsv( $file, $maxLineLength, "\t" );
		// somehow testing for $columns[0] fails sometimes. Byte Order Mark?
		if ( !$columns || count( $columns ) <= 2 || $columns[1] != "defining expression" ) {
			fclose( $file );
			$wgOut->setPageTitle( wfMsg( 'ow_importtsv_import_failed' ) );
			$wgOut->addHTML( wfMsg( 'ow_importtsv_not_tsv' ) );
			return false;
		}

		// Every further column must be named definition_<iso> or translations_<iso>.
		// Column names come from the uploaded file, so they are escaped before
		// being put into an HTML message.
		$headerError = null;
		for ( $i = 2; $i < count( $columns ); $i++ ) {
			$columnName = $columns[$i];
			$baseName = substr( $columnName, 0, strrpos( $columnName, '_' ) );
			if ( $baseName == "definition" || $baseName == "translations" ) {
				$langCode = substr( $columnName, strrpos( $columnName, '_' ) + 1 );
				if ( !getLanguageIdForIso639_3( $langCode ) ) {
					$headerError = wfMsg( 'ow_impexptsv_unknown_lang', htmlspecialchars( $langCode ) );
					break;
				}
			} else {
				// column name does not start with definition or translations.
				$headerError = wfMsg( 'ow_importtsv_bad_columns', htmlspecialchars( $columnName ) );
				break;
			}
		}
		if ( $headerError !== null ) {
			fclose( $file );
			$wgOut->setPageTitle( wfMsg( 'ow_importtsv_import_failed' ) );
			$wgOut->addHTML( $headerError );
			return false;
		}

		//
		// All tests passed. lets get started
		//
		if ( $testRun ) {
			$wgOut->setPageTitle( wfMsg( 'ow_importtsv_test_run_title' ) );
		} else {
			$wgOut->setPageTitle( wfMsg( 'ow_importtsv_importing' ) );
		}

		// NOTE(review): the transaction is started for test runs as well — confirm that is intended.
		startNewTransaction( $wgUser->getID(), wfGetIP(), "Bulk import via Special:ImportTSV", $dc );	# this string shouldn't be localized because it will be stored in the db

		$line = 1; // actually 2, 1 was the header, but increased at the start of while
		$definitions = 0; // definitions added
		$translations = 0; // translations added

		while ( $row = fgetcsv( $file, $maxLineLength, "\t" ) ) {
			$line++;

			// fgetcsv() reports a blank line as an array holding a single null.
			if ( count( $row ) == 1 && $row[0] === null ) {
				continue;
			}

			$dmid = trim( $row[0] );
			$exp = isset( $row[1] ) ? $row[1] : '';
			$expHtml = htmlspecialchars( $exp );

			// find the defined meaning record. The id comes from the uploaded
			// file and is concatenated into SQL, so only digit-only ids are
			// ever sent to the database.
			$dmRecord = false;
			if ( preg_match( '/^[0-9]+$/', $dmid ) ) {
				$qry = "SELECT dm.meaning_text_tcid, exp.spelling ";
				$qry .= "FROM {$dc}_defined_meaning dm INNER JOIN {$dc}_expression exp ON dm.expression_id=exp.expression_id ";
				$qry .= "WHERE dm.defined_meaning_id=$dmid ";
				$qry .= "AND " . getLatestTransactionRestriction( 'dm' );
				$qry .= "AND " . getLatestTransactionRestriction( 'exp' );

				$dmResult = $dbr->query( $qry );
				$dmRecord = $dbr->fetchRow( $dmResult );
				$dbr->freeResult( $dmResult );
			}

			// perform some tests
			if ( !$dmRecord ) {
				$wgOut->addHTML( "Skipped line $line: unknown defined meaning id " . htmlspecialchars( $dmid ) . ". The id may have been altered in the imported file, or the defined meaning or defining expression was removed from the database.<br />" );
				continue;
			}
			if ( $dmRecord['spelling'] != $exp ) {
				$spellingHtml = htmlspecialchars( $dmRecord['spelling'] );
				$wgOut->addHTML( "Skipped line $line: defined meaning id $dmid does not match defining expression. Should be '$spellingHtml', found '$expHtml'.<br />" );
				continue;
			}

			// all is well. Get the translated content id
			$tcid = $dmRecord['meaning_text_tcid'];

			for ( $columnIndex = 2; $columnIndex < count( $columns ); $columnIndex++ ) {
				// Google docs removes empty columns at the end of a row,
				// so if column index is higher than the length of the row, we can break
				// and move on to the next defined meaning.
				if ( $columnIndex >= count( $row ) ) {
					break;
				}

				$columnValue = $row[$columnIndex];
				if ( !$columnValue ) {
					continue;
				}

				$columnName = $columns[$columnIndex];
				$langCode = substr( $columnName, strrpos( $columnName, '_' ) + 1 );
				$langId = getLanguageIdForIso639_3( $langCode );

				if ( strpos( $columnName, 'definition' ) === 0 ) {
					if ( !translatedTextExists( $tcid, $langId ) ) {
						$valueHtml = htmlspecialchars( $columnValue );
						if ( $testRun ) {
							$wgOut->addHTML( "Would add definition for $expHtml ($dmid) in $langCode: $valueHtml.<br />" );
						} else {
							addTranslatedText( $tcid, $langId, $columnValue );
							$wgOut->addHTML( "Added definition for $expHtml ($dmid) in $langCode: $valueHtml.<br />" );
							$definitions++;
						}
					}
				}

				if ( strpos( $columnName, 'translation' ) === 0 ) {
					// several translations in one cell are separated by '|'
					$spellings = explode( '|', $columnValue );
					foreach ( $spellings as $spelling ) {
						$spelling = trim( $spelling );
						// A stray or trailing '|' yields an empty spelling; never
						// create an expression for it.
						if ( $spelling === '' ) {
							continue;
						}
						$spellingHtml = htmlspecialchars( $spelling );

						$expression = findExpression( $spelling, $langId );
						if ( !$expression ) {
							// expression does not exist
							if ( $testRun ) {
								$wgOut->addHTML( "Would add translation for $expHtml ($dmid) in $langCode: $spellingHtml. Would also add new page.<br />" );
							} else {
								$expression = createExpression( $spelling, $langId );
								$expression->bindToDefinedMeaning( $dmid, 1 );

								// not necessary to check page exists, createPage does that.
								$title = getPageTitle( $spelling );
								createPage( 16, $title );

								$wgOut->addHTML( "Added translation for $expHtml ($dmid) in $langCode: $spellingHtml. Also added new page.<br />" );
								$translations++;
							}
						} else {
							// expression exists, but may not be bound to this defined meaning.
							if ( !$expression->isBoundToDefinedMeaning( $dmid ) ) {
								if ( $testRun ) {
									$wgOut->addHTML( "Would add translation for $expHtml ($dmid) in $langCode: $spellingHtml.<br />" );
								} else {
									$expression->bindToDefinedMeaning( $dmid, 1 );
									$wgOut->addHTML( "Added translation for $expHtml ($dmid) in $langCode: $spellingHtml.<br />" );
									$translations++;
								}
							}
						}
					}
				}
			}
		}

		fclose( $file );

		if ( $definitions == 0 && $translations == 0 ) {
			$wgOut->addHTML( "<br />" );
			if ( $testRun ) {
				$wgOut->addHTML( wfMsg( 'ow_importtsv_nothing_added_test' ) );
			} else {
				$wgOut->addHTML( wfMsg( 'ow_importtsv_nothing_added' ) );
			}
			$wgOut->addHTML( "<br />" );
		} else {
			$wgOut->addHTML( "<br />" . wfMsgExt( 'ow_importtsv_results', 'parsemag', $definitions, $translations ) . "<br />" );
		}
	} else {
		// render the page
		$wgOut->setPageTitle( wfMsg( 'ow_importtsv_title2' ) );
		$wgOut->addHTML( wfMsg( 'ow_importtsv_header' ) );

		$wgOut->addHTML( getOptionPanelForFileUpload(
			array(
				wfMsg( 'ow_importtsv_file' ) => getFileField( 'tsvfile' ),
				wfMsg( 'ow_importtsv_test_run' ) => getCheckBox( 'testrun', true )
			)
		) );
	}
}