$hits[1], 'year' => $hits[2], 'title' => $hits[3], 'city' => $hits[4], 'publisher' => $hits[5]);
            if (strpos ($hits[3], '(')!==false) {
                $tmp = explode ('(', $hits[3]);
                $back['title'] = trim($tmp[0]);
                $back['series'] = trim($tmp[1], " \(\)\n\r\t\{\}\,\.");
            }
        }
        return $back;
    }

    /**
     * Strips punctuation and whitespace left over from regex captures.
     *
     * Every element is first trimmed on both sides of spaces, newlines,
     * carriage returns, tabs, backslashes, curly braces, commas and periods
     * (the backslashes in the character list are literal, because "\{" and
     * friends are not escape sequences in a double-quoted PHP string).
     * After that a dangling "(", space or comma is removed from the right
     * and a dangling ")", space or comma from the left, in that order.
     *
     * @param array $arr strings to clean, e.g. the match array of preg_match
     * @return array the cleaned strings under their original keys
     */
    private static function _trim ($arr)
    {
        $back = array();
        foreach ($arr as $offset => $line) {
            $line = trim($line, " \n\r\t\{\}\,\.");
            $line = rtrim($line, "(");
            $line = rtrim($line, " ");
            $line = rtrim($line, ",");
            $line = ltrim($line, ")");
            $line = ltrim($line, " ");
            $back[$offset] = ltrim($line, ",");
        }
        return $back;
    }

    /**
     * Parses a journal article citation of the form
     * "Authors (year). title. journal, i(v), pages."
     *
     * A full pattern is tried first. When it does not produce all eight
     * capture groups, a looser "plan B" pattern extracts only authors, year,
     * title and journal.
     *
     * @param string $citation the citation text; a final "." is appended
     *                         when it is missing
     * @return array|int array with author, year, title, journal (plus issue
     *                   and pages when the full pattern matched), or -1 when
     *                   neither pattern recognises the citation (the failed
     *                   match is also echoed)
     */
    private static function _journal2arr ($citation)
    {
        // Both patterns need a terminating period to anchor the last field.
        if (substr ($citation, -1) != '.') {
            $citation .= '.';
        }

        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*[^.]+|.*[^?]+)'          // title in form 'title.' or 'title?'
            . '[\.?]'
            . '(.*?[^0-9]+)'               // journal (some have commas, so until ( or digit (pages, volume,...)
            . '(.*?[^,]+)'                 // vol/issue (match until comma)
            . '\,'
            . '(.*?[^\.\,]+)'              // pages (match against , or .)
            . '(\.|,)'                     // check for size
            . '(.*)'                       // check for size
            . '/', $citation, $hits);

        // Whole match plus eight capture groups.
        if (count($hits) == 9) {
            $hits = citation::_trim($hits);
            return array (
                'author'  => $hits[1],
                'year'    => $hits[2],
                'title'   => $hits[3],
                'journal' => $hits[4],
                'issue'   => $hits[5],
                'pages'   => $hits[6]
            );
        }

        // plan B:
        // retrieve authors, year, title, journal and leave the rest.
        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*?[^.]+|.*?[^?]+)'        // title in form 'title.' or 'title?'
            . '[\.?]'
            . '(.*?[^.]+|.*?[^,]+)'        // journal in form 'title.' or 'title?'
            . '[\.,]'
            . '(.*)'                       // check for size
            . '/', $citation, $hits);

        // The journal capture must contain something other than whitespace.
        // (The comparison used to sit inside the trim() call, so a boolean
        // was trimmed and a blank journal slipped through.)
        if (count($hits) == 6 && trim($hits[4]) != "") {
            $hits = citation::_trim($hits);
            return array (
                'author'  => $hits[1],
                'year'    => $hits[2],
                'title'   => $hits[3],
                'journal' => $hits[4]
            );
        }

        echo "\n***Journal article not recognised***\n";
        print_r ($hits);
        return -1;
    }

    /**
     * Parses a working paper citation.
     *
     * This is problematic, as there are many different forms (citation
     * styles). The pattern expects "Authors (Year). Title. Series. rest",
     * but only authors, year and title are returned. The title capture can
     * be broken and may need a manual fix.
     *
     * @param string $citation the citation text
     * @return array array with author, year and title; each value is null
     *               when the pattern did not match
     */
    private static function _workpaper2arr ($citation)
    {
        // let's just take out fields 1-3 (authors, year, title (whatever it is))
        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*[^.]+|.*[^?]+)'          // title in form 'title.' or 'title?'
            . '[\.?]'
            . '(.*?[^\.]+)'                // series
            . '\.'
            // a separate publisher group, '(.*?[^\.]+)' followed by '[\.]',
            // is deliberately not part of the pattern
            . '(.*)'                       // check for size
            . '/', $citation, $hits);

        $hits = citation::_trim($hits);

        // isset() keeps the null-on-failure result without raising
        // undefined-offset notices when nothing matched.
        return array (
            'author' => isset($hits[1]) ? $hits[1] : null,
            'year'   => isset($hits[2]) ? $hits[2] : null,
            'title'  => isset($hits[3]) ? $hits[3] : null
        );
    }

    /**
     * Parses a conference paper citation.
     *
     * This is problematic, as there are many different forms (citation
     * styles). Authors, year and title are taken the same way as for a
     * working paper. The text after the literal " for the" up to the next
     * comma is returned as the conference and everything after that comma
     * as the city. The title capture can be broken and may need a manual
     * fix.
     *
     * @param string $citation the citation text
     * @return array array with author, year, title, conference and city;
     *               each value is null when the pattern did not match
     */
    private static function _conference2arr ($citation)
    {
        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*[^.]+|.*[^?]+)'          // title in form 'title.' or 'title?'
            . '[\.?]'
            . '(.*) for the'               // text between the title and " for the"
            . '(.*?[^,]+)'                 // conference (match until comma)
            . ','
            . '(.*)'                       // check for size
            . '/', $citation, $hits);

        $hits = citation::_trim($hits);

        // isset() keeps the null-on-failure result without raising
        // undefined-offset notices when nothing matched.
        return array (
            'author'     => isset($hits[1]) ? $hits[1] : null,
            'year'       => isset($hits[2]) ? $hits[2] : null,
            'title'      => isset($hits[3]) ? $hits[3] : null,
            'conference' => isset($hits[5]) ? $hits[5] : null,
            'city'       => isset($hits[6]) ? $hits[6] : null
        );
    }

    /**
     * Parses a report citation.
     *
     * Reports are a mess: they most often start with "AUTHORS (YEAR)." but
     * what follows varies ("title. title. publisher.", "title. series.
     * publisher.", "title. title. series. publisher.", "title in series",
     * ...). The pattern therefore targets
     * "Authors (YEAR). TITLE. REPORT_TYPE. Publisher." and keeps authors,
     * year, title and publisher.
     *
     * @param string $citation the citation text
     * @return array|int array with author, year, title and publisher, or -1
     *                   when the citation does not match
     */
    private static function _report2arr ($citation)
    {
        $back = -1;

        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*[^.])'                   // title (greedy, may itself contain periods)
            . '\.'
            // report type and publisher, OR a single trailing segment.
            // Two dots are a problem; suggest changing the first to ':'.
            // NOTE: the '|' is not wrapped in a group, so it splits the whole
            // pattern into two alternatives.
            . '(?:(.*?[^\.]+)\.(.*?[^\.]+))|(.*?[^\.]+)'
            . '\.'
            . '(.*)'                       // rest
            . '/', $citation, $hits);

        // A count of 6 (whole match + groups 1-5) means the first alternative
        // matched: preg_match leaves trailing unmatched groups out of $hits.
        if (count($hits) == 6) {
            $hits = citation::_trim ($hits);
            $back = array (
                'author'    => $hits[1],
                'year'      => $hits[2],
                'title'     => $hits[3],
                // group 4 is the report type; the publisher is the last
                // segment, i.e. group 5
                'publisher' => $hits[5]
            );
        }
        return $back;
    }

    /**
     * Parses a thesis citation of the form
     * "Authors (YEAR). TITLE. TYPE. PUBLISHER."
     *
     * @param string $citation the citation text
     * @return array|int array with author, year, title, thesis_type and
     *                   publisher, or -1 when the citation does not match
     */
    private static function _thesis2arr ($citation)
    {
        $back = -1;

        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*[^.]+|.*[^?]+)'          // title in form 'title.' or 'title?'
            . '[\.?]'
            . '(.*[^.]+|.*[^,]+)'          // type
            . '[\.\,]'
            . '(.*[^.]+)'                  // location
            . '\.'
            . '(.*)'                       // rest
            . '/', $citation, $hits);

        // If everything's OK, we get size of 7 (whole match + six groups).
        if (count($hits) == 7) {
            $hits = citation::_trim ($hits);
            $back = array (
                'author'      => $hits[1],
                'year'        => $hits[2],
                'title'       => $hits[3],
                'thesis_type' => $hits[4],
                'publisher'   => $hits[5]
            );
        }
        return $back;
    }

    /**
     * Parses a book chapter citation of the form
     * "Authors (YEAR). TITLE. In WHERE (pp. pages). Location: Publisher".
     *
     * Matching is case-insensitive. When the citation does not match, the
     * match array is printed with print_r().
     *
     * @param string $citation the citation text
     * @return array|int array with author, year, title, book_author,
     *                   booktitle, pages, location and publisher, or -1 when
     *                   the citation does not match
     */
    private static function _chapter2arr ($citation)
    {
        $back = -1;

        preg_match ('/(.*?[^(]+)'          // authors - before (
            . '\('                         // (
            . '([0-9]?[^)]+)'              // year - digits before )
            . '\)\.'                       // ).
            . '(.*[^.]+|.*[^?]+) in '      // title in form 'title. IN' or 'title? IN'
            . '(.*?[^)]+)'                 // book author before )
            . '\)'                         // )
            . '(.*)\bpp'                   // book
            . '(.*?[^)]+)'                 // pps
            . '\)[\.\,]'                   // ). or ),
            . '(.*[^\:]+)'                 // location
            . '\:'
            . '(.*)'                       // publisher
            . '/i', $citation, $hits);

        // If everything's OK, we get size of 9 (whole match + eight groups).
        if (count($hits) == 9) {
            $hits = citation::_trim ($hits);
            $back = array (
                'author'      => $hits[1],
                'year'        => $hits[2],
                'title'       => $hits[3],
                // keep only the part of the capture before the first "("
                'book_author' => trim(preg_replace ('/(.*?[^(]+)(.*)/', '$1', $hits[4])),
                // additionally drop a dangling "(" left over from "(pp"
                'booktitle'   => trim ($hits[5], " \n\r\t\v\0("),
                'pages'       => $hits[6],
                'location'    => $hits[7],
                'publisher'   => $hits[8]
            );
        } else {
            print_r ($hits);
        }
        return $back;
    }
}