Index: typo3/sysext/indexed_search/class.external_parser.php
===================================================================
--- typo3/sysext/indexed_search/class.external_parser.php	(Revision 8949)
+++ typo3/sysext/indexed_search/class.external_parser.php	(Arbeitskopie)
@@ -176,6 +176,23 @@
 					} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:unzipNotFound'), $unzipPath), 3);
 				} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:unzipDisabled'), 1);
 			break;
+			case 'docx':		// Microsoft Word 2007/2008
+			case 'dotx':
+			case 'xlsx':		// Microsoft Excel 2007/2008
+			case 'xltx':
+			case 'pptx':		// Microsoft PowerPoint 2007/2008
+			case 'ppsx':
+			case 'potx':
+					// These formats are read through the external "unzip" tool (see the file reading code below).
+					// Under safe_mode the existence check is skipped and the configured path is used as is.
+				if ($indexerConfig['unzip']) {
+					$unzipPath = rtrim($indexerConfig['unzip'], '/').'/';
+					if (ini_get('safe_mode') || @is_file($unzipPath.'unzip'.$exe)) {
+						$this->app['unzip'] = $unzipPath.'unzip'.$exe;
+						$extOK = TRUE;
+					} else $this->pObj->log_setTSlogMessage("'unzip' tool for reading DOCX/XLSX/PPTX files was not found in path '".$unzipPath."unzip'",3);
+				} else $this->pObj->log_setTSlogMessage('unzip tool (docx/pptx/xlsx) disabled', 1);
+			break;
 			case 'rtf':
 					// Catdoc
 				if ($indexerConfig['unrtf']) {
@@ -226,6 +243,13 @@
 			case 'pps':		// MS PowerPoint
 			case 'ppt':		// MS PowerPoint
 			case 'xls':		// MS Excel
+			case 'docx':		// MS Word 2007/2008
+			case 'dotx':		// MS Word 2007/2008 template
+			case 'pptx':		// MS PowerPoint 2007/2008
+			case 'ppsx':		// MS PowerPoint 2007/2008 slide show
+			case 'potx':		// MS PowerPoint 2007/2008 template
+			case 'xlsx':		// MS Excel 2007/2008
+			case 'xltx':		// MS Excel 2007/2008 template
 			case 'sxc':		// Open Office Calc.
 			case 'sxi':		// Open Office Impress
 			case 'sxw':		// Open Office Writer
@@ -272,6 +296,7 @@
 				}
 			break;
 			case 'doc':
+			case 'dot':
 					// Catdoc
 				if ($indexerConfig['catdoc']) {
 					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.DOC'), $extension);
@@ -279,12 +304,36 @@
 			break;
 			case 'pps':		// MS PowerPoint(?)
 			case 'ppt':		// MS PowerPoint
+			case 'pot':		// MS PowerPoint
 					// ppthtml
 				if ($indexerConfig['ppthtml']) {
 					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.PP'), $extension);
 				}
 			break;
+			case 'docx':		// MS Word 2007/2008
+			case 'dotx':
+					// Read through unzip, so the label depends on that tool and not on catdoc
+				if ($indexerConfig['unzip']) {
+					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.DOC'), $extension);
+				}
+			break;
+			case 'pptx':		// MS PowerPoint 2007/2008
+			case 'ppsx':
+			case 'potx':
+					// Read through unzip, so the label depends on that tool and not on ppthtml
+				if ($indexerConfig['unzip']) {
+					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.PP'), $extension);
+				}
+			break;
+			case 'xlsx':		// MS Excel 2007/2008
+			case 'xltx':
+					// Read through unzip, so the label depends on that tool and not on xlhtml
+				if ($indexerConfig['unzip']) {
+					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.XLS'), $extension);
+				}
+			break;
 			case 'xls':		// MS Excel
+			case 'xlt':		// MS Excel
 					// Xlhtml
 				if ($indexerConfig['xlhtml']) {
 					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.XLS'), $extension);
@@ -504,6 +553,63 @@
 					}
 				}
 			break;
+			case 'docx':
+			case 'dotx':
+			case 'xlsx':
+			case 'xltx':
+			case 'pptx':
+			case 'ppsx':
+			case 'potx':
+				if ($this->app['unzip']) {
+
+						// Content: pick the archive member that holds the document body.
+						// NOTE(review): only sheet1.xml / slide1.xml are read, so further sheets and slides are not indexed.
+					switch ($ext) {
+						case 'docx':
+						case 'dotx':
+							$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' word/document.xml';
+						break;
+						case 'xlsx':
+						case 'xltx':
+							$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
+						break;
+						case 'ppsx':
+						case 'pptx':
+						case 'potx':
+							$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
+						break;
+					}
+					exec($cmd, $res);
+					$content_xml = implode(chr(10), $res);
+					unset($res);
+
+						// Strip the markup; the blank put in front of every tag keeps neighbouring text runs apart.
+					$utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
+					$contentArr = $this->pObj->splitRegularContent($utf8_content);
+
+						// Meta information:
+					$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' docProps/core.xml';
+					exec($cmd, $res);
+					$core_xml = implode(chr(10), $res);
+					unset($res);
+
+						// The title always begins with the file name, even when no meta data can be read.
+					$contentArr['title'] = basename($absFile);
+
+						// xml2tree() returns an error message string instead of an array when the XML cannot be parsed.
+					$coreContent = t3lib_div::xml2tree($core_xml);
+					if (is_array($coreContent) && isset($coreContent['cp:coreProperties'][0]['ch'])) {
+						$coreProps = $coreContent['cp:coreProperties'][0]['ch'];
+						$contentArr['title'] .= ' ' . $coreProps['dc:title'][0]['values'][0];
+						$contentArr['description'] = (string) $coreProps['dc:subject'][0]['values'][0];
+						$contentArr['description'] .= ' ' . $coreProps['dc:description'][0]['values'][0];
+							// Blank added so the creator is not glued to the description text.
+						$contentArr['description'] .= ' ' . $coreProps['dc:creator'][0]['values'][0];
+						$contentArr['keywords'] .= (string) $coreProps['cp:keywords'][0]['values'][0];
+					}
+
+				}
+			break;
 			case 'rtf':
 				if ($this->app['unrtf']) {
 					$cmd = $this->app['unrtf'] . ' ' . escapeshellarg($absFile);
Index: typo3/sysext/indexed_search/ext_localconf.php
===================================================================
--- typo3/sysext/indexed_search/ext_localconf.php	(Revision 8949)
+++ typo3/sysext/indexed_search/ext_localconf.php	(Arbeitskopie)
@@ -32,6 +32,13 @@
 	'ods' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
 	'odp' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
 	'odt' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'docx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'dotx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'pptx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'ppsx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'potx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'xlsx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
+	'xltx' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
 	'rtf' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
 	'txt' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
 	'html' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',