X Tutup
/* * The Sleuth Kit * * Contact: Brian Carrier [carrier sleuthkit [dot] org] * Copyright (c) 2010-2012 Basis Technology Corporation. All Rights * reserved. * * This software is distributed under the Common Public License 1.0 */ /** * \file ZipExtractionModule.cpp * Contains the implementation for the Zip extraction file analysis module. * This module extracts zip file content and creates entries in the database * for the extracted files. */ // System includes #include #include #include // Poco includes #include "Poco/Path.h" #include "Poco/Zip/ZipStream.h" #include "Poco/Zip/Decompress.h" // Framework includes #include "TskModuleDev.h" /** * Get the file id corresponding to the last directory on the given path. * If elements along the path have not been seen before, create new entries * for those elements both in the database and in the directory map (3rd parameter). * Note that the parent id for top level directories will be the file id of the zip file. */ static uint64_t getParentIdForPath(Poco::Path& path, const uint64_t fileId, std::string parentPath, std::map& directoryMap) { // If the path references a file, make it refer to to its parent instead if (path.isFile()) path = path.makeParent(); // Initialize parent id to be the file id of the zip file. uint64_t parentId = fileId; // Iterate over every element of the path checking to see if we // already have an entry in the database and in the directory map. Poco::Path tempPath; TskImgDB& imgDB = TskServices::Instance().getImgDB(); std::map::const_iterator pos; for (int i = 0; i < path.depth(); i++) { // Build up a temporary path that only contains the path // elements seen so far. This temporary path will be used // below to add the full path to the map. tempPath.pushDirectory(path[i]); // Have we already seen this path? pos = directoryMap.find(tempPath.toString()); if (pos == directoryMap.end()) { std::string fullpath = ""; fullpath.append(parentPath); fullpath.append("\\"); fullpath.append(path.toString()); // No entry exists for this directory so we create one. if (imgDB.addDerivedFileInfo(path[i], parentId, true, // isDirectory 0, // uncompressed size "", // no details 0, // ctime 0, // crtime 0, // atime 0, // mtime parentId, fullpath) == -1) { std::wstringstream msg; msg << L"ZipExtractionModule - addDerivedFileInfo failed for name=" << path[i].c_str(); LOGERROR(msg.str()); } // Add the full path (to this point) and new id to the map. directoryMap[tempPath.toString()] = parentId; // Update file status to indicate that it is ready for analysis. imgDB.updateFileStatus(parentId, TskImgDB::IMGDB_FILES_STATUS_READY_FOR_ANALYSIS); } else { parentId = pos->second; } } return parentId; } extern "C" { /** * Module identification function. * * @return The name of the module. */ TSK_MODULE_EXPORT const char *name() { return "ZipExtraction"; } /** * Module identification function. * * @return A description of the module. */ TSK_MODULE_EXPORT const char *description() { return "Extracts the files stored inside of ZIP files"; } /** * Module identification function. * * @return The version of the module. */ TSK_MODULE_EXPORT const char *version() { return "1.0.0"; } /** * Module initialization function. Receives a string of intialization arguments, * typically read by the caller from a pipeline configuration file. * Returns TskModule::OK or TskModule::FAIL. Returning TskModule::FAIL indicates * the module is not in an operational state. * * @param args Initialization arguments. * @return TskModule::OK if initialization succeeded, otherwise TskModule::FAIL. */ TskModule::Status TSK_MODULE_EXPORT initialize(const char* args) { return TskModule::OK; } /** * Module execution function. Receives a pointer to a file the module is to * process. The file is represented by a TskFile interface from which both * file content and file metadata can be retrieved. Returns TskModule::OK, * TskModule::FAIL, or TskModule::STOP. Returning TskModule::FAIL indicates * the module experienced an error processing the file. Returning TskModule::STOP * is a request to terminate processing of the file. * * @param pFile A pointer to a file to be processed. * @returns TskModule::OK on success, TskModule::FAIL on error, or TskModule::STOP. */ TskModule::Status TSK_MODULE_EXPORT run(TskFile * pFile) { if (pFile == NULL) { LOGERROR(L"Zip extraction module passed NULL file pointer."); return TskModule::FAIL; } try { TskImgDB& imgDB = TskServices::Instance().getImgDB(); // Create a map of directory names to file ids to use to // associate files/directories with the correct parent. std::map directoryMap; uint64_t parentId = 0; // Save the file to disk and attempt to open it as an archive file. pFile->save(); std::ifstream input(pFile->getPath().c_str(), std::ios_base::binary); Poco::Zip::ZipArchive archive(input); Poco::Zip::ZipArchive::FileHeaders::const_iterator fh; // Attempt to extract the files contained in the archive file. for (fh = archive.headerBegin(); fh != archive.headerEnd(); ++fh) { Poco::Path path(fh->first); Poco::Path parent = path.parent(); std::string name; if (path.isDirectory()) name = path[path.depth() - 1]; else name = path[path.depth()]; // Determine the parent id of the file. if (path.depth() == 0 || path.isDirectory() && path.depth() == 1) // This file or directory lives at the root so our parent id // is the containing file id. parentId = pFile->getId(); else { // We are not at the root so we need to lookup the id of our // parent directory. std::map::const_iterator pos; pos = directoryMap.find(parent.toString()); if (pos == directoryMap.end()) { // In certain circumstances (Windows Send to zip and .docx files) // there may not be entries in the zip file for directories. // For these cases we create database entries for the directories // so that we can accurately track parent relationships. The // getParentIdForPath() method creates the database entries for the // given path and returns the parentId of the last directory on the path. parentId = getParentIdForPath(parent, pFile->getId(), pFile->getFullPath(), directoryMap); } else { parentId = pos->second; } } // Store some extra details about the derived (i.e, extracted) file. std::stringstream details; details << "second.getFileName() << "\" compressed_size=\"" << fh->second.getCompressedSize() << "\" uncompressed_size=\"" << fh->second.getUncompressedSize() << "\" crc=\"" << fh->second.getCRC() << "\" start_pos=\"" << fh->second.getStartPos() << "\" end_pos=\"" << fh->second.getEndPos() << "\" major_version=\"" << fh->second.getMajorVersionNumber() << "\" minor_version=\"" << fh->second.getMinorVersionNumber() << "\"" << ""; uint64_t fileId; std::string fullpath = ""; fullpath.append(pFile->getFullPath()); fullpath.append("\\"); fullpath.append(path.toString()); if (imgDB.addDerivedFileInfo(name, parentId, path.isDirectory(), fh->second.getUncompressedSize(), details.str(), 0, // ctime 0, // crtime 0, // atime static_cast(fh->second.lastModifiedAt().utcTime()), fileId, fullpath) == -1) { std::wstringstream msg; msg << L"ZipExtractionModule - addDerivedFileInfo failed for name=" << name.c_str(); LOGERROR(msg.str()); } TskImgDB::FILE_STATUS fileStatus = TskImgDB::IMGDB_FILES_STATUS_READY_FOR_ANALYSIS; if (path.isDirectory()) directoryMap[path.toString()] = fileId; else { // Only DEFLATE and STORE compression methods are supported. The STORE method // simply stores a file without compression. if (fh->second.getCompressionMethod() == Poco::Zip::ZipCommon::CM_DEFLATE || fh->second.getCompressionMethod() == Poco::Zip::ZipCommon::CM_STORE) { // Save the file for subsequent processing. Poco::Zip::ZipInputStream zipin(input, fh->second); TskServices::Instance().getFileManager().addFile(fileId, zipin); // Schedule subsequent processing. TskServices::Instance().getScheduler().schedule(Scheduler::FileAnalysis, fileId, fileId); } else { std::wstringstream msg; msg << L"ZipExtractionModule - Unsupported compression method for file: " << name.c_str(); LOGWARN(msg.str()); fileStatus = TskImgDB::IMGDB_FILES_STATUS_ANALYSIS_FAILED; } } // Update file status to indicate that it is ready for analysis. imgDB.updateFileStatus(fileId, fileStatus); } } catch (Poco::IllegalStateException&) { // Poco::IllegalStateException is thrown if the file is not a valid zip file // so we simply skip the file. return TskModule::OK; } catch (Poco::AssertionViolationException&) { // Corrupt zip files are not uncommon, especially for carved files. std::wstringstream msg; msg << L"ZipExtractionModule - Encountered corrupt zip file ( " << pFile->getName().c_str() << L")"; LOGWARN(msg.str()); return TskModule::FAIL; } catch (std::exception& ex) { std::wstringstream msg; msg << L"ZipExtractionModule - Error processing zip file ( " << pFile->getName().c_str() << L") : " << ex.what(); LOGERROR(msg.str()); return TskModule::FAIL; } return TskModule::OK; } /** * Module cleanup function. This is where the module should free any resources * allocated during initialization or execution. * * @returns TskModule::OK on success and TskModule::FAIL on error. */ TskModule::Status TSK_MODULE_EXPORT finalize() { return TskModule::OK; } }
X Tutup