diff --git a/citesphere-importer/pom.xml b/citesphere-importer/pom.xml index 4d1ee8d..ad306b9 100644 --- a/citesphere-importer/pom.xml +++ b/citesphere-importer/pom.xml @@ -15,7 +15,9 @@ <spring-data.version>Lovelace-RELEASE</spring-data.version> <thymeleaf.version>3.0.11.RELEASE</thymeleaf.version> <spring.kafka.version>2.2.6.RELEASE</spring.kafka.version> - <citesphere.messages.version>0.2</citesphere.messages.version> + <citesphere.messages.version>0.6</citesphere.messages.version> + <citesphere.model.version>1.24</citesphere.model.version> + <spring-security-oauth2-version>2.4.0.RELEASE</spring-security-oauth2-version> <admin.password>$2a$04$oQo44vqcDIFRoYKiAXoNheurzkwX9dcNmowvTX/hsWuBMwijqn44i</admin.password> @@ -69,7 +71,11 @@ <artifactId>citesphere-messages</artifactId> <version>${citesphere.messages.version}</version> </dependency> - + <dependency> + <groupId>edu.asu.diging</groupId> + <artifactId>citesphere-model</artifactId> + <version>${citesphere.model.version}</version> + </dependency> <!-- Spring --> <dependency> <groupId>org.springframework</groupId> @@ -122,6 +128,12 @@ <artifactId>spring-aop</artifactId> </dependency> + <dependency> + <groupId>org.springframework.security.oauth</groupId> + <artifactId>spring-security-oauth2</artifactId> + <version>${spring-security-oauth2-version}</version> + </dependency> + <dependency> <groupId>org.aspectj</groupId> <artifactId>aspectjrt</artifactId> @@ -302,6 +314,12 @@ <version>1.10.19</version> <scope>test</scope> </dependency> + + <dependency> + <groupId>org.springframework</groupId> + <artifactId>spring-test</artifactId> + <scope>test</scope> + </dependency> <dependency> <groupId>com.opencsv</groupId> diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java index 6b4f845..6b1deb4 100644 --- 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java @@ -5,6 +5,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.kafka.annotation.KafkaListener; import com.fasterxml.jackson.databind.ObjectMapper; @@ -18,7 +19,12 @@ public class ReferenceImportListener { private final Logger logger = LoggerFactory.getLogger(getClass()); @Autowired - private IImportProcessor processor; + @Qualifier("importProcessor") + private IImportProcessor fileProcessor; + + @Autowired + @Qualifier("collectionImportProcessor") + private IImportProcessor collectionProcessor; @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_TOPIC) public void receiveMessage(String message) { @@ -32,6 +38,21 @@ public void receiveMessage(String message) { return; } - processor.process(msg); + fileProcessor.process(msg); + } + + @KafkaListener(topics = KafkaTopics.COLLECTION_IMPORT_TOPIC) + public void receiveCollectionImportMessage(String message) { + ObjectMapper mapper = new ObjectMapper(); + KafkaJobMessage msg = null; + try { + msg = mapper.readValue(message, KafkaJobMessage.class); + } catch (IOException e) { + logger.error("Could not unmarshall message.", e); + return; + } + + collectionProcessor.process(msg); } + } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Affiliation.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Affiliation.java index 120a8f8..818474e 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Affiliation.java +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Affiliation.java @@ -3,6 +3,8 @@ public class Affiliation { private String name; + private String uri; + private String localAuthorityId; public String getName() { return name; @@ -11,4 +13,22 @@ public String getName() { public void setName(String name) { this.name = name; } + + public String getUri() { + return uri; + } + + public void setUri(String uri) { + this.uri = uri; + } + + public String getLocalAuthorityId() { + return localAuthorityId; + } + + public void setLocalAuthorityId(String localAuthorityId) { + this.localAuthorityId = localAuthorityId; + } + + } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java index 1b403f5..9c354b1 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java @@ -3,10 +3,16 @@ import java.util.ArrayList; import java.util.List; +import com.fasterxml.jackson.annotation.JsonProperty; + +import edu.asu.diging.citesphere.model.bib.IGilesUpload; + public class ArticleMeta { + private List<String> collections; private List<ArticleId> articleIds; private String articleTitle; + private String articleShortTitle; private List<ArticleCategoryGroup> categoryGroups = new ArrayList<ArticleCategoryGroup>(); private List<Contributor> contributors; private String authorNotesCorrespondence; @@ -25,10 +31,13 @@ public class ArticleMeta { private String copyrightYear; private String copyrightHolder; private String selfUri; + @JsonProperty("DOI") + private String doi; private String articleAbstract; private String language; private ReviewInfo reviewInfo; private String documentType; + private String filePath; private String 
conferenceTitle; private String conferenceDate; private String conferenceLocation; @@ -43,7 +52,14 @@ public class ArticleMeta { private List<Reference> references; private String referenceCount; private String retrievalDate; + private List<IGilesUpload> gilesUpload; + public List<String> getCollections() { + return collections; + } + public void setCollections(List<String> collections) { + this.collections = collections; + } public List<ArticleId> getArticleIds() { return articleIds; } @@ -56,6 +72,12 @@ public String getArticleTitle() { public void setArticleTitle(String articleTitle) { this.articleTitle = articleTitle; } + public String getArticleShortTitle() { + return articleShortTitle; + } + public void setArticleShortTitle(String articleShortTitle) { + this.articleShortTitle = articleShortTitle; + } public List<ArticleCategoryGroup> getCategories() { return categoryGroups; } @@ -164,6 +186,12 @@ public String getSelfUri() { public void setSelfUri(String selfUri) { this.selfUri = selfUri; } + public String getDoi() { + return doi; + } + public void setDoi(String doi) { + this.doi = doi; + } public String getArticleAbstract() { return articleAbstract; } @@ -188,6 +216,12 @@ public String getDocumentType() { public void setDocumentType(String documentType) { this.documentType = documentType; } + public String getFilePath() { + return filePath; + } + public void setFilePath(String filePath) { + this.filePath = filePath; + } public String getConferenceTitle() { return conferenceTitle; } @@ -272,5 +306,11 @@ public String getRetrievalDate() { public void setRetrievalDate(String retrievalDate) { this.retrievalDate = retrievalDate; } + public List<IGilesUpload> getGilesUpload() { + return gilesUpload; + } + public void setGilesUpload(List<IGilesUpload> gilesUpload) { + this.gilesUpload = gilesUpload; + } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Contributor.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Contributor.java index 623720c..1af7730 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Contributor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Contributor.java @@ -13,6 +13,7 @@ public class Contributor { private String fullSurname; private String fullName; private String emailAddress; + private String uri; private List<Affiliation> affiliations = new ArrayList<Affiliation>(); private List<ContributorId> ids; @@ -64,6 +65,15 @@ public String getEmailAddress() { public void setEmailAddress(String emailAddress) { this.emailAddress = emailAddress; } + public String getUri() { + return uri; + } + public void setUri(String uri) { + this.uri = uri; + } + public void setFullStandardizeName(String fullStandardizeName) { + this.fullStandardizeName = fullStandardizeName; + } public List<Affiliation> getAffiliations() { return affiliations; } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IGilesConnector.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IGilesConnector.java new file mode 100644 index 0000000..a7f7250 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IGilesConnector.java @@ -0,0 +1,8 @@ +package edu.asu.diging.citesphere.importer.core.service; + +import edu.asu.diging.citesphere.model.bib.IGilesUpload; + +public interface IGilesConnector { + + IGilesUpload uploadFile(String username, String token, String filename, byte[] fileBytes); +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/giles/impl/GilesConnector.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/giles/impl/GilesConnector.java new file mode 100644 index 0000000..efec994 --- /dev/null +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/giles/impl/GilesConnector.java @@ -0,0 +1,87 @@ +package edu.asu.diging.citesphere.importer.core.service.giles.impl; + +import javax.annotation.PostConstruct; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.PropertySource; +import org.springframework.core.io.ByteArrayResource; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Service; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.RestTemplate; + +import edu.asu.diging.citesphere.importer.core.service.IGilesConnector; +import edu.asu.diging.citesphere.model.bib.IGilesUpload; +import edu.asu.diging.citesphere.model.bib.impl.GilesUpload; + +@Service +@PropertySource("classpath:/config.properties") +public class GilesConnector implements IGilesConnector { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private RestTemplate restTemplate; + + @Value("${_giles_baseurl}") + private String gilesBaseurl; + + @Value("${_giles_upload_endpoint}") + private String uploadEndpoint; + + @PostConstruct + public void init() { + restTemplate = new RestTemplate(); + } + + @Override + public IGilesUpload uploadFile(String username, String token, String filename, byte[] fileBytes) { + + if (restTemplate == null) { + restTemplate = new RestTemplate(); // Ensure restTemplate is initialized + } + + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.MULTIPART_FORM_DATA); + headers.setBearerAuth(token); + + MultiValueMap<String, Object> body = new LinkedMultiValueMap<>(); + 
body.add("files", new MultipartFileResource(fileBytes, filename)); + + HttpEntity<MultiValueMap<String, Object>> requestEntity = new HttpEntity<>(body, headers); + + ResponseEntity<UploadResponse> response = null; + try { + response = restTemplate.exchange(gilesBaseurl + uploadEndpoint, HttpMethod.POST, requestEntity, UploadResponse.class); + } catch (Exception ex) { + logger.error("Upload request failed", ex); + return null; + } + IGilesUpload upload = new GilesUpload(); + upload.setProgressId(response.getBody().getId()); + upload.setUploadingUser(username); + return upload; + } + + public class MultipartFileResource extends ByteArrayResource { + + private String filename; + + public MultipartFileResource(byte[] bytearray, String filename) { + super(bytearray); + this.filename = filename; + } + + @Override + public String getFilename() { + return this.filename; + } + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/giles/impl/UploadResponse.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/giles/impl/UploadResponse.java new file mode 100644 index 0000000..76fe9c8 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/giles/impl/UploadResponse.java @@ -0,0 +1,22 @@ +package edu.asu.diging.citesphere.importer.core.service.giles.impl; + +class UploadResponse { + private String id; + private String checkUrl; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getCheckUrl() { + return checkUrl; + } + + public void setCheckUrl(String checkUrl) { + this.checkUrl = checkUrl; + } +} \ No newline at end of file diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CollectionImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CollectionImportProcessor.java new file mode 
100644 index 0000000..2d5525a --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CollectionImportProcessor.java @@ -0,0 +1,202 @@ +package edu.asu.diging.citesphere.importer.core.service.impl; + +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.PostConstruct; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; +import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; +import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException; +import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.ItemType; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; +import edu.asu.diging.citesphere.importer.core.service.ICitesphereConnector; +import edu.asu.diging.citesphere.importer.core.service.IImportProcessor; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; +import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; +import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; +import edu.asu.diging.citesphere.messages.KafkaTopics; +import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; +import 
edu.asu.diging.citesphere.messages.model.KafkaImportReturnMessage; +import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; +import edu.asu.diging.citesphere.messages.model.ResponseCode; +import edu.asu.diging.citesphere.messages.model.Status; + +@Service +public class CollectionImportProcessor implements IImportProcessor { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Autowired + private ICitesphereConnector connector; + + @Autowired + private IHandlerRegistry handlerRegistry; + + @Autowired + private IZoteroConnector zoteroConnector; + + @Autowired + private IJsonGenerationService generationService; + + @Autowired + private KafkaRequestProducer requestProducer; + + private Map<String, ItemType> itemTypeMapping = new HashMap<>(); + + @PostConstruct + public void init() { + // this needs to be changed and improved, but for now it works + itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.LETTER, ItemType.LETTER); + itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); + itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); + } + + /* + * (non-Javadoc) + * + * @see + * edu.asu.diging.citesphere.importer.core.service.impl.IImportProcessor#process + * (edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaJobMessage) + */ + @Override + public void process(KafkaJobMessage message) { + JobInfo info = getJobInfo(message); + if (info == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X10); + return; + } + + String filePath = downloadFile(message); + if (filePath == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20); + return; + } + + sendMessage(null, message.getId(), Status.PROCESSING, 
ResponseCode.P00); + BibEntryIterator bibIterator = null; + try { + bibIterator = handlerRegistry.handleFile(info, filePath); + } catch (IteratorCreationException e1) { + logger.error("Could not create iterator.", e1); + } + + if (bibIterator == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); + return; + } + + ObjectMapper mapper = new ObjectMapper(); + ArrayNode root = mapper.createArrayNode(); + int entryCounter = 0; + Map<String, String> filesMap = new HashMap<>(); + while (bibIterator.hasNext()) { + BibEntry entry = bibIterator.next(); + if (entry.getArticleType() == null) { + // something is wrong with this entry, let's ignore it + continue; + } + ItemType type = itemTypeMapping.get(entry.getArticleType()); + JsonNode template = zoteroConnector.getTemplate(type); + ObjectNode bibNode = generationService.generateJson(template, entry); + if(entry.getArticleMeta().getFilePath() != null) { + filesMap.put(entry.getArticleMeta().getArticleTitle(), entry.getArticleMeta().getFilePath()); + } + root.add(bibNode); + entryCounter++; + + // we can submit max 50 entries to Zotoro + if (entryCounter >= 50) { + ItemCreationResponse response = submitEntries(root, info); + entryCounter = 0; + root = mapper.createArrayNode(); + } + + } + + bibIterator.close(); + + ItemCreationResponse response = null; + if (entryCounter > 0) { + response = submitEntries(root, info); + } + + response = response != null ? 
response : new ItemCreationResponse(); + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); + + } + + private void sendMessage(ItemCreationResponse message, String jobId, Status status, ResponseCode code) { + KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId); + returnMessage.setStatus(status); + returnMessage.setCode(code); + try { + requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC); + } catch (MessageCreationException e) { + logger.error("Exception sending message.", e); + } + } + + private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { + ObjectMapper mapper = new ObjectMapper(); + try { + String msg = mapper.writeValueAsString(entries); + logger.info("Submitting " + msg); + ItemCreationResponse response = zoteroConnector.addEntries(info, entries); + if (response != null) { + logger.info(response.getSuccessful() + ""); + logger.error(response.getFailed() + ""); + } else { + logger.error("Item creation failed."); + } + return response; + } catch (URISyntaxException e) { + logger.error("Could not store new entry.", e); + } catch (JsonProcessingException e) { + logger.error("Could not write JSON."); + } + return null; + } + + private JobInfo getJobInfo(KafkaJobMessage message) { + JobInfo info = null; + try { + info = connector.getJobInfo(message.getId()); + } catch (CitesphereCommunicationException e) { + logger.error("Could not get Zotero info.", e); + return null; + } + return info; + } + + private String downloadFile(KafkaJobMessage message) { + String file = null; + try { + file = connector.getUploadeFile(message.getId()); + } catch (CitesphereCommunicationException e) { + logger.error("Could not get Zotero info.", e); + return null; + } + return file; + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java index 8eb8026..1768227 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java @@ -8,6 +8,9 @@ public class JobInfo { private String zotero; private String zoteroId; private String groupId; + private String collectionId; + private String giles; + private String username; public String getZotero() { return zotero; @@ -27,5 +30,23 @@ public String getGroupId() { public void setGroupId(String groupId) { this.groupId = groupId; } + public String getCollectionId() { + return collectionId; + } + public void setCollectionId(String collectionId) { + this.collectionId = collectionId; + } + public String getGiles() { + return giles; + } + public void setGiles(String giles) { + this.giles = giles; + } + public String getUsername() { + return username; + } + public void setUsername(String username) { + this.username = username; + } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/BibFileHandler.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/BibFileHandler.java new file mode 100644 index 0000000..d5c998d --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/BibFileHandler.java @@ -0,0 +1,36 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.impl; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import edu.asu.diging.citesphere.importer.core.exception.HandlerTestException; +import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; +import edu.asu.diging.citesphere.importer.core.service.IGilesConnector; +import 
edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.FileHandler; +import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; +import edu.asu.diging.citesphere.importer.core.service.parse.iterators.BibFileIterator; + + +@Service +public class BibFileHandler implements FileHandler { + + @Autowired + private IGilesConnector gilesConnector; + + @Override + public boolean canHandle(String path) throws HandlerTestException { + if (path.toLowerCase().endsWith(".bib")) { + return true; + } + return false; + } + + @Override + public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) + throws IteratorCreationException { + return new BibFileIterator(path, info, gilesConnector); + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/BibFileIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/BibFileIterator.java new file mode 100644 index 0000000..15194cb --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/BibFileIterator.java @@ -0,0 +1,348 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.iterators; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.social.zotero.api.Data; +import org.springframework.social.zotero.api.Item; + +import 
edu.asu.diging.citesphere.factory.impl.ParseExtra; +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.impl.Affiliation; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticleId; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticlePublicationDate; +import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ContributionType; +import edu.asu.diging.citesphere.importer.core.model.impl.Contributor; +import edu.asu.diging.citesphere.importer.core.model.impl.Issn; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; +import edu.asu.diging.citesphere.importer.core.model.impl.Reference; +import edu.asu.diging.citesphere.importer.core.service.IGilesConnector; +import edu.asu.diging.citesphere.importer.core.service.giles.impl.GilesConnector; +import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.model.bib.IAffiliation; +import edu.asu.diging.citesphere.model.bib.ICitation; +import edu.asu.diging.citesphere.model.bib.ICreator; +import edu.asu.diging.citesphere.model.bib.IGilesUpload; +import edu.asu.diging.citesphere.model.bib.IPerson; +import edu.asu.diging.citesphere.model.bib.IReference; +import edu.asu.diging.citesphere.model.bib.impl.Citation; +import edu.asu.diging.citesphere.model.bib.impl.Person; + +public class BibFileIterator implements BibEntryIterator { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private ParseExtra parseExtra; + + private String filePath; + private String groupId; + private String collectionId; + private JobInfo info; + private Iterator<String> lineIterator; + private Map<String, String> typeMap; + private IGilesConnector gilesConnector; + 
+ public BibFileIterator(String filePath, JobInfo info, IGilesConnector gilesConnector) { + this.filePath = filePath; + this.groupId = info.getGroupId(); + this.collectionId = info.getCollectionId(); + this.info = info; + this.gilesConnector = gilesConnector; + parseExtra = new ParseExtra(); + parseExtra.init(); + init(); + } + + private void init() { + try { + lineIterator = FileUtils.lineIterator(new File(filePath), "UTF-8"); + } catch (IOException e) { + logger.error("Could not create line iterator.", e); + } + typeMap = new HashMap<String, String>(); + typeMap.put("article", Publication.ARTICLE); + typeMap.put("book", Publication.BOOK); + typeMap.put("book-chapter", Publication.BOOK_CHAPTER); + typeMap.put("monograph", Publication.BOOK); + typeMap.put("proceedings-article", Publication.PROCEEDINGS_PAPER); + typeMap.put("book-section", Publication.BOOK_CHAPTER); + typeMap.put("research-article", Publication.ARTICLE); + typeMap.put("book-review", Publication.REVIEW); + typeMap.put("patent", Publication.PROCEEDINGS_PAPER); + } + + @Override + public BibEntry next() { + BibEntry entry = new Publication(); + Map<String, String> fields = new HashMap<>(); + while (lineIterator.hasNext()) { + String line = lineIterator.next().trim(); + if(!line.isBlank() && line.charAt(0)=='@') { + entry.setArticleType(typeMap.get(line.substring(1, line.indexOf('{')))); + } else if (line.equals("}")) { + entry.setJournalMeta(parseJournalMeta(fields)); + entry.setArticleMeta(parseArticleMeta(fields)); + fields.clear(); + break; + } else if (line.contains("=")) { + String[] parts = line.split("=", 2); + if (parts.length == 2) { + String key = parts[0].trim(); + String value = parts[1].trim(); + if (value.endsWith(",")) { + value = value.substring(0, value.length()-1).replaceAll("^\\{|\\}$", ""); + } + fields.put(key, value); + } + } + } + + return entry; + } + + private ContainerMeta parseJournalMeta(Map<String, String> fields) { + ContainerMeta meta = new ContainerMeta(); + + 
if(fields.containsKey("journal")) {
+            List<String> journalAbbrev = new ArrayList<>();
+            journalAbbrev.add(fields.get("journal"));
+            meta.setJournalAbbreviations(journalAbbrev);
+        }
+        meta.setPublisherName(fields.get("publisher"));
+        meta.setPublisherLocation(fields.get("place"));
+        List<Issn> issnList = new ArrayList<Issn>();
+        if(fields.get("issn") != null) {
+            for(String issnString : fields.get("issn").split("and")) {
+                Issn issn = new Issn();
+                issn.setPubType("issn");
+                issn.setIssn(issnString.trim());
+                issnList.add(issn);
+            }
+        }
+        meta.setIssns(issnList);
+        meta.setSeriesTitle(fields.get("series"));
+        return meta;
+    }
+
+    /**
+     * Builds the article-level metadata from the key/value fields of one entry.
+     *
+     * Authors, editors, notes and references are first collected on a temporary
+     * citation so that the extra-field parser can enrich them, and are then
+     * mapped onto the returned {@link ArticleMeta}.
+     *
+     * @param fields field name to raw field value of the entry
+     * @return the populated article metadata, never {@code null}
+     */
+    private ArticleMeta parseArticleMeta(Map<String, String> fields) {
+        ArticleMeta meta = new ArticleMeta();
+        ICitation citation = new Citation();
+        Item item = new Item();
+        Data data = new Data();
+        if (fields.containsKey("annote")) {
+            data.setNote(stripLatexEscapes(fields.get("annote")));
+        }
+        if (fields.containsKey("note")) {
+            // same clean-up as for "annote"; previously only doubled backslashes were removed here
+            data.setExtra(stripLatexEscapes(fields.get("note")));
+        }
+        item.setData(data);
+
+        citation.setAuthors(parsePersons(fields.get("author")));
+        citation.setEditors(parsePersons(fields.get("editor")));
+
+        Set<ICreator> creators = new HashSet<>();
+        citation.setOtherCreators(creators);
+
+        parseExtra.parseMetaDataNote(citation, item);
+        parseExtra.parseExtra(data, citation);
+
+        List<String> collectionIds = new ArrayList<>();
+        if (collectionId != null && !collectionId.trim().isEmpty()) {
+            collectionIds.add(collectionId);
+        }
+        meta.setCollections(collectionIds);
+        meta.setArticleTitle(fields.get("title"));
+        meta.setArticleShortTitle(fields.get("shorttitle"));
+
+        List<Contributor> contributors = new ArrayList<>();
+        // List of authors
+        if (citation.getAuthors() != null) {
+            contributors.addAll(mapPersonToContributor(citation.getAuthors(), ContributionType.AUTHOR));
+        }
+        // List of editors
+        if (citation.getEditors() != null) {
+            contributors.addAll(mapPersonToContributor(citation.getEditors(), ContributionType.EDITOR));
+        }
+        // List of other creators
+        if (citation.getOtherCreators() != null) {
+            contributors.addAll(mapCreatorToContributor(citation.getOtherCreators()));
+        }
+        meta.setContributors(contributors);
+
+        ArticlePublicationDate publicationDate = new ArticlePublicationDate();
+        publicationDate.setPublicationYear(fields.get("year"));
+        meta.setPublicationDate(publicationDate);
+        meta.setVolume(fields.get("volume"));
+        meta.setIssue(fields.get("number"));
+
+        String pages = fields.get("pages");
+        if (pages != null && !pages.trim().isEmpty()) {
+            // accepts "12--15", "12-15" and "12\u201315"; a single page only sets the first page
+            String[] pageRange = pages.trim().split("\\s*[-\u2013]+\\s*");
+            if (pageRange.length > 0) {
+                meta.setFirstPage(pageRange[0].trim());
+            }
+            if (pageRange.length > 1) {
+                meta.setLastPage(pageRange[1].trim());
+            }
+        }
+
+        meta.setSelfUri(fields.get("url"));
+        meta.setDoi(fields.get("doi"));
+        ArticleId doiId = new ArticleId();
+        doiId.setPubIdType("doi");
+        doiId.setId(fields.get("doi"));
+        ArticleId isbnId = new ArticleId();
+        isbnId.setPubIdType("isbn");
+        isbnId.setId(fields.get("isbn"));
+        List<ArticleId> articleIds = new ArrayList<>();
+        articleIds.add(doiId);
+        articleIds.add(isbnId);
+        meta.setArticleIds(articleIds);
+        meta.setArticleAbstract(fields.get("abstract"));
+        meta.setLanguage(fields.get("language"));
+
+        if (citation.getReferences() != null) {
+            meta.setReferences(mapReferences(citation.getReferences()));
+            meta.setReferenceCount(String.valueOf(meta.getReferences().size()));
+        }
+
+        if (fields.get("file") != null) {
+            Path folderPath = Paths.get(filePath).toAbsolutePath().normalize().getParent();
+            // the second ":"-separated part of the field is used as the attachment path
+            String[] fileParts = fields.get("file").split(":");
+            if (folderPath != null && fileParts.length > 1 && !fileParts[1].isEmpty()) {
+                Path attachment = Paths.get(folderPath.toString(), fileParts[1]).normalize();
+                // The path is taken from the entry itself; never read files that lie
+                // outside the folder of the imported file (e.g. via "../").
+                if (attachment.startsWith(folderPath)) {
+                    IGilesUpload upload = createGilesUpload(attachment.toString(), info);
+                    if (upload != null) {
+                        List<IGilesUpload> uploads = new ArrayList<>();
+                        uploads.add(upload);
+                        meta.setGilesUpload(uploads);
+                    }
+                }
+            }
+        }
+
+        return meta;
+    }
+
+    /**
+     * Removes all backslashes and the escaped line-break marker
+     * "{textbackslash}n" from a note value.
+     *
+     * @param value raw field value, may be {@code null}
+     * @return the cleaned value, or {@code null} if the value was {@code null}
+     */
+    private String stripLatexEscapes(String value) {
+        if (value == null) {
+            return null;
+        }
+        return value.replace("\\", "").replace("{textbackslash}n", "");
+    }
+
+    /**
+     * Splits a name list of the form "Last, First and Last, First" into persons.
+     * Names are separated by the word "and" surrounded by whitespace, so that
+     * names containing the letters "and" (e.g. "Anderson") stay intact. A name
+     * without a comma is read as "First Last"; a single word becomes the last name.
+     *
+     * @param nameList raw field value, may be {@code null}
+     * @return the persons in the order in which they are listed, never {@code null}
+     */
+    private Set<IPerson> parsePersons(String nameList) {
+        // insertion-ordered, the order of authors/editors is meaningful
+        Set<IPerson> persons = new java.util.LinkedHashSet<>();
+        if (nameList == null) {
+            return persons;
+        }
+        for (String name : nameList.trim().split("\\s+and\\s+")) {
+            String[] nameParts = name.split(",");
+            String lastName = nameParts.length > 0 ? nameParts[0].trim() : "";
+            String firstName = nameParts.length > 1 ? nameParts[1].trim() : null;
+            if (lastName.isEmpty() && (firstName == null || firstName.isEmpty())) {
+                continue;
+            }
+            if (nameParts.length < 2) {
+                // no comma: treat the last word as last name, everything before as first name
+                String fullName = lastName;
+                String[] words = fullName.split("\\s+");
+                lastName = words[words.length - 1];
+                String given = fullName.substring(0, fullName.length() - lastName.length()).trim();
+                firstName = given.isEmpty() ? null : given;
+            }
+            IPerson person = new Person();
+            person.setLastName(lastName);
+            if (firstName != null) {
+                person.setFirstName(firstName);
+            }
+            persons.add(person);
+        }
+        return persons;
+    }
+
+    /**
+     * Maps every person of the given set to a contributor with the given contribution type.
+     */
+    private List<Contributor> mapPersonToContributor(Set<IPerson> citationContributors, String contributionType) {
+        List<Contributor> contributors = new ArrayList<Contributor>();
+        for (IPerson person : citationContributors) {
+            contributors.add(mapSinglePerson(person, contributionType));
+        }
+        return contributors;
+    }
+
+    /**
+     * Copies name, URI and affiliations of a person into a new contributor.
+     */
+    private Contributor mapSinglePerson(IPerson person, String contributionType) {
+        Contributor contributor = new Contributor();
+        contributor.setContributionType(contributionType);
+        contributor.setGivenName(person.getFirstName());
+        contributor.setSurname(person.getLastName());
+        contributor.setFullName(person.getName());
+        contributor.setUri(person.getUri());
+
+        List<Affiliation> affiliations = new ArrayList<>();
+        if (person.getAffiliations() != null) {
+            for (IAffiliation institute : person.getAffiliations()) {
+                Affiliation affiliation = new Affiliation();
+                affiliation.setName(institute.getName());
+                affiliation.setUri(institute.getUri());
+                affiliation.setLocalAuthorityId(institute.getLocalAuthorityId());
+                affiliations.add(affiliation);
+            }
+        }
+        contributor.setAffiliations(affiliations);
+        return contributor;
+    }
+
+    /**
+     * Maps creators to contributors, using the creator's role as contribution type.
+     */
+    private List<Contributor> mapCreatorToContributor(Set<ICreator> creators) {
+        List<Contributor> contributors = new ArrayList<Contributor>();
+        for (ICreator creator : creators) {
+            contributors.add(mapSinglePerson(creator.getPerson(), creator.getRole()));
+        }
+        return contributors;
+    }
+
+    /**
+     * Maps all references of a citation to importer references.
+     */
+    private List<Reference> mapReferences(Set<IReference> citationReferences) {
+        List<Reference> references = new ArrayList<Reference>();
+        for (IReference citationRef : citationReferences) {
+            references.add(mapSingleReference(citationRef));
+        }
+
+        return references;
+    }
+
+    /**
+     * Copies all properties of a citation reference into a new importer reference.
+     */
+    private Reference mapSingleReference(IReference citationRef) {
+        Reference ref = new Reference();
+        ref.setAuthorString(citationRef.getAuthorString());
+        ref.setTitle(citationRef.getTitle());
+        ref.setYear(citationRef.getYear());
+        ref.setIdentifier(citationRef.getIdentifier());
+        ref.setIdentifierType(citationRef.getIdentifierType());
+        ref.setFirstPage(citationRef.getFirstPage());
+        ref.setEndPage(citationRef.getEndPage());
+        ref.setVolume(citationRef.getVolume());
+        ref.setSource(citationRef.getSource());
+        ref.setReferenceId(citationRef.getReferenceId());
+        ref.setReferenceLabel(citationRef.getReferenceLabel());
+        ref.setPublicationType(citationRef.getPublicationType());
+        ref.setCitationId(citationRef.getCitationId());
+        ref.setReferenceString(citationRef.getReferenceString());
+        ref.setReferenceStringRaw(citationRef.getReferenceStringRaw());
+
+        return ref;
+    }
+
+    /**
+     * Reads the given file and hands its content to the Giles connector.
+     *
+     * @param gilesFilePath path of the file to upload
+     * @param info job info providing the username and the Giles parameter of the upload call
+     * @return the upload returned by the connector, or {@code null} if the file could not be read
+     */
+    private IGilesUpload createGilesUpload(String gilesFilePath, JobInfo info) {
+        File file = new File(gilesFilePath);
+        byte[] fileBytes;
+        try {
+            fileBytes = Files.readAllBytes(file.toPath());
+        } catch (IOException e) {
+            // do not call the connector without content; report the failure instead
+            org.slf4j.LoggerFactory.getLogger(getClass())
+                    .error("Could not read file " + gilesFilePath + " for upload to Giles.", e);
+            return null;
+        }
+
+        return gilesConnector.uploadFile(info.getUsername(), info.getGiles(), file.getName(), fileBytes);
+    }
+
+    @Override
+    public boolean hasNext() {
+        return lineIterator.hasNext();
+    }
+
+    @Override
+    public void close() {
+
+    }
+
+}
diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java index 2d9240a..d594192 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java @@ -106,7 +106,7 @@ public JsonNode processRelations(JsonNode node, BibEntry bibEntry) { } public JsonNode processCollections(JsonNode node, BibEntry bibEntry) { - return mapper.createArrayNode(); + return mapper.valueToTree(bibEntry.getArticleMeta().getCollections()); } protected ObjectMapper getObjectMapper() { @@ -283,6 +283,7 @@ public String processExtra(JsonNode node, BibEntry article) { fieldHelper.createReferences(article, root); fieldHelper.createReferenceCount(article, root); fieldHelper.createRetrievalDate(article, root); + fieldHelper.createGilesUpload(article, root); try { return prefix + getObjectMapper().writeValueAsString(root); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/ExtraFieldHelper.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/ExtraFieldHelper.java index 5247b07..118e7a4 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/ExtraFieldHelper.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/ExtraFieldHelper.java @@ -4,11 +4,14 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Function; import javax.annotation.PostConstruct; import org.springframework.stereotype.Component; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -355,9 
+358,7 @@ public void createReferences(BibEntry article, ObjectNode root) { ArrayNode references = root.putArray("references"); for (Reference ref : article.getArticleMeta().getReferences()) { ObjectNode refNode = references.addObject(); - if (ref.getAuthorString() != null) { - refNode.put("authorString", ref.getAuthorString()); - } + if (ref.getContributors() != null) { ArrayNode contributors = refNode.putArray("contributors"); int idx = 0; @@ -367,52 +368,33 @@ public void createReferences(BibEntry article, ObjectNode root) { idx++; } } - if (ref.getTitle() != null) { - refNode.put("title", ref.getTitle()); - } - if (ref.getEndPage() != null) { - refNode.put("endPage", ref.getEndPage()); - } - if (ref.getFirstPage() != null) { - refNode.put("firstPage", ref.getFirstPage()); - } - if (ref.getIdentifier() != null) { - refNode.put("identifier", ref.getIdentifier()); - } - if (ref.getIdentifierType() != null) { - refNode.put("identifierType", ref.getIdentifierType()); - } - if (ref.getReferenceString() != null) { - refNode.put("referenceString", ref.getReferenceString()); - } - if (ref.getReferenceStringRaw() != null) { - refNode.put("referenceStringRaw", ref.getReferenceStringRaw()); - } - if (ref.getSource() != null) { - refNode.put("source", ref.getSource()); - } - if (ref.getVolume() != null) { - refNode.put("volume", ref.getVolume()); - } - if (ref.getYear() != null) { - refNode.put("year", ref.getYear()); - } - if (ref.getPublicationType() != null) { - refNode.put("publicationType", ref.getPublicationType()); - } - if (ref.getCitationId()!= null) { - refNode.put("citationId", ref.getCitationId()); - } - if (ref.getReferenceId() != null) { - refNode.put("referenceId", ref.getReferenceId()); - } - if (ref.getReferenceLabel() != null) { - refNode.put("referenceLabel", ref.getReferenceLabel()); - } + + putIfNotNull(ref, Reference::getAuthorString, "authorString", refNode); + putIfNotNull(ref, Reference::getTitle, "title", refNode); + putIfNotNull(ref, 
Reference::getEndPage, "endPage", refNode);
+                putIfNotNull(ref, Reference::getFirstPage, "firstPage", refNode);
+                putIfNotNull(ref, Reference::getIdentifier, "identifier", refNode);
+                putIfNotNull(ref, Reference::getIdentifierType, "identifierType", refNode);
+                putIfNotNull(ref, Reference::getReferenceString, "referenceString", refNode);
+                putIfNotNull(ref, Reference::getReferenceStringRaw, "referenceStringRaw", refNode);
+                putIfNotNull(ref, Reference::getSource, "source", refNode);
+                putIfNotNull(ref, Reference::getVolume, "volume", refNode);
+                putIfNotNull(ref, Reference::getYear, "year", refNode);
+                putIfNotNull(ref, Reference::getPublicationType, "publicationType", refNode);
+                putIfNotNull(ref, Reference::getCitationId, "citationId", refNode);
+                putIfNotNull(ref, Reference::getReferenceId, "referenceId", refNode);
+                putIfNotNull(ref, Reference::getReferenceLabel, "referenceLabel", refNode);
             }
         }
     }
 
+    private void putIfNotNull(Reference ref, Function<Reference, String> getter, String key, ObjectNode refNode) {
+        String value = getter.apply(ref);
+        if (value != null) {
+            refNode.put(key, value);
+        }
+    }
+
     public void createReferenceCount(BibEntry article, ObjectNode root) {
         if (article.getArticleMeta().getReferenceCount() != null) {
             root.put("referenceCount", article.getArticleMeta().getReferenceCount());
@@ -424,6 +406,20 @@ public void createRetrievalDate(BibEntry article, ObjectNode root) {
             root.put("retrievalDate", article.getArticleMeta().getRetrievalDate());
         }
     }
+
+    /**
+     * Writes the Giles uploads of the article as JSON to the given node under
+     * the key "gilesUploads". Nothing is written when the article has no
+     * uploads set.
+     */
+    public void createGilesUpload(BibEntry article, ObjectNode root) {
+        if (article.getArticleMeta().getGilesUpload() != null) {
+            ObjectMapper objectMapper = new ObjectMapper();
+            JsonNode gilesJson = objectMapper.valueToTree(article.getArticleMeta().getGilesUpload());
+            // ObjectNode.put(String, JsonNode) is deprecated; set() is its replacement
+            root.set("gilesUploads", gilesJson);
+        }
+    }
 
     private void fillPerson(Contributor contrib, ObjectNode creatorNode, int idx) {
         List<String> nameParts = new ArrayList<>();
diff --git
a/citesphere-importer/src/main/resources/config.properties b/citesphere-importer/src/main/resources/config.properties index b19b7ab..df90288 100644 --- a/citesphere-importer/src/main/resources/config.properties +++ b/citesphere-importer/src/main/resources/config.properties @@ -24,3 +24,7 @@ _consumer_group=citesphere.importer _zotero_base_url=https://api.zotero.org/ _zotero_template_api_endpoint=/items/new?itemType= _zotero_create_item_api_endpoint=/groups/%s/items + +_giles_baseurl=${giles.baseurl} +_giles_upload_endpoint=/api/v2/files/upload +