diff --git a/src/main/java/com/geccocrawler/gecco/annotation/Request.java b/src/main/java/com/geccocrawler/gecco/annotation/Request.java
new file mode 100644
index 00000000..9b7480ec
--- /dev/null
+++ b/src/main/java/com/geccocrawler/gecco/annotation/Request.java
@@ -0,0 +1,21 @@
+package com.geccocrawler.gecco.annotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marks a field of a spider bean that should receive the current request.
+ * The annotation is retained at runtime so it can be found via reflection.
+ */
+@Inherited
+@Target(ElementType.FIELD)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface Request {
+
+    /** Optional value; defaults to the empty string. */
+    String value() default "";
+
+}
diff --git a/src/main/java/com/geccocrawler/gecco/spider/render/AbstractRender.java b/src/main/java/com/geccocrawler/gecco/spider/render/AbstractRender.java
new file mode 100644
index 00000000..e6091c88
--- /dev/null
+++ b/src/main/java/com/geccocrawler/gecco/spider/render/AbstractRender.java
@@ -0,0 +1,135 @@
+package com.geccocrawler.gecco.spider.render;
+
+import java.lang.reflect.Field;
+import java.util.List;
+import java.util.Set;
+
+import net.sf.cglib.beans.BeanMap;
+
+import org.apache.commons.lang3.StringUtils;
+import org.reflections.ReflectionUtils;
+
+import com.geccocrawler.gecco.GeccoEngineThreadLocal;
+import com.geccocrawler.gecco.annotation.FieldRenderName;
+import com.geccocrawler.gecco.annotation.Href;
+import com.geccocrawler.gecco.request.HttpRequest;
+import com.geccocrawler.gecco.response.HttpResponse;
+import com.geccocrawler.gecco.spider.SpiderBean;
+import com.geccocrawler.gecco.spider.render.CustomFieldRender;
+import com.geccocrawler.gecco.spider.render.CustomFieldRenderFactory;
+import com.geccocrawler.gecco.spider.render.Render;
+import com.geccocrawler.gecco.spider.render.RequestFieldRender;
+import com.geccocrawler.gecco.spider.render.RequestParameterFieldRender;
+import com.geccocrawler.gecco.utils.ReflectUtils;
+
+/**
+ * Abstract render: injects the basic attributes and the custom attributes of
+ * a bean. Format-specific injection (html, json, xml) is left to the
+ * implementing classes.
+ *
+ * @author huchengyi
+ *
+ */
+public abstract class AbstractRender implements Render {
+
+    private final RequestFieldRender requestFieldRender;
+
+    private final RequestParameterFieldRender requestParameterFieldRender;
+
+    private final CustomFieldRenderFactory customFieldRenderFactory;
+
+    /**
+     * @param customFieldRenderFactory factory used to look up the custom
+     *            field renders named by {@link FieldRenderName}
+     */
+    public AbstractRender(CustomFieldRenderFactory customFieldRenderFactory) {
+        this.requestFieldRender = new RequestFieldRender();
+        this.requestParameterFieldRender = new RequestParameterFieldRender();
+        this.customFieldRenderFactory = customFieldRenderFactory;
+    }
+
+    /**
+     * Creates a bean of the given class and fills it: request field, request
+     * parameters, the format-specific {@link #render}, then every field
+     * annotated with {@link FieldRenderName}. Finally the bean's clickable
+     * links are scheduled through {@link #requests}.
+     *
+     * @param clazz bean class; must be instantiable via its no-arg constructor
+     * @param request the request being rendered
+     * @param response the response to that request
+     * @return the populated bean, or {@code null} if any step threw
+     */
+    @Override
+    public SpiderBean inject(Class clazz, HttpRequest request, HttpResponse response) {
+        try {
+            // explicit cast: a raw Class only yields Object from newInstance()
+            SpiderBean bean = (SpiderBean) clazz.newInstance();
+            BeanMap beanMap = BeanMap.create(bean);
+            requestFieldRender.render(request, response, beanMap, bean);
+            requestParameterFieldRender.render(request, response, beanMap, bean);
+            render(request, response, beanMap, bean);
+            Set<Field> customFields = ReflectionUtils.getAllFields(bean.getClass(), ReflectionUtils.withAnnotation(FieldRenderName.class));
+            for (Field customField : customFields) {
+                FieldRenderName fieldRender = customField.getAnnotation(FieldRenderName.class);
+                String name = fieldRender.value();
+                CustomFieldRender customFieldRender = customFieldRenderFactory.getCustomFieldRender(name);
+                // a name the factory has no render for is skipped
+                if (customFieldRender != null) {
+                    customFieldRender.render(request, response, beanMap, bean, customField);
+                }
+            }
+            requests(request, bean);
+            return bean;
+        } catch (Exception ex) {
+            // best effort: report the failure and signal it with null
+            ex.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Format-specific injection, supplied by the implementing class.
+     */
+    public abstract void render(HttpRequest request, HttpResponse response, BeanMap beanMap, SpiderBean bean);
+
+    /**
+     * Schedules a sub request for every non-empty url held by a field that is
+     * annotated with {@link Href} and has {@code click} enabled. A field may
+     * hold a single String or a List of Strings; null values are skipped.
+     */
+    @Override
+    public void requests(HttpRequest request, SpiderBean bean) {
+        BeanMap beanMap = BeanMap.create(bean);
+        Set<Field> hrefFields = ReflectionUtils.getAllFields(bean.getClass(), ReflectionUtils.withAnnotation(Href.class));
+        for (Field hrefField : hrefFields) {
+            Href href = hrefField.getAnnotation(Href.class);
+            if (!href.click()) {
+                continue;
+            }
+            Object o = beanMap.get(hrefField.getName());
+            if (o == null) {
+                continue;
+            }
+            boolean isList = ReflectUtils.haveSuperType(o.getClass(), List.class); // value is a List
+            if (isList) {
+                @SuppressWarnings("unchecked")
+                List<String> list = (List<String>) o;
+                for (String url : list) {
+                    schedule(request, url);
+                }
+            } else {
+                schedule(request, (String) o);
+            }
+        }
+    }
+
+    /**
+     * Hands a sub request for {@code url} to the scheduler, ignoring empty urls.
+     */
+    private void schedule(HttpRequest request, String url) {
+        if (StringUtils.isNotEmpty(url)) {
+            GeccoEngineThreadLocal.getScheduler().into(request.subRequest(url));
+        }
+    }
+
+}
diff --git a/src/main/java/com/geccocrawler/gecco/spider/render/RequestFieldRender.java b/src/main/java/com/geccocrawler/gecco/spider/render/RequestFieldRender.java
new file mode 100644
index 00000000..4361870a
--- /dev/null
+++ b/src/main/java/com/geccocrawler/gecco/spider/render/RequestFieldRender.java
@@ -0,0 +1,34 @@
+package com.geccocrawler.gecco.spider.render;
+
+import java.lang.reflect.Field;
+import java.util.Set;
+
+import net.sf.cglib.beans.BeanMap;
+
+import org.reflections.ReflectionUtils;
+
+import com.geccocrawler.gecco.annotation.Request;
+import com.geccocrawler.gecco.request.HttpRequest;
+import com.geccocrawler.gecco.response.HttpResponse;
+import com.geccocrawler.gecco.spider.SpiderBean;
+
+/**
+ * Stores the current request in the bean fields annotated with
+ * {@link Request}.
+ */
+public class RequestFieldRender implements FieldRender {
+
+    /**
+     * Writes {@code request} into every {@link Request}-annotated field of
+     * {@code bean}, so the result does not depend on the iteration order of
+     * the field set when several fields carry the annotation.
+     */
+    @Override
+    public void render(HttpRequest request, HttpResponse response, BeanMap beanMap, SpiderBean bean) {
+        Set<Field> requestFields = ReflectionUtils.getAllFields(bean.getClass(), ReflectionUtils.withAnnotation(Request.class));
+        for (Field field : requestFields) {
+            beanMap.put(field.getName(), request);
+        }
+    }
+
+}